@@ -409,7 +409,7 @@ struct bpf_prog {
union bpf_prog_subtype subtype; /* For fine-grained verifications */
struct bpf_prog_aux *aux; /* Auxiliary fields */
struct sock_fprog_kern *orig_prog; /* Original BPF program */
- unsigned int (*bpf_func)(const struct sk_buff *skb,
+ unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *filter);
/* Instructions for interpreter */
union {
@@ -104,6 +104,7 @@ enum bpf_prog_type {
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK, /* passed to cgroup_bpf_run_filter() from inet_create()/inet6_create() when !kern */
__MAX_BPF_ATTACH_TYPE
};
@@ -532,6 +533,10 @@ struct bpf_tunnel_key {
__u32 tunnel_label;
};
+struct bpf_sock { /* context layout checked for cgroup.sock subtype programs */
+ __u32 bound_dev_if; /* ctx accesses are rewritten to sk_bound_dev_if of struct sock */
+};
+
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
@@ -117,6 +117,12 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
}
}
+/*
+ * Run @prog with @sk as its context and translate the verdict.
+ *
+ * Returns 0 when the program returns 1, and -EPERM for any other
+ * return value.
+ *
+ * NOTE(review): the inet_create()/inet6_create() hunks in this patch call
+ * cgroup_bpf_run_filter() without looking at its result, so presumably an
+ * -EPERM produced here does not stop socket creation -- confirm intent.
+ */
+static int __cgroup_bpf_run_filter_sock(struct sock *sk,
+					struct bpf_prog *prog)
+{
+	unsigned int verdict = prog->bpf_func(sk, prog->insnsi);
+
+	if (verdict != 1)
+		return -EPERM;
+
+	return 0;
+}
+
static int __cgroup_bpf_run_filter_skb(struct sk_buff *skb,
struct bpf_prog *prog)
{
@@ -171,6 +177,9 @@ int __cgroup_bpf_run_filter(struct sock *sk,
case BPF_CGROUP_INET_EGRESS:
ret = __cgroup_bpf_run_filter_skb(skb, prog);
break;
+ case BPF_CGROUP_INET_SOCK:
+ ret = __cgroup_bpf_run_filter_sock(sk, prog);
+ break;
/* make gcc happy else complains about missing enum value */
default:
return 0;
@@ -843,6 +843,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
+ case BPF_CGROUP_INET_SOCK:
prog = bpf_prog_get_type(attr->attach_bpf_fd,
BPF_PROG_TYPE_CGROUP);
if (IS_ERR(prog))
@@ -880,6 +881,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
+ case BPF_CGROUP_INET_SOCK:
cgrp = cgroup_get_from_fd(attr->target_fd);
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
@@ -2634,6 +2634,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return __is_valid_access(off, size, type);
}
+/*
+ * Validate a context access against the layout of struct bpf_sock.
+ *
+ * @off:  byte offset of the access within struct bpf_sock
+ * @size: width of the access in bytes
+ * @type: kind of access; BPF_WRITE is restricted to whitelisted fields
+ *
+ * Returns true when the access is permitted, false otherwise.
+ */
+static bool sock_filter_is_valid_access(int off, int size,
+					enum bpf_access_type type)
+{
+	/* Writes are only allowed to an explicit list of fields. */
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct bpf_sock, bound_dev_if):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	/*
+	 * Every field of struct bpf_sock is a __u32 and
+	 * sock_filter_convert_ctx_access() always emits a BPF_W (4-byte)
+	 * load or store, so any other access width must be rejected here;
+	 * otherwise a 1- or 2-byte access would be rewritten into a 4-byte
+	 * one.  Checking the width first also keeps the modulo below safe
+	 * without relying on the caller to guarantee size > 0.
+	 */
+	if (size != sizeof(__u32))
+		return false;
+
+	if (off < 0 || off + size > sizeof(struct bpf_sock))
+		return false;
+
+	/* Accesses must be naturally aligned. */
+	if (off % size != 0)
+		return false;
+
+	return true;
+}
+
+/*
+ * is_valid_access callback used by cgroup_ops.
+ *
+ * Programs whose subtype has cgroup.sock set are validated with
+ * sock_filter_is_valid_access(); all others fall back to
+ * sk_filter_is_valid_access().
+ */
+static bool cgroup_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   enum bpf_reg_type *reg_type,
+				   union bpf_prog_subtype *prog_subtype)
+{
+	if (!prog_subtype->cgroup.sock)
+		return sk_filter_is_valid_access(off, size, type, reg_type,
+						 prog_subtype);
+
+	/* reg_type and prog_subtype are not forwarded on the sock path. */
+	return sock_filter_is_valid_access(off, size, type);
+}
+
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -2894,6 +2928,45 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+/*
+ * Rewrite a context access on struct bpf_sock into an access on the
+ * underlying struct sock.
+ *
+ * Only bound_dev_if is handled: it becomes a BPF_W store (for BPF_WRITE)
+ * or a BPF_W load (otherwise) at the offset of sk_bound_dev_if.  Any other
+ * @ctx_off emits nothing.
+ *
+ * Returns the number of instructions written to @insn_buf.
+ */
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+					  int dst_reg, int src_reg,
+					  int ctx_off,
+					  struct bpf_insn *insn_buf,
+					  struct bpf_prog *prog)
+{
+	struct bpf_insn *cursor = insn_buf;
+
+	if (ctx_off == offsetof(struct bpf_sock, bound_dev_if)) {
+		/* The BPF_W access below assumes a 4-byte kernel field. */
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+		if (type != BPF_WRITE)
+			*cursor++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+					offsetof(struct sock, sk_bound_dev_if));
+		else
+			*cursor++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+					offsetof(struct sock, sk_bound_dev_if));
+	}
+
+	return cursor - insn_buf;
+}
+
+/*
+ * convert_ctx_access callback used by cgroup_ops.
+ *
+ * Programs whose subtype has cgroup.sock set are rewritten with
+ * sock_filter_convert_ctx_access(); all others use
+ * sk_filter_convert_ctx_access().  Returns whatever the selected helper
+ * returns.
+ */
+static u32 cgroup_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+				     int src_reg, int ctx_off,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog)
+{
+	if (!prog->subtype.cgroup.sock)
+		return sk_filter_convert_ctx_access(type, dst_reg, src_reg,
+						    ctx_off, insn_buf, prog);
+
+	return sock_filter_convert_ctx_access(type, dst_reg, src_reg,
+					      ctx_off, insn_buf, prog);
+}
+
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
@@ -2963,8 +3036,8 @@ static const struct bpf_verifier_ops xdp_ops = {
static const struct bpf_verifier_ops cgroup_ops = {
.get_func_proto = cgroup_func_proto,
- .is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .is_valid_access = cgroup_is_valid_access, /* selects sock or sk_filter checks by cgroup.sock subtype */
+ .convert_ctx_access = cgroup_convert_ctx_access, /* selects sock or sk_filter rewrite by cgroup.sock subtype */
};
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
@@ -377,6 +377,10 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
if (err)
sk_common_release(sk);
}
+
+ if (!kern)
+ cgroup_bpf_run_filter(sk, NULL, BPF_CGROUP_INET_SOCK);
+
out:
return err;
out_rcu_unlock:
@@ -257,6 +257,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
goto out;
}
}
+
+ if (!kern)
+ cgroup_bpf_run_filter(sk, NULL, BPF_CGROUP_INET_SOCK);
out:
return err;
out_rcu_unlock:
Allow BPF_PROG_TYPE_CGROUP programs with cgroup.sock subtype to modify
sk_bound_dev_if for newly created AF_INET or AF_INET6 sockets. The program
can be attached to a cgroup using attach type BPF_CGROUP_INET_SOCK. The
cgroup verifier ops are updated to handle the sock offsets as well as the
existing skb accesses.

This allows a cgroup to be configured such that AF_INET{6} sockets opened
by processes are automatically bound to a specific device. In turn, this
enables the running of programs that do not support SO_BINDTODEVICE in a
specific VRF context / L3 domain.

v2
- dropped the bpf_sock_store_u32 helper
- dropped the new prog type BPF_PROG_TYPE_CGROUP_SOCK
- moved valid access and context conversion to use subtype
- dropped CREATE from BPF_CGROUP_INET_SOCK and related function names
- moved running of filter from sk_alloc to inet{6}_create

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 include/linux/filter.h   |  2 +-
 include/uapi/linux/bpf.h |  5 ++++
 kernel/bpf/cgroup.c      |  9 ++++++
 kernel/bpf/syscall.c     |  2 ++
 net/core/filter.c        | 77 ++++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/af_inet.c       |  4 +++
 net/ipv6/af_inet6.c      |  3 ++
 7 files changed, 99 insertions(+), 3 deletions(-)