diff mbox

[net-next,v3,1/3] bpf: Refactor cgroups code in prep for new type

Message ID 1480348130-31354-2-git-send-email-dsa@cumulusnetworks.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

David Ahern Nov. 28, 2016, 3:48 p.m. UTC
Code move only; no functional change intended.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v3
- dropped the rename

v2
- fix bpf_prog_run_clear_cb to bpf_prog_run_save_cb as caught by Daniel

- rename BPF_PROG_TYPE_CGROUP_SKB and its cg_skb functions to
  BPF_PROG_TYPE_CGROUP and cgroup

 kernel/bpf/cgroup.c  | 27 ++++++++++++++++++++++-----
 kernel/bpf/syscall.c | 28 +++++++++++++++-------------
 2 files changed, 37 insertions(+), 18 deletions(-)

Comments

Alexei Starovoitov Nov. 28, 2016, 8:06 p.m. UTC | #1
On Mon, Nov 28, 2016 at 07:48:48AM -0800, David Ahern wrote:
> Code move only; no functional change intended.

not quite...

> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
...
>   * @sk: The socken sending or receiving traffic
> @@ -153,11 +166,15 @@ int __cgroup_bpf_run_filter(struct sock *sk,
>  
>  	prog = rcu_dereference(cgrp->bpf.effective[type]);
>  	if (prog) {
> -		unsigned int offset = skb->data - skb_network_header(skb);
> -
> -		__skb_push(skb, offset);
> -		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
> -		__skb_pull(skb, offset);
> +		switch (type) {
> +		case BPF_CGROUP_INET_INGRESS:
> +		case BPF_CGROUP_INET_EGRESS:
> +			ret = __cgroup_bpf_run_filter_skb(skb, prog);
> +			break;

hmm. what's a point of double jump table? It's only burning cycles
in the fast path. We already have
prog = rcu_dereference(cgrp->bpf.effective[type]); if (prog) {...}
Could you do a variant of __cgroup_bpf_run_filter() instead ?
That doesnt't take 'skb' as an argument.
It will also solve scary looking NULL skb from patch 2:
__cgroup_bpf_run_filter(sk, NULL, ...

and to avoid copy-pasting first dozen lines of current
__cgroup_bpf_run_filter can be moved into a helper that
__cgroup_bpf_run_filter_skb and
__cgroup_bpf_run_filter_sk will call.
Or some other way to rearrange that code.
David Ahern Nov. 28, 2016, 8:31 p.m. UTC | #2
On 11/28/16 1:06 PM, Alexei Starovoitov wrote:
> On Mon, Nov 28, 2016 at 07:48:48AM -0800, David Ahern wrote:
>> Code move only; no functional change intended.
> 
> not quite...
> 
>> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> ...
>>   * @sk: The socken sending or receiving traffic
>> @@ -153,11 +166,15 @@ int __cgroup_bpf_run_filter(struct sock *sk,
>>  
>>  	prog = rcu_dereference(cgrp->bpf.effective[type]);
>>  	if (prog) {
>> -		unsigned int offset = skb->data - skb_network_header(skb);
>> -
>> -		__skb_push(skb, offset);
>> -		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
>> -		__skb_pull(skb, offset);
>> +		switch (type) {
>> +		case BPF_CGROUP_INET_INGRESS:
>> +		case BPF_CGROUP_INET_EGRESS:
>> +			ret = __cgroup_bpf_run_filter_skb(skb, prog);
>> +			break;
> 
> hmm. what's a point of double jump table? It's only burning cycles
> in the fast path. We already have
> prog = rcu_dereference(cgrp->bpf.effective[type]); if (prog) {...}
> Could you do a variant of __cgroup_bpf_run_filter() instead ?
> That doesnt't take 'skb' as an argument.
> It will also solve scary looking NULL skb from patch 2:
> __cgroup_bpf_run_filter(sk, NULL, ...
> 
> and to avoid copy-pasting first dozen lines of current
> __cgroup_bpf_run_filter can be moved into a helper that
> __cgroup_bpf_run_filter_skb and
> __cgroup_bpf_run_filter_sk will call.
> Or some other way to rearrange that code.
> 

sure
1. rename the existing __cgroup_bpf_run_filter to __cgroup_bpf_run_filter_skb

2. create new __cgroup_bpf_run_filter_sk for this new program type. the new run_filter does not need the family or full sock checks for this use case so the common code is only the sock_cgroup_ptr and prog lookups.
diff mbox

Patch

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index a0ab43f264b0..d5746aec8f34 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -117,6 +117,19 @@  void __cgroup_bpf_update(struct cgroup *cgrp,
 	}
 }
 
+static int __cgroup_bpf_run_filter_skb(struct sk_buff *skb,
+				       struct bpf_prog *prog)
+{
+	unsigned int offset = skb->data - skb_network_header(skb);
+	int ret;
+
+	__skb_push(skb, offset);
+	ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+	__skb_pull(skb, offset);
+
+	return ret;
+}
+
 /**
  * __cgroup_bpf_run_filter() - Run a program for packet filtering
  * @sk: The socken sending or receiving traffic
@@ -153,11 +166,15 @@  int __cgroup_bpf_run_filter(struct sock *sk,
 
 	prog = rcu_dereference(cgrp->bpf.effective[type]);
 	if (prog) {
-		unsigned int offset = skb->data - skb_network_header(skb);
-
-		__skb_push(skb, offset);
-		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
-		__skb_pull(skb, offset);
+		switch (type) {
+		case BPF_CGROUP_INET_INGRESS:
+		case BPF_CGROUP_INET_EGRESS:
+			ret = __cgroup_bpf_run_filter_skb(skb, prog);
+			break;
+		/* make gcc happy else complains about missing enum value */
+		default:
+			return 0;
+		}
 	}
 
 	rcu_read_unlock();
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1090d16a31c1..c2bce596e842 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -843,6 +843,7 @@  static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	struct bpf_prog *prog;
 	struct cgroup *cgrp;
+	enum bpf_prog_type ptype;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -853,25 +854,26 @@  static int bpf_prog_attach(const union bpf_attr *attr)
 	switch (attr->attach_type) {
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
-		prog = bpf_prog_get_type(attr->attach_bpf_fd,
-					 BPF_PROG_TYPE_CGROUP_SKB);
-		if (IS_ERR(prog))
-			return PTR_ERR(prog);
-
-		cgrp = cgroup_get_from_fd(attr->target_fd);
-		if (IS_ERR(cgrp)) {
-			bpf_prog_put(prog);
-			return PTR_ERR(cgrp);
-		}
-
-		cgroup_bpf_update(cgrp, prog, attr->attach_type);
-		cgroup_put(cgrp);
+		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 		break;
 
 	default:
 		return -EINVAL;
 	}
 
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp)) {
+		bpf_prog_put(prog);
+		return PTR_ERR(cgrp);
+	}
+
+	cgroup_bpf_update(cgrp, prog, attr->attach_type);
+	cgroup_put(cgrp);
+
 	return 0;
 }