diff mbox series

[v4,bpf-next,1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

Message ID 20181018160649.1611530-2-songliubraving@fb.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: add cg_skb_is_valid_access | expand

Commit Message

Song Liu Oct. 18, 2018, 4:06 p.m. UTC
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_pointers() and
bpf_restore_data_pointers() are introduced. They are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/filter.h | 24 ++++++++++++++++++++++++
 kernel/bpf/cgroup.c    |  6 ++++++
 net/core/filter.c      | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 65 insertions(+), 1 deletion(-)

Comments

Daniel Borkmann Oct. 19, 2018, 12:14 a.m. UTC | #1
On 10/18/2018 06:06 PM, Song Liu wrote:
> BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
> skb. This patch enables direct access of skb for these programs.
> 
> Two helper functions bpf_compute_and_save_data_pointers() and
> bpf_restore_data_pointers() are introduced. There are used in
> __cgroup_bpf_run_filter_skb(), to compute proper data_end for the
> BPF program, and restore original data afterwards.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  include/linux/filter.h | 24 ++++++++++++++++++++++++
>  kernel/bpf/cgroup.c    |  6 ++++++
>  net/core/filter.c      | 36 +++++++++++++++++++++++++++++++++++-
>  3 files changed, 65 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 5771874bc01e..96b3ee7f14c9 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -548,6 +548,30 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
>  	cb->data_end  = skb->data + skb_headlen(skb);
>  }
>  
> +/* Similar to bpf_compute_data_pointers(), except that save orginal
> + * data in cb->data and cb->meta_data for restore.
> + */
> +static inline void bpf_compute_and_save_data_pointers(
> +	struct sk_buff *skb, void *saved_pointers[2])
> +{
> +	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
> +
> +	saved_pointers[0] = cb->data_meta;
> +	saved_pointers[1] = cb->data_end;
> +	cb->data_meta = skb->data - skb_metadata_len(skb);
> +	cb->data_end  = skb->data + skb_headlen(skb);

Hmm, can you elaborate why populating data_meta here ...

> +}
> +
> +/* Restore data saved by bpf_compute_data_pointers(). */
> +static inline void bpf_restore_data_pointers(
> +	struct sk_buff *skb, void *saved_pointers[2])
> +{
> +	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
> +
> +	cb->data_meta = saved_pointers[0];
> +	cb->data_end = saved_pointers[1];;
> +}
> +
>  static inline u8 *bpf_skb_cb(struct sk_buff *skb)
>  {
>  	/* eBPF programs may read/write skb->cb[] area to transfer meta
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 00f6ed2e4f9a..5f5180104ddc 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -554,6 +554,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
>  	unsigned int offset = skb->data - skb_network_header(skb);
>  	struct sock *save_sk;
>  	struct cgroup *cgrp;
> +	void *saved_pointers[2];
>  	int ret;
>  
>  	if (!sk || !sk_fullsock(sk))
> @@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
>  	save_sk = skb->sk;
>  	skb->sk = sk;
>  	__skb_push(skb, offset);
> +
> +	/* compute pointers for the bpf prog */
> +	bpf_compute_and_save_data_pointers(skb, saved_pointers);
> +
>  	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
>  				 bpf_prog_run_save_cb);
> +	bpf_restore_data_pointers(skb, saved_pointers);
>  	__skb_pull(skb, offset);
>  	skb->sk = save_sk;
>  	return ret == 1 ? 0 : -EPERM;
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1a3ac6c46873..e3ca30bd6840 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
>  	return bpf_skb_is_valid_access(off, size, type, prog, info);
>  }
>  
> +static bool cg_skb_is_valid_access(int off, int size,
> +				   enum bpf_access_type type,
> +				   const struct bpf_prog *prog,
> +				   struct bpf_insn_access_aux *info)
> +{
> +	switch (off) {
> +	case bpf_ctx_range(struct __sk_buff, tc_classid):
> +	case bpf_ctx_range(struct __sk_buff, data_meta):
> +	case bpf_ctx_range(struct __sk_buff, flow_keys):
> +		return false;

... if it's disallowed anyway (disallowing it is the right thing to do,
but no need to save/restore then..)?

> +	}
> +	if (type == BPF_WRITE) {
> +		switch (off) {
> +		case bpf_ctx_range(struct __sk_buff, mark):
> +		case bpf_ctx_range(struct __sk_buff, priority):
> +		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
> +			break;
> +		default:
> +			return false;
> +		}
> +	}
> +
> +	switch (off) {
> +	case bpf_ctx_range(struct __sk_buff, data):
> +		info->reg_type = PTR_TO_PACKET;
> +		break;
> +	case bpf_ctx_range(struct __sk_buff, data_end):
> +		info->reg_type = PTR_TO_PACKET_END;
> +		break;
> +	}
> +
> +	return bpf_skb_is_valid_access(off, size, type, prog, info);
> +}
> +
>  static bool lwt_is_valid_access(int off, int size,
>  				enum bpf_access_type type,
>  				const struct bpf_prog *prog,
> @@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
>  
>  const struct bpf_verifier_ops cg_skb_verifier_ops = {
>  	.get_func_proto		= cg_skb_func_proto,
> -	.is_valid_access	= sk_filter_is_valid_access,
> +	.is_valid_access	= cg_skb_is_valid_access,
>  	.convert_ctx_access	= bpf_convert_ctx_access,
>  };
>  
>
Alexei Starovoitov Oct. 19, 2018, 1:33 a.m. UTC | #2
On 10/18/18 5:14 PM, Daniel Borkmann wrote:
>> +	case bpf_ctx_range(struct __sk_buff, data_meta):
>> +	case bpf_ctx_range(struct __sk_buff, flow_keys):
>> +		return false;
> ... if it's disallowed anyway (disallowing it is the right thing to do,
> but no need to save/restore then..)?
>

that's a good point.
why shouldn't we allow cg_skb to access data_meta?
xdp can set it and cgroup_skb_ingress will consume it here.
Alexei Starovoitov Oct. 19, 2018, 2:03 a.m. UTC | #3
On 10/18/18 6:33 PM, Alexei Starovoitov wrote:
> On 10/18/18 5:14 PM, Daniel Borkmann wrote:
>>> +    case bpf_ctx_range(struct __sk_buff, data_meta):
>>> +    case bpf_ctx_range(struct __sk_buff, flow_keys):
>>> +        return false;
>> ... if it's disallowed anyway (disallowing it is the right thing to do,
>> but no need to save/restore then..)?
>>
>
> that's a good point.
> why shouldn't we allow cg_skb to access data_meta?
> xdp can set it and cgroup_skb_ingress will consume it here.

I'll take it back.
When xdp doesn't set meta_data it will be zero and
bpf_compute_data_pointers() will point data_meta to skb->data.
On ingress that's eth header, but for tx it will point
to reserved space for future eth header.
So we cannot do that.
Let's keep it disabled and adjust
bpf_compute_and_save_data_pointers() to save only 'data' pointer.
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..96b3ee7f14c9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,30 @@  static inline void bpf_compute_data_pointers(struct sk_buff *skb)
 	cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that the original
+ * cb->data_meta and cb->data_end are saved in saved_pointers[] for restore.
+ */
+static inline void bpf_compute_and_save_data_pointers(
+	struct sk_buff *skb, void *saved_pointers[2])
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	saved_pointers[0] = cb->data_meta;
+	saved_pointers[1] = cb->data_end;
+	cb->data_meta = skb->data - skb_metadata_len(skb);
+	cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_and_save_data_pointers(). */
+static inline void bpf_restore_data_pointers(
+	struct sk_buff *skb, void *saved_pointers[2])
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	cb->data_meta = saved_pointers[0];
+	cb->data_end = saved_pointers[1];;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
 	/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..5f5180104ddc 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -554,6 +554,7 @@  int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	unsigned int offset = skb->data - skb_network_header(skb);
 	struct sock *save_sk;
 	struct cgroup *cgrp;
+	void *saved_pointers[2];
 	int ret;
 
 	if (!sk || !sk_fullsock(sk))
@@ -566,8 +567,13 @@  int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	save_sk = skb->sk;
 	skb->sk = sk;
 	__skb_push(skb, offset);
+
+	/* compute pointers for the bpf prog */
+	bpf_compute_and_save_data_pointers(skb, saved_pointers);
+
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 				 bpf_prog_run_save_cb);
+	bpf_restore_data_pointers(skb, saved_pointers);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
 	return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@  static bool sk_filter_is_valid_access(int off, int size,
 	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
+		return false;
+	}
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
 				enum bpf_access_type type,
 				const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@  const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
 	.get_func_proto		= cg_skb_func_proto,
-	.is_valid_access	= sk_filter_is_valid_access,
+	.is_valid_access	= cg_skb_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };