diff mbox series

[v2,bpf-next] bpf: allow BPF programs access skb_shared_info->gso_segs field

Message ID 20190117233157.1467-1-edumazet@google.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [v2,bpf-next] bpf: allow BPF programs access skb_shared_info->gso_segs field | expand

Commit Message

Eric Dumazet Jan. 17, 2019, 11:31 p.m. UTC
This adds the ability to read gso_segs from a BPF program.

v2: refined Eddie Hao's patch to address Alexei's feedback.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eddie Hao <eddieh@google.com>
Cc: Vlad Dumitrescu <vladum@google.com>
Cc: Xiaotian Pei <xiaotian@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
---
 include/uapi/linux/bpf.h                    |  1 +
 net/core/filter.c                           | 21 ++++++++++++
 tools/include/uapi/linux/bpf.h              |  1 +
 tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++++
 4 files changed, 59 insertions(+)

Comments

Martin KaFai Lau Jan. 18, 2019, 6:42 p.m. UTC | #1
On Thu, Jan 17, 2019 at 03:31:57PM -0800, Eric Dumazet wrote:
> This adds the ability to read gso_segs from a BPF program.
> 
> v2: refined Eddie Hao patch to address Alexei feedback.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Eddie Hao <eddieh@google.com>
> Cc: Vlad Dumitrescu <vladum@google.com>
> Cc: Xiaotian Pei <xiaotian@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> ---
>  include/uapi/linux/bpf.h                    |  1 +
>  net/core/filter.c                           | 21 ++++++++++++
>  tools/include/uapi/linux/bpf.h              |  1 +
>  tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++++
>  4 files changed, 59 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 91c43884f295f60a85268ddf0020bf8aa47f8329..2940a9854f6d8e493518ca894e0c9c630ae4ab7a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2540,6 +2540,7 @@ struct __sk_buff {
>  	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
>  	__u64 tstamp;
>  	__u32 wire_len;
> +	__u32 gso_segs;
>  };
>  
>  struct bpf_tunnel_key {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2b3b436ef5457bf44c99780d6dec0b5f403f005c..a6ff5d9a04cf06926ee75cbc523456d12baf25ae 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6700,6 +6700,27 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
>  							     target_size));
>  		break;
>  
> +	case offsetof(struct __sk_buff, gso_segs):
> +		/* si->dst_reg = skb_shinfo(SKB); */
> +#ifdef NET_SKBUFF_DATA_USES_OFFSET
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
> +				      si->dst_reg, si->src_reg,
> +				      offsetof(struct sk_buff, head));
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
> +				      BPF_REG_TMP, si->src_reg,
> +				      offsetof(struct sk_buff, end));
I am not sure BPF_REG_TMP can be used for non-classic BPF.
An earlier insn in the program could already be using R2 (BPF_REG_TMP is
BPF_REG_2), and its value would be lost after this BPF_LDX_MEM.

Daniel, can BPF_REG_AX be used here as a tmp?

> +		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_TMP);
> +#else
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
> +				      si->dst_reg, si->src_reg,
> +				      offsetof(struct sk_buff, end));
> +#endif
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
> +				      si->dst_reg, si->dst_reg,
> +				      bpf_target_off(struct skb_shared_info,
> +						     gso_segs, 2,
> +						     target_size));
> +		break;
>  	case offsetof(struct __sk_buff, wire_len):
>  		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
>  
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 91c43884f295f60a85268ddf0020bf8aa47f8329..2940a9854f6d8e493518ca894e0c9c630ae4ab7a 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -2540,6 +2540,7 @@ struct __sk_buff {
>  	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
>  	__u64 tstamp;
>  	__u32 wire_len;
> +	__u32 gso_segs;
>  };
>  
>  struct bpf_tunnel_key {
> diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> index 2fd90d4568926d13542783c870507d43a6d6bb64..2c46531044bdf9ec1e4fa47e2c94c9edb0ac3d08 100644
> --- a/tools/testing/selftests/bpf/test_verifier.c
> +++ b/tools/testing/selftests/bpf/test_verifier.c
> @@ -5663,6 +5663,42 @@ static struct bpf_test tests[] = {
>  		.result = ACCEPT,
>  		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
>  	},
> +	{
> +		"read gso_segs from CGROUP_SKB",
> +		.insns = {
> +			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
> +				    offsetof(struct __sk_buff, gso_segs)),
> +			BPF_MOV64_IMM(BPF_REG_0, 0),
> +			BPF_EXIT_INSN(),
> +		},
> +		.result = ACCEPT,
> +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
> +	},
> +	{
> +		"write gso_segs from CGROUP_SKB",
> +		.insns = {
> +			BPF_MOV64_IMM(BPF_REG_0, 0),
> +			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
> +				    offsetof(struct __sk_buff, gso_segs)),
> +			BPF_MOV64_IMM(BPF_REG_0, 0),
> +			BPF_EXIT_INSN(),
> +		},
> +		.result = REJECT,
> +		.result_unpriv = REJECT,
> +		.errstr = "invalid bpf_context access off=164 size=4",
> +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
> +	},
> +	{
> +		"read gso_segs from CLS",
> +		.insns = {
> +			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
> +				    offsetof(struct __sk_buff, gso_segs)),
> +			BPF_MOV64_IMM(BPF_REG_0, 0),
> +			BPF_EXIT_INSN(),
> +		},
> +		.result = ACCEPT,
> +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
> +	},
>  	{
>  		"multiple registers share map_lookup_elem result",
>  		.insns = {
> -- 
> 2.20.1.321.g9e740568ce-goog
>
Daniel Borkmann Jan. 23, 2019, 11:55 a.m. UTC | #2
On 01/18/2019 07:42 PM, Martin Lau wrote:
> On Thu, Jan 17, 2019 at 03:31:57PM -0800, Eric Dumazet wrote:
>> This adds the ability to read gso_segs from a BPF program.
>>
>> v2: refined Eddie Hao patch to address Alexei feedback.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Cc: Eddie Hao <eddieh@google.com>
>> Cc: Vlad Dumitrescu <vladum@google.com>
>> Cc: Xiaotian Pei <xiaotian@google.com>
>> Cc: Yuchung Cheng <ycheng@google.com>
>> ---
>>  include/uapi/linux/bpf.h                    |  1 +
>>  net/core/filter.c                           | 21 ++++++++++++
>>  tools/include/uapi/linux/bpf.h              |  1 +
>>  tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++++
>>  4 files changed, 59 insertions(+)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 91c43884f295f60a85268ddf0020bf8aa47f8329..2940a9854f6d8e493518ca894e0c9c630ae4ab7a 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2540,6 +2540,7 @@ struct __sk_buff {
>>  	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
>>  	__u64 tstamp;
>>  	__u32 wire_len;
>> +	__u32 gso_segs;
>>  };
>>  
>>  struct bpf_tunnel_key {
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 2b3b436ef5457bf44c99780d6dec0b5f403f005c..a6ff5d9a04cf06926ee75cbc523456d12baf25ae 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -6700,6 +6700,27 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
>>  							     target_size));
>>  		break;
>>  
>> +	case offsetof(struct __sk_buff, gso_segs):
>> +		/* si->dst_reg = skb_shinfo(SKB); */
>> +#ifdef NET_SKBUFF_DATA_USES_OFFSET
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
>> +				      si->dst_reg, si->src_reg,
>> +				      offsetof(struct sk_buff, head));
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
>> +				      BPF_REG_TMP, si->src_reg,
>> +				      offsetof(struct sk_buff, end));
> I am not sure BPF_REG_TMP can be used for non-classic BPF.
> The earlier insn could be using BPF_REG_TMP (which is BPF_REG_2) and
> R2 would become loss after this BPF_LDX_MEM.

Yes, this will indeed corrupt the R2 register. BPF_REG_TMP can only be used
for register mapping when converting out of classic BPF.

> Daniel, can BPF_REG_AX be used here as a tmp?

BPF_REG_AX would work in this case, yes. Neither of the above insns is used
in blinding, nor would they collide with current verifier rewrites.

>> +		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_TMP);
>> +#else
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
>> +				      si->dst_reg, si->src_reg,
>> +				      offsetof(struct sk_buff, end));
>> +#endif
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
>> +				      si->dst_reg, si->dst_reg,
>> +				      bpf_target_off(struct skb_shared_info,
>> +						     gso_segs, 2,
>> +						     target_size));
>> +		break;
>>  	case offsetof(struct __sk_buff, wire_len):
>>  		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
>>  
>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>> index 91c43884f295f60a85268ddf0020bf8aa47f8329..2940a9854f6d8e493518ca894e0c9c630ae4ab7a 100644
>> --- a/tools/include/uapi/linux/bpf.h
>> +++ b/tools/include/uapi/linux/bpf.h
>> @@ -2540,6 +2540,7 @@ struct __sk_buff {
>>  	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
>>  	__u64 tstamp;
>>  	__u32 wire_len;
>> +	__u32 gso_segs;
>>  };
>>  
>>  struct bpf_tunnel_key {
>> diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
>> index 2fd90d4568926d13542783c870507d43a6d6bb64..2c46531044bdf9ec1e4fa47e2c94c9edb0ac3d08 100644
>> --- a/tools/testing/selftests/bpf/test_verifier.c
>> +++ b/tools/testing/selftests/bpf/test_verifier.c
>> @@ -5663,6 +5663,42 @@ static struct bpf_test tests[] = {
>>  		.result = ACCEPT,
>>  		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
>>  	},
>> +	{
>> +		"read gso_segs from CGROUP_SKB",
>> +		.insns = {
>> +			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
>> +				    offsetof(struct __sk_buff, gso_segs)),
>> +			BPF_MOV64_IMM(BPF_REG_0, 0),
>> +			BPF_EXIT_INSN(),
>> +		},
>> +		.result = ACCEPT,
>> +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
>> +	},
>> +	{
>> +		"write gso_segs from CGROUP_SKB",
>> +		.insns = {
>> +			BPF_MOV64_IMM(BPF_REG_0, 0),
>> +			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
>> +				    offsetof(struct __sk_buff, gso_segs)),
>> +			BPF_MOV64_IMM(BPF_REG_0, 0),
>> +			BPF_EXIT_INSN(),
>> +		},
>> +		.result = REJECT,
>> +		.result_unpriv = REJECT,
>> +		.errstr = "invalid bpf_context access off=164 size=4",
>> +		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
>> +	},
>> +	{
>> +		"read gso_segs from CLS",
>> +		.insns = {
>> +			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
>> +				    offsetof(struct __sk_buff, gso_segs)),
>> +			BPF_MOV64_IMM(BPF_REG_0, 0),
>> +			BPF_EXIT_INSN(),
>> +		},
>> +		.result = ACCEPT,
>> +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
>> +	},
>>  	{
>>  		"multiple registers share map_lookup_elem result",
>>  		.insns = {
>> -- 
>> 2.20.1.321.g9e740568ce-goog
>>
Eric Dumazet Jan. 23, 2019, 4:37 p.m. UTC | #3
On Wed, Jan 23, 2019 at 3:55 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 01/18/2019 07:42 PM, Martin Lau wrote:
> > On Thu, Jan 17, 2019 at 03:31:57PM -0800, Eric Dumazet wrote:
> >> This adds the ability to read gso_segs from a BPF program.
> >>
> >> v2: refined Eddie Hao patch to address Alexei feedback.
> >>
> >> Signed-off-by: Eric Dumazet <edumazet@google.com>
> >> Cc: Eddie Hao <eddieh@google.com>
> >> Cc: Vlad Dumitrescu <vladum@google.com>
> >> Cc: Xiaotian Pei <xiaotian@google.com>
> >> Cc: Yuchung Cheng <ycheng@google.com>
> >> ---
> >>  include/uapi/linux/bpf.h                    |  1 +
> >>  net/core/filter.c                           | 21 ++++++++++++
> >>  tools/include/uapi/linux/bpf.h              |  1 +
> >>  tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++++
> >>  4 files changed, 59 insertions(+)
> >>
> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> >> index 91c43884f295f60a85268ddf0020bf8aa47f8329..2940a9854f6d8e493518ca894e0c9c630ae4ab7a 100644
> >> --- a/include/uapi/linux/bpf.h
> >> +++ b/include/uapi/linux/bpf.h
> >> @@ -2540,6 +2540,7 @@ struct __sk_buff {
> >>      __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
> >>      __u64 tstamp;
> >>      __u32 wire_len;
> >> +    __u32 gso_segs;
> >>  };
> >>
> >>  struct bpf_tunnel_key {
> >> diff --git a/net/core/filter.c b/net/core/filter.c
> >> index 2b3b436ef5457bf44c99780d6dec0b5f403f005c..a6ff5d9a04cf06926ee75cbc523456d12baf25ae 100644
> >> --- a/net/core/filter.c
> >> +++ b/net/core/filter.c
> >> @@ -6700,6 +6700,27 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
> >>                                                           target_size));
> >>              break;
> >>
> >> +    case offsetof(struct __sk_buff, gso_segs):
> >> +            /* si->dst_reg = skb_shinfo(SKB); */
> >> +#ifdef NET_SKBUFF_DATA_USES_OFFSET
> >> +            *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
> >> +                                  si->dst_reg, si->src_reg,
> >> +                                  offsetof(struct sk_buff, head));
> >> +            *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
> >> +                                  BPF_REG_TMP, si->src_reg,
> >> +                                  offsetof(struct sk_buff, end));
> > I am not sure BPF_REG_TMP can be used for non-classic BPF.
> > The earlier insn could be using BPF_REG_TMP (which is BPF_REG_2) and
> > R2 would become loss after this BPF_LDX_MEM.
>
> Yes, this will indeed corrupt R2 register. BPF_REG_TMP can only be used for
> reg mapping out of classic BPF.
>
> > Daniel, can BPF_REG_AX be used here as a tmp?
>
> BPF_REG_AX would work in this case, yes. Neither of the above insns are used
> in blinding nor would they collide with current verifier rewrites.
>

OK, I'll send a v3 using BPF_REG_AX then. Thanks, Daniel & Martin!
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91c43884f295f60a85268ddf0020bf8aa47f8329..2940a9854f6d8e493518ca894e0c9c630ae4ab7a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2540,6 +2540,7 @@  struct __sk_buff {
 	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 	__u64 tstamp;
 	__u32 wire_len;
+	__u32 gso_segs;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index 2b3b436ef5457bf44c99780d6dec0b5f403f005c..a6ff5d9a04cf06926ee75cbc523456d12baf25ae 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6700,6 +6700,27 @@  static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 							     target_size));
 		break;
 
+	case offsetof(struct __sk_buff, gso_segs):
+		/* si->dst_reg = skb_shinfo(SKB); */
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, head));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+				      BPF_REG_TMP, si->src_reg,
+				      offsetof(struct sk_buff, end));
+		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_TMP);
+#else
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, end));
+#endif
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
+				      si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct skb_shared_info,
+						     gso_segs, 2,
+						     target_size));
+		break;
 	case offsetof(struct __sk_buff, wire_len):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 91c43884f295f60a85268ddf0020bf8aa47f8329..2940a9854f6d8e493518ca894e0c9c630ae4ab7a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2540,6 +2540,7 @@  struct __sk_buff {
 	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 	__u64 tstamp;
 	__u32 wire_len;
+	__u32 gso_segs;
 };
 
 struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 2fd90d4568926d13542783c870507d43a6d6bb64..2c46531044bdf9ec1e4fa47e2c94c9edb0ac3d08 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5663,6 +5663,42 @@  static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	},
+	{
+		"read gso_segs from CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, gso_segs)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"write gso_segs from CGROUP_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, gso_segs)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.result_unpriv = REJECT,
+		.errstr = "invalid bpf_context access off=164 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"read gso_segs from CLS",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, gso_segs)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 	{
 		"multiple registers share map_lookup_elem result",
 		.insns = {