diff mbox series

[bpf-next,v3,07/18] bpf: sockmap, add msg_cork_bytes() helper

Message ID 20180318195720.14466.35911.stgit@john-Precision-Tower-5810
State Accepted, archived
Delegated to: BPF Maintainers
Headers show
Series bpf,sockmap: sendmsg/sendfile ULP | expand

Commit Message

John Fastabend March 18, 2018, 7:57 p.m. UTC
In the case where we need a specific number of bytes before a
verdict can be assigned, even if the data spans multiple sendmsg
or sendfile calls. The BPF program may use msg_cork_bytes().

The extreme case is a user can call sendmsg repeatedly with
1-byte msg segments. Obviously, this is bad for performance but
is still valid. If the BPF program needs N bytes to validate
a header it can use msg_cork_bytes to specify N bytes and the
BPF program will not be called again until N bytes have been
accumulated. The infrastructure will attempt to coalesce data
if possible so in many cases (most my use cases at least) the
data will be in a single scatterlist element with data pointers
pointing to start/end of the element. However, this is dependent
on available memory so is not guaranteed. So BPF programs must
validate data pointer ranges, but this is the case anyways to
convince the verifier the accesses are valid.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/uapi/linux/bpf.h |    3 ++-
 net/core/filter.c        |   16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

Comments

David Miller March 18, 2018, 8:30 p.m. UTC | #1
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:20 -0700

> In the case where we need a specific number of bytes before a
> verdict can be assigned, even if the data spans multiple sendmsg
> or sendfile calls. The BPF program may use msg_cork_bytes().
> 
> The extreme case is a user can call sendmsg repeatedly with
> 1-byte msg segments. Obviously, this is bad for performance but
> is still valid. If the BPF program needs N bytes to validate
> a header it can use msg_cork_bytes to specify N bytes and the
> BPF program will not be called again until N bytes have been
> accumulated. The infrastructure will attempt to coalesce data
> if possible so in many cases (most my use cases at least) the
> data will be in a single scatterlist element with data pointers
> pointing to start/end of the element. However, this is dependent
> on available memory so is not guaranteed. So BPF programs must
> validate data pointer ranges, but this is the case anyways to
> convince the verifier the accesses are valid.
> 
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>

Acked-by: David S. Miller <davem@davemloft.net>
Alexei Starovoitov March 19, 2018, 4:30 p.m. UTC | #2
On Sun, Mar 18, 2018 at 12:57:20PM -0700, John Fastabend wrote:
> In the case where we need a specific number of bytes before a
> verdict can be assigned, even if the data spans multiple sendmsg
> or sendfile calls. The BPF program may use msg_cork_bytes().
> 
> The extreme case is a user can call sendmsg repeatedly with
> 1-byte msg segments. Obviously, this is bad for performance but
> is still valid. If the BPF program needs N bytes to validate
> a header it can use msg_cork_bytes to specify N bytes and the
> BPF program will not be called again until N bytes have been
> accumulated. The infrastructure will attempt to coalesce data
> if possible so in many cases (most my use cases at least) the
> data will be in a single scatterlist element with data pointers
> pointing to start/end of the element. However, this is dependent
> on available memory so is not guaranteed. So BPF programs must
> validate data pointer ranges, but this is the case anyways to
> convince the verifier the accesses are valid.
> 
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/uapi/linux/bpf.h |    3 ++-
>  net/core/filter.c        |   16 ++++++++++++++++
>  2 files changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index a557a2a..1765cfb 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -792,7 +792,8 @@ struct bpf_stack_build_id {
>  	FN(override_return),		\
>  	FN(sock_ops_cb_flags_set),	\
>  	FN(msg_redirect_map),		\
> -	FN(msg_apply_bytes),
> +	FN(msg_apply_bytes),		\
> +	FN(msg_cork_bytes),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 17d6775..0c9daf6 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1942,6 +1942,20 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>  	.arg2_type      = ARG_ANYTHING,
>  };
>  
> +BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
> +{
> +	msg->cork_bytes = bytes;
> +	return 0;
> +}

my understanding that setting it here and in the other helper *_bytes to zero
will be effectively a nop. Right?

Acked-by: Alexei Starovoitov <ast@kernel.org>
John Fastabend March 19, 2018, 8 p.m. UTC | #3
On 03/19/2018 09:30 AM, Alexei Starovoitov wrote:
> On Sun, Mar 18, 2018 at 12:57:20PM -0700, John Fastabend wrote:
>> In the case where we need a specific number of bytes before a
>> verdict can be assigned, even if the data spans multiple sendmsg
>> or sendfile calls. The BPF program may use msg_cork_bytes().
>>
>> The extreme case is a user can call sendmsg repeatedly with
>> 1-byte msg segments. Obviously, this is bad for performance but
>> is still valid. If the BPF program needs N bytes to validate
>> a header it can use msg_cork_bytes to specify N bytes and the
>> BPF program will not be called again until N bytes have been
>> accumulated. The infrastructure will attempt to coalesce data
>> if possible so in many cases (most my use cases at least) the
>> data will be in a single scatterlist element with data pointers
>> pointing to start/end of the element. However, this is dependent
>> on available memory so is not guaranteed. So BPF programs must
>> validate data pointer ranges, but this is the case anyways to
>> convince the verifier the accesses are valid.
>>
>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>> ---
>>  include/uapi/linux/bpf.h |    3 ++-
>>  net/core/filter.c        |   16 ++++++++++++++++
>>  2 files changed, 18 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index a557a2a..1765cfb 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -792,7 +792,8 @@ struct bpf_stack_build_id {
>>  	FN(override_return),		\
>>  	FN(sock_ops_cb_flags_set),	\
>>  	FN(msg_redirect_map),		\
>> -	FN(msg_apply_bytes),
>> +	FN(msg_apply_bytes),		\
>> +	FN(msg_cork_bytes),
>>  
>>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>   * function eBPF program intends to call
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 17d6775..0c9daf6 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -1942,6 +1942,20 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
>>  	.arg2_type      = ARG_ANYTHING,
>>  };
>>  
>> +BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
>> +{
>> +	msg->cork_bytes = bytes;
>> +	return 0;
>> +}
> 
> my understanding that setting it here and in the other helper *_bytes to zero
> will be effectively a nop. Right?
> 

Correct, setting cork_bytes or apply_bytes to zero is just a nop.

> Acked-by: Alexei Starovoitov <ast@kernel.org>
>
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a557a2a..1765cfb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -792,7 +792,8 @@  struct bpf_stack_build_id {
 	FN(override_return),		\
 	FN(sock_ops_cb_flags_set),	\
 	FN(msg_redirect_map),		\
-	FN(msg_apply_bytes),
+	FN(msg_apply_bytes),		\
+	FN(msg_cork_bytes),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 17d6775..0c9daf6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1942,6 +1942,20 @@  struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+	msg->cork_bytes = bytes;
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+	.func           = bpf_msg_cork_bytes,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -3650,6 +3664,8 @@  static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
 		return &bpf_msg_redirect_map_proto;
 	case BPF_FUNC_msg_apply_bytes:
 		return &bpf_msg_apply_bytes_proto;
+	case BPF_FUNC_msg_cork_bytes:
+		return &bpf_msg_cork_bytes_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}