diff mbox series

[bpf-next,v6,05/11] bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock

Message ID 20180120014548.2941040-6-brakmo@fb.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: More sock_ops callbacks | expand

Commit Message

Lawrence Brakmo Jan. 20, 2018, 1:45 a.m. UTC
Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary
use is to determine if there should be calls to sock_ops bpf program at
various points in the TCP code. The field is initialized to zero,
disabling the calls. A sock_ops BPF program can set it, per connection and
as necessary, when the connection is established.

It also adds support for reading and writting the field within a
sock_ops BPF program. Reading is done by accessing the field directly.
However, writing is done through the helper function
bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program
is trying to set a callback that is not supported in the current kernel
(i.e. running an older kernel). The helper function returns 0 if it was
able to set all of the bits set in the argument, a positive number
containing the bits that could not be set, or -EINVAL if the socket is
not a full TCP socket.

Examples of where one could call the bpf program:

1) When RTO fires
2) When a packet is retransmitted
3) When the connection terminates
4) When a packet is sent
5) When a packet is received

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/linux/tcp.h      | 11 +++++++++++
 include/uapi/linux/bpf.h | 12 +++++++++++-
 include/uapi/linux/tcp.h |  5 +++++
 net/core/filter.c        | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 1 deletion(-)

Comments

Alexei Starovoitov Jan. 20, 2018, 3:52 a.m. UTC | #1
On Fri, Jan 19, 2018 at 05:45:42PM -0800, Lawrence Brakmo wrote:
> Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary
> use is to determine if there should be calls to sock_ops bpf program at
> various points in the TCP code. The field is initialized to zero,
> disabling the calls. A sock_ops BPF program can set it, per connection and
> as necessary, when the connection is established.
> 
> It also adds support for reading and writting the field within a
> sock_ops BPF program. Reading is done by accessing the field directly.
> However, writing is done through the helper function
> bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program
> is trying to set a callback that is not supported in the current kernel
> (i.e. running an older kernel). The helper function returns 0 if it was
> able to set all of the bits set in the argument, a positive number
> containing the bits that could not be set, or -EINVAL if the socket is
> not a full TCP socket.
...
> +/* Sock_ops bpf program related variables */
> +#ifdef CONFIG_BPF
> +	u8	bpf_sock_ops_cb_flags;  /* Control calling BPF programs
> +					 * values defined in uapi/linux/tcp.h

I guess we can extend u8 into u16 or more if necessary in the future.

> + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
> + *     Set callback flags for sock_ops
> + *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
> + *     @flags: flags value
> + *     Return: 0 for no error
> + *             -EINVAL if there is no full tcp socket
> + *             bits in flags that are not supported by current kernel
...
> +BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
> +	   int, argval)
> +{
> +	struct sock *sk = bpf_sock->sk;
> +	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
> +
> +	if (!sk_fullsock(sk))
> +		return -EINVAL;
> +
> +#ifdef CONFIG_INET
> +	if (val)
> +		tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
> +
> +	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);

interesting idea! took me some time to realize the potential
of such semantics, but now I like it a lot.
It blends 'set good flag' with 'which flags are supported' logic
into single helper. Nice.
Thanks for adding a test for both ways.
Acked-by: Alexei Starovoitov <ast@kernel.org>

Eric, does this approach address your concerns?
Lawrence Brakmo Jan. 20, 2018, 7:50 a.m. UTC | #2
On 1/19/18, 7:52 PM, "Alexei Starovoitov" <alexei.starovoitov@gmail.com> wrote:

    On Fri, Jan 19, 2018 at 05:45:42PM -0800, Lawrence Brakmo wrote:
    > Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary

    > use is to determine if there should be calls to sock_ops bpf program at

    > various points in the TCP code. The field is initialized to zero,

    > disabling the calls. A sock_ops BPF program can set it, per connection and

    > as necessary, when the connection is established.

    > 

    > It also adds support for reading and writting the field within a

    > sock_ops BPF program. Reading is done by accessing the field directly.

    > However, writing is done through the helper function

    > bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program

    > is trying to set a callback that is not supported in the current kernel

    > (i.e. running an older kernel). The helper function returns 0 if it was

    > able to set all of the bits set in the argument, a positive number

    > containing the bits that could not be set, or -EINVAL if the socket is

    > not a full TCP socket.

    ...
    > +/* Sock_ops bpf program related variables */

    > +#ifdef CONFIG_BPF

    > +	u8	bpf_sock_ops_cb_flags;  /* Control calling BPF programs

    > +					 * values defined in uapi/linux/tcp.h

    
    I guess we can extend u8 into u16 or more if necessary in the future.
    
Yes, that was my thought.

    > + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)

    > + *     Set callback flags for sock_ops

    > + *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct

    > + *     @flags: flags value

    > + *     Return: 0 for no error

    > + *             -EINVAL if there is no full tcp socket

    > + *             bits in flags that are not supported by current kernel

    ...
    > +BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,

    > +	   int, argval)

    > +{

    > +	struct sock *sk = bpf_sock->sk;

    > +	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;

    > +

    > +	if (!sk_fullsock(sk))

    > +		return -EINVAL;

    > +

    > +#ifdef CONFIG_INET

    > +	if (val)

    > +		tcp_sk(sk)->bpf_sock_ops_cb_flags = val;

    > +

    > +	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);

    
    interesting idea! took me some time to realize the potential
    of such semantics, but now I like it a lot.
    It blends 'set good flag' with 'which flags are supported' logic
    into single helper. Nice.
    Thanks for adding a test for both ways.
    Acked-by: Alexei Starovoitov <ast@kernel.org>

    
    Eric, does this approach address your concerns?
Eric Dumazet Jan. 23, 2018, 5:29 p.m. UTC | #3
On Fri, 2018-01-19 at 19:52 -0800, Alexei Starovoitov wrote:
> On Fri, Jan 19, 2018 at 05:45:42PM -0800, Lawrence Brakmo wrote:
> > Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary
> > use is to determine if there should be calls to sock_ops bpf program at
> > various points in the TCP code. The field is initialized to zero,
> > disabling the calls. A sock_ops BPF program can set it, per connection and
> > as necessary, when the connection is established.
> > 
> > It also adds support for reading and writting the field within a
> > sock_ops BPF program. Reading is done by accessing the field directly.
> > However, writing is done through the helper function
> > bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program
> > is trying to set a callback that is not supported in the current kernel
> > (i.e. running an older kernel). The helper function returns 0 if it was
> > able to set all of the bits set in the argument, a positive number
> > containing the bits that could not be set, or -EINVAL if the socket is
> > not a full TCP socket.
> 
> ...
> > +/* Sock_ops bpf program related variables */
> > +#ifdef CONFIG_BPF
> > +	u8	bpf_sock_ops_cb_flags;  /* Control calling BPF programs
> > +					 * values defined in uapi/linux/tcp.h
> 
> I guess we can extend u8 into u16 or more if necessary in the future.
> 
> > + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
> > + *     Set callback flags for sock_ops
> > + *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
> > + *     @flags: flags value
> > + *     Return: 0 for no error
> > + *             -EINVAL if there is no full tcp socket
> > + *             bits in flags that are not supported by current kernel
> 
> ...
> > +BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
> > +	   int, argval)
> > +{
> > +	struct sock *sk = bpf_sock->sk;
> > +	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
> > +
> > +	if (!sk_fullsock(sk))
> > +		return -EINVAL;
> > +
> > +#ifdef CONFIG_INET
> > +	if (val)
> > +		tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
> > +
> > +	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
> 
> interesting idea! took me some time to realize the potential
> of such semantics, but now I like it a lot.
> It blends 'set good flag' with 'which flags are supported' logic
> into single helper. Nice.
> Thanks for adding a test for both ways.
> Acked-by: Alexei Starovoitov <ast@kernel.org>
> 
> Eric, does this approach address your concerns?


Yes, this seems fine, thanks.
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f095..8f4c549 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -335,6 +335,17 @@  struct tcp_sock {
 
 	int			linger2;
 
+
+/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+	u8	bpf_sock_ops_cb_flags;  /* Control calling BPF programs
+					 * values defined in uapi/linux/tcp.h
+					 */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif
+
 /* Receiver side RTT estimation */
 	struct {
 		u32	rtt_us;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8d5874c..7573f5b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@  union bpf_attr {
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ *     Set callback flags for sock_ops
+ *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ *     @flags: flags value
+ *     Return: 0 for no error
+ *             -EINVAL if there is no full tcp socket
+ *             bits in flags that are not supported by current kernel
+ *
  * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
  *     Grow or shrink room in sk_buff.
  *     @skb: pointer to skb
@@ -748,7 +756,8 @@  union bpf_attr {
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
-	FN(override_return),
+	FN(override_return),		\
+	FN(sock_ops_cb_flags_set),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -969,6 +978,7 @@  struct bpf_sock_ops {
 				 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
+	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
 };
 
 /* List of known BPF sock_ops operators.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64..d1df2f6 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -268,4 +268,9 @@  struct tcp_diag_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0		/* Mask of all currently
+							 * supported cb flags
+							 */
+
 #endif /* _UAPI_LINUX_TCP_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index 1ff36ca..c9411dc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3324,6 +3324,33 @@  static const struct bpf_func_proto bpf_getsockopt_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+	   int, argval)
+{
+	struct sock *sk = bpf_sock->sk;
+	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+	if (!sk_fullsock(sk))
+		return -EINVAL;
+
+#ifdef CONFIG_INET
+	if (val)
+		tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+	return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+	.func		= bpf_sock_ops_cb_flags_set,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -3504,6 +3531,8 @@  static const struct bpf_func_proto *
 		return &bpf_setsockopt_proto;
 	case BPF_FUNC_getsockopt:
 		return &bpf_getsockopt_proto;
+	case BPF_FUNC_sock_ops_cb_flags_set:
+		return &bpf_sock_ops_cb_flags_set_proto;
 	case BPF_FUNC_sock_map_update:
 		return &bpf_sock_map_update_proto;
 	default:
@@ -4541,6 +4570,11 @@  static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	case offsetof(struct bpf_sock_ops, srtt_us):
 		SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
 		break;
+
+	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+				   struct tcp_sock);
+		break;
 	}
 	return insn - insn_buf;
 }