diff mbox series

[RFC,v4,bpf-next,05/12] bpf: tcp: Add bpf_skops_established()

Message ID 20200803231045.2683198-1-kafai@fb.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series BPF TCP header options | expand

Commit Message

Martin KaFai Lau Aug. 3, 2020, 11:10 p.m. UTC
In tcp_init_transfer(), it currently calls the bpf prog to give it a
chance to handle the just "ESTABLISHED" event (e.g. do setsockopt
on the newly established sk).  Right now, it is done by calling the
general purpose tcp_call_bpf().

In the later patch, it also needs to pass the just-received skb which
concludes the 3 way handshake. E.g. the SYNACK received at the active side.
The bpf prog can then learn some specific header options written by the
peer's bpf-prog and potentially do setsockopt on the newly established sk.
Thus, instead of reusing the general purpose tcp_call_bpf(), a new function
bpf_skops_established() is added to allow passing the "skb" to the bpf prog.
The actual skb passing from bpf_skops_established() to the bpf prog
will happen together in a later patch which has the necessary bpf pieces.

A "skb" arg is also added to tcp_init_transfer() such that
it can then be passed to bpf_skops_established().

Calling the new bpf_skops_established() instead of tcp_call_bpf()
should be a noop in this patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/net/tcp.h       |  2 +-
 net/ipv4/tcp_fastopen.c |  2 +-
 net/ipv4/tcp_input.c    | 32 ++++++++++++++++++++++++++++----
 3 files changed, 30 insertions(+), 6 deletions(-)

Comments

John Fastabend Aug. 4, 2020, 1:28 a.m. UTC | #1
Martin KaFai Lau wrote:
> In tcp_init_transfer(), it currently calls the bpf prog to give it a
> chance to handle the just "ESTABLISHED" event (e.g. do setsockopt
> on the newly established sk).  Right now, it is done by calling the
> general purpose tcp_call_bpf().
> 
> In the later patch, it also needs to pass the just-received skb which
> concludes the 3 way handshake. E.g. the SYNACK received at the active side.
> The bpf prog can then learn some specific header options written by the
> peer's bpf-prog and potentially do setsockopt on the newly established sk.
> Thus, instead of reusing the general purpose tcp_call_bpf(), a new function
> bpf_skops_established() is added to allow passing the "skb" to the bpf prog.
> The actual skb passing from bpf_skops_established() to the bpf prog
> will happen together in a later patch which has the necessary bpf pieces.
> 
> A "skb" arg is also added to tcp_init_transfer() such that
> it can then be passed to bpf_skops_established().
> 
> Calling the new bpf_skops_established() instead of tcp_call_bpf()
> should be a noop in this patch.

Yep, looks like a noop.

> 
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>

Acked-by: John Fastabend <john.fastabend@gmail.com>

[...]

>  
> +#ifdef CONFIG_CGROUP_BPF
> +static void bpf_skops_established(struct sock *sk, int bpf_op,
> +				  struct sk_buff *skb)


Small nit because its an RFC anyways.

Should we call this bpf_skops_fullsock(...) instead? Just a suggestion.

> +{
> +	struct bpf_sock_ops_kern sock_ops;
> +
> +	sock_owned_by_me(sk);
> +
> +	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> +	sock_ops.op = bpf_op;
> +	sock_ops.is_fullsock = 1;
> +	sock_ops.sk = sk;
> +	/* skb will be passed to the bpf prog in a later patch. */
> +
> +	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
> +}
Martin KaFai Lau Aug. 20, 2020, 6:52 p.m. UTC | #2
On Mon, Aug 03, 2020 at 06:28:04PM -0700, John Fastabend wrote:
> Martin KaFai Lau wrote:
> > In tcp_init_transfer(), it currently calls the bpf prog to give it a
> > chance to handle the just "ESTABLISHED" event (e.g. do setsockopt
> > on the newly established sk).  Right now, it is done by calling the
> > general purpose tcp_call_bpf().
> > 
> > In the later patch, it also needs to pass the just-received skb which
> > concludes the 3 way handshake. E.g. the SYNACK received at the active side.
> > The bpf prog can then learn some specific header options written by the
> > peer's bpf-prog and potentially do setsockopt on the newly established sk.
> > Thus, instead of reusing the general purpose tcp_call_bpf(), a new function
> > bpf_skops_established() is added to allow passing the "skb" to the bpf prog.
> > The actual skb passing from bpf_skops_established() to the bpf prog
> > will happen together in a later patch which has the necessary bpf pieces.
> > 
> > A "skb" arg is also added to tcp_init_transfer() such that
> > it can then be passed to bpf_skops_established().
> > 
> > Calling the new bpf_skops_established() instead of tcp_call_bpf()
> > should be a noop in this patch.
> 
> Yep, looks like a noop.
> 
> > 
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> 
> Acked-by: John Fastabend <john.fastabend@gmail.com>
> 
> [...]
> 
> >  
> > +#ifdef CONFIG_CGROUP_BPF
> > +static void bpf_skops_established(struct sock *sk, int bpf_op,
> > +				  struct sk_buff *skb)
> 
> 
> Small nit because its an RFC anyways.
> 
> Should we call this bpf_skops_fullsock(...) instead? Just a suggestion.
I prefer to stay with the current suffix "_established" which names after the
sock_ops->op that it is calling.  I think it is understood that established
sk is a fullsock.
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 895e7aabf136..ae18c2b222a3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -394,7 +394,7 @@  void tcp_metrics_init(void);
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
-void tcp_init_transfer(struct sock *sk, int bpf_op);
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
 __poll_t tcp_poll(struct file *file, struct socket *sock,
 		      struct poll_table_struct *wait);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 19ad9586c720..d53190302087 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -272,7 +272,7 @@  static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	refcount_set(&req->rsk_refcnt, 2);
 
 	/* Now finish processing the fastopen child socket. */
-	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
+	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);
 
 	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c1d2fb507701..9a8fb41676bc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -138,6 +138,29 @@  void clean_acked_data_flush(void)
 EXPORT_SYMBOL_GPL(clean_acked_data_flush);
 #endif
 
+#ifdef CONFIG_CGROUP_BPF
+static void bpf_skops_established(struct sock *sk, int bpf_op,
+				  struct sk_buff *skb)
+{
+	struct bpf_sock_ops_kern sock_ops;
+
+	sock_owned_by_me(sk);
+
+	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+	sock_ops.op = bpf_op;
+	sock_ops.is_fullsock = 1;
+	sock_ops.sk = sk;
+	/* skb will be passed to the bpf prog in a later patch. */
+
+	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+}
+#else
+static void bpf_skops_established(struct sock *sk, int bpf_op,
+				  struct sk_buff *skb)
+{
+}
+#endif
+
 static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
 			     unsigned int len)
 {
@@ -5806,7 +5829,7 @@  void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
-void tcp_init_transfer(struct sock *sk, int bpf_op)
+void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -5827,7 +5850,7 @@  void tcp_init_transfer(struct sock *sk, int bpf_op)
 		tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
 	tp->snd_cwnd_stamp = tcp_jiffies32;
 
-	tcp_call_bpf(sk, bpf_op, 0, NULL);
+	bpf_skops_established(sk, bpf_op, skb);
 	tcp_init_congestion_control(sk);
 	tcp_init_buffer_space(sk);
 }
@@ -5846,7 +5869,7 @@  void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id(sk, skb);
 	}
 
-	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
+	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
 
 	/* Prevent spurious tcp_cwnd_restart() on first data
 	 * packet.
@@ -6318,7 +6341,8 @@  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		} else {
 			tcp_try_undo_spurious_syn(sk);
 			tp->retrans_stamp = 0;
-			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
+			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
+					  skb);
 			WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 		}
 		smp_mb();