[bpf-next,RFC,3/6] bpf: add bpf_tcp_gen_syncookie helper
diff mbox series

Message ID 20190716002650.154729-4-ppenkov.kernel@gmail.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series
  • Introduce a BPF helper to generate SYN cookies
Related show

Commit Message

Petar Penkov July 16, 2019, 12:26 a.m. UTC
From: Petar Penkov <ppenkov@google.com>

This helper function allows BPF programs to try to generate SYN
cookies, given a reference to a listener socket. The function works
from XDP and with an skb context since bpf_skc_lookup_tcp can lookup a
socket in both cases.

Signed-off-by: Petar Penkov <ppenkov@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
---
 include/uapi/linux/bpf.h | 30 ++++++++++++++++++-
 net/core/filter.c        | 62 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 1 deletion(-)

Comments

Eric Dumazet July 16, 2019, 7:59 a.m. UTC | #1
On 7/16/19 2:26 AM, Petar Penkov wrote:
> From: Petar Penkov <ppenkov@google.com>
> 
> This helper function allows BPF programs to try to generate SYN
> cookies, given a reference to a listener socket. The function works
> from XDP and with an skb context since bpf_skc_lookup_tcp can lookup a
> socket in both cases.
> 
...
>  
> +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> +	   struct tcphdr *, th, u32, th_len)
> +{
> +#ifdef CONFIG_SYN_COOKIES
> +	u32 cookie;
> +	u16 mss;
> +
> +	if (unlikely(th_len < sizeof(*th)))


You probably need to check that th_len == th->doff * 4

> +		return -EINVAL;
> +
> +	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> +		return -EINVAL;
> +
> +	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> +		return -EINVAL;
> +
> +	if (!th->syn || th->ack || th->fin || th->rst)
> +		return -EINVAL;
> +
> +	switch (sk->sk_family) {

This is strange, because a dual stack listener will have sk->sk_family set to AF_INET6.

What really matters here is if the packet is IPv4 or IPv6.

So you need to look at iph->version instead.

Then look if the socket family allows this packet to be processed
(For example AF_INET6 sockets might prevent IPv4 packets, see sk->sk_ipv6only )

> +	case AF_INET:
> +		if (unlikely(iph_len < sizeof(struct iphdr)))
> +			return -EINVAL;
> +		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
> +		break;
> +
> +#if IS_BUILTIN(CONFIG_IPV6)
> +	case AF_INET6:
> +		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
> +			return -EINVAL;
> +		mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
> +		break;
> +#endif /* CONFIG_IPV6 */
> +
> +	default:
> +		return -EPROTONOSUPPORT;
> +	}
> +	if (mss <= 0)
> +		return -ENOENT;
> +
Lorenz Bauer July 16, 2019, 11:54 a.m. UTC | #2
On Tue, 16 Jul 2019 at 01:27, Petar Penkov <ppenkov.kernel@gmail.com> wrote:
>
> From: Petar Penkov <ppenkov@google.com>
>
> This helper function allows BPF programs to try to generate SYN
> cookies, given a reference to a listener socket. The function works
> from XDP and with an skb context since bpf_skc_lookup_tcp can lookup a
> socket in both cases.
>
> Signed-off-by: Petar Penkov <ppenkov@google.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/uapi/linux/bpf.h | 30 ++++++++++++++++++-
>  net/core/filter.c        | 62 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 91 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6f68438aa4ed..abf4a85c76d1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2713,6 +2713,33 @@ union bpf_attr {
>   *             **-EPERM** if no permission to send the *sig*.
>   *
>   *             **-EAGAIN** if bpf program can try again.
> + *
> + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
> + *     Description
> + *             Try to issue a SYN cookie for the packet with corresponding
> + *             IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
> + *
> + *             *iph* points to the start of the IPv4 or IPv6 header, while
> + *             *iph_len* contains **sizeof**\ (**struct iphdr**) or
> + *             **sizeof**\ (**struct ip6hdr**).
> + *
> + *             *th* points to the start of the TCP header, while *th_len*
> + *             contains **sizeof**\ (**struct tcphdr**).
> + *
> + *     Return
> + *             On success, lower 32 bits hold the generated SYN cookie in
> + *             network order and the higher 32 bits hold the MSS value for that
> + *             cookie.

I prefer returning the cookie vs. directly creating the packet.
Returning a struct would
be nicest, but it doesn't seem worth it to extend the verifier for
that. Taking a pointer
to a result struct would also be good, but we're out of arguments :/

Nit: the MSS is only 16 bit?

> + *
> + *             On failure, the returned value is one of the following:
> + *
> + *             **-EINVAL** SYN cookie cannot be issued due to error
> + *
> + *             **-ENOENT** SYN cookie should not be issued (no SYN flood)
> + *
> + *             **-ENOTSUPP** kernel configuration does not enable SYN cookies
> + *
> + *             **-EPROTONOSUPPORT** *sk* family is not AF_INET/AF_INET6
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -2824,7 +2851,8 @@ union bpf_attr {
>         FN(strtoul),                    \
>         FN(sk_storage_get),             \
>         FN(sk_storage_delete),          \
> -       FN(send_signal),
> +       FN(send_signal),                \
> +       FN(tcp_gen_syncookie),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 47f6386fb17a..109fd1e286f4 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5850,6 +5850,64 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
>         .arg5_type      = ARG_CONST_SIZE,
>  };
>
> +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> +          struct tcphdr *, th, u32, th_len)
> +{
> +#ifdef CONFIG_SYN_COOKIES
> +       u32 cookie;
> +       u16 mss;
> +
> +       if (unlikely(th_len < sizeof(*th)))
> +               return -EINVAL;
> +
> +       if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> +               return -EINVAL;
> +
> +       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> +               return -EINVAL;

Should this be ENOENT instead?

> +
> +       if (!th->syn || th->ack || th->fin || th->rst)
> +               return -EINVAL;
> +
> +       switch (sk->sk_family) {
> +       case AF_INET:
> +               if (unlikely(iph_len < sizeof(struct iphdr)))
> +                       return -EINVAL;
> +               mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
> +               break;
> +
> +#if IS_BUILTIN(CONFIG_IPV6)
> +       case AF_INET6:
> +               if (unlikely(iph_len < sizeof(struct ipv6hdr)))
> +                       return -EINVAL;
> +               mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
> +               break;
> +#endif /* CONFIG_IPV6 */
> +
> +       default:
> +               return -EPROTONOSUPPORT;
> +       }
> +       if (mss <= 0)
> +               return -ENOENT;
> +
> +       return htonl(cookie) | ((u64)mss << 32);
> +#else
> +       return -ENOTSUPP;
> +#endif /* CONFIG_SYN_COOKIES */
> +}
> +
> +static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
> +       .func           = bpf_tcp_gen_syncookie,
> +       .gpl_only       = true, /* __cookie_v*_init_sequence() is GPL */
> +       .pkt_access     = true,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
> +       .arg2_type      = ARG_PTR_TO_MEM,
> +       .arg3_type      = ARG_CONST_SIZE,
> +       .arg4_type      = ARG_PTR_TO_MEM,
> +       .arg5_type      = ARG_CONST_SIZE,
> +};
> +
>  #endif /* CONFIG_INET */
>
>  bool bpf_helper_changes_pkt_data(void *func)
> @@ -6135,6 +6193,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_tcp_check_syncookie_proto;
>         case BPF_FUNC_skb_ecn_set_ce:
>                 return &bpf_skb_ecn_set_ce_proto;
> +       case BPF_FUNC_tcp_gen_syncookie:
> +               return &bpf_tcp_gen_syncookie_proto;
>  #endif
>         default:
>                 return bpf_base_func_proto(func_id);
> @@ -6174,6 +6234,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_xdp_skc_lookup_tcp_proto;
>         case BPF_FUNC_tcp_check_syncookie:
>                 return &bpf_tcp_check_syncookie_proto;
> +       case BPF_FUNC_tcp_gen_syncookie:
> +               return &bpf_tcp_gen_syncookie_proto;
>  #endif
>         default:
>                 return bpf_base_func_proto(func_id);
> --
> 2.22.0.510.g264f2c817a-goog
>
Lorenz Bauer July 16, 2019, 11:56 a.m. UTC | #3
On Tue, 16 Jul 2019 at 08:59, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > +             return -EINVAL;
> > +
> > +     if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> > +             return -EINVAL;
> > +
> > +     if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> > +             return -EINVAL;
> > +
> > +     if (!th->syn || th->ack || th->fin || th->rst)
> > +             return -EINVAL;
> > +
> > +     switch (sk->sk_family) {
>
> This is strange, because a dual stack listener will have sk->sk_family set to AF_INET6.
>
> What really matters here is if the packet is IPv4 or IPv6.
>
> So you need to look at iph->version instead.
>
> Then look if the socket family allows this packet to be processed
> (For example AF_INET6 sockets might prevent IPv4 packets, see sk->sk_ipv6only )

Does this apply for (the existing) tcp_check_syn_cookie as well?
Petar Penkov July 17, 2019, 12:27 a.m. UTC | #4
Thank you for the reviews!

On Tue, Jul 16, 2019 at 4:56 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> On Tue, 16 Jul 2019 at 08:59, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > +             return -EINVAL;
> > > +
> > > +     if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> > > +             return -EINVAL;
> > > +
> > > +     if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> > > +             return -EINVAL;
> > > +
> > > +     if (!th->syn || th->ack || th->fin || th->rst)
> > > +             return -EINVAL;
> > > +
> > > +     switch (sk->sk_family) {
> >
> > This is strange, because a dual stack listener will have sk->sk_family set to AF_INET6.
> >
> > What really matters here is if the packet is IPv4 or IPv6.
> >
> > So you need to look at iph->version instead.
> >
> > Then look if the socket family allows this packet to be processed
> > (For example AF_INET6 sockets might prevent IPv4 packets, see sk->sk_ipv6only )

This makes a lot of sense, thanks Eric, will update!
>
> Does this apply for (the existing) tcp_check_syn_cookie as well?

I think we will probably have to update the check there, too.
>
> --
> Lorenz Bauer  |  Systems Engineer
> 6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
>
> www.cloudflare.com
Alexei Starovoitov July 17, 2019, 2:26 a.m. UTC | #5
On Tue, Jul 16, 2019 at 09:59:26AM +0200, Eric Dumazet wrote:
> 
> 
> On 7/16/19 2:26 AM, Petar Penkov wrote:
> > From: Petar Penkov <ppenkov@google.com>
> > 
> > This helper function allows BPF programs to try to generate SYN
> > cookies, given a reference to a listener socket. The function works
> > from XDP and with an skb context since bpf_skc_lookup_tcp can lookup a
> > socket in both cases.
> > 
> ...
> >  
> > +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> > +	   struct tcphdr *, th, u32, th_len)
> > +{
> > +#ifdef CONFIG_SYN_COOKIES
> > +	u32 cookie;
> > +	u16 mss;
> > +
> > +	if (unlikely(th_len < sizeof(*th)))
> 
> 
> You probably need to check that th_len == th->doff * 4

+1
that is surely necessary for safety.

Considering the limitation of 5 args the api choice is good.
struct bpf_syncookie approach doesn't look natural to me.
And I couldn't come up with any better way to represent this helper.
So let's go with 
return htonl(cookie) | ((u64)mss << 32);
My only question is why htonl ?

Independently of that...
Since we've been hitting this 5 args limit too much,
we need to start thinking how to extend BPF ISA to pass
args 6,7,8 on stack.
Petar Penkov July 17, 2019, 3:33 a.m. UTC | #6
Thank you for your feedback!

On Tue, Jul 16, 2019 at 7:26 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Jul 16, 2019 at 09:59:26AM +0200, Eric Dumazet wrote:
> >
> >
> > On 7/16/19 2:26 AM, Petar Penkov wrote:
> > > From: Petar Penkov <ppenkov@google.com>
> > >
> > > This helper function allows BPF programs to try to generate SYN
> > > cookies, given a reference to a listener socket. The function works
> > > from XDP and with an skb context since bpf_skc_lookup_tcp can lookup a
> > > socket in both cases.
> > >
> > ...
> > >
> > > +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> > > +      struct tcphdr *, th, u32, th_len)
> > > +{
> > > +#ifdef CONFIG_SYN_COOKIES
> > > +   u32 cookie;
> > > +   u16 mss;
> > > +
> > > +   if (unlikely(th_len < sizeof(*th)))
> >
> >
> > You probably need to check that th_len == th->doff * 4
>
> +1
> that is surely necessary for safety.

I'll make sure to include this check in the next version.

>
> Considering the limitation of 5 args the api choice is good.
> struct bpf_syncookie approach doesn't look natural to me.
> And I couldn't come up with any better way to represent this helper.
> So let's go with
> return htonl(cookie) | ((u64)mss << 32);
> My only question is why htonl ?

I did it to mirror bpf_tcp_check_syncookie which sees a network order
cookie. That said, it is probably better for the caller to call
bpf_htonl() as they see fit, rather than to do it in the helper. Will
update, thanks.
>
> Independently of that...
> Since we've been hitting this 5 args limit too much,
> we need to start thinking how to extend BPF ISA to pass
> args 6,7,8 on stack.
>

Agreed, having an additional argument would have been helpful for this function.

Patch
diff mbox series

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6f68438aa4ed..abf4a85c76d1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2713,6 +2713,33 @@  union bpf_attr {
  *		**-EPERM** if no permission to send the *sig*.
  *
  *		**-EAGAIN** if bpf program can try again.
+ *
+ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ *	Description
+ *		Try to issue a SYN cookie for the packet with corresponding
+ *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ *		*iph* points to the start of the IPv4 or IPv6 header, while
+ *		*iph_len* contains **sizeof**\ (**struct iphdr**) or
+ *		**sizeof**\ (**struct ip6hdr**).
+ *
+ *		*th* points to the start of the TCP header, while *th_len*
+ *		contains **sizeof**\ (**struct tcphdr**).
+ *
+ *	Return
+ *		On success, lower 32 bits hold the generated SYN cookie in
+ *		network order and the higher 32 bits hold the MSS value for that
+ *		cookie.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** SYN cookie cannot be issued due to error
+ *
+ *		**-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ *		**-ENOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ *		**-EPROTONOSUPPORT** *sk* family is not AF_INET/AF_INET6
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2824,7 +2851,8 @@  union bpf_attr {
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
-	FN(send_signal),
+	FN(send_signal),		\
+	FN(tcp_gen_syncookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 47f6386fb17a..109fd1e286f4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5850,6 +5850,64 @@  static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+	   struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+	u32 cookie;
+	u16 mss;
+
+	if (unlikely(th_len < sizeof(*th)))
+		return -EINVAL;
+
+	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+		return -EINVAL;
+
+	if (!th->syn || th->ack || th->fin || th->rst)
+		return -EINVAL;
+
+	switch (sk->sk_family) {
+	case AF_INET:
+		if (unlikely(iph_len < sizeof(struct iphdr)))
+			return -EINVAL;
+		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
+		break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+	case AF_INET6:
+		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+		mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
+		break;
+#endif /* CONFIG_IPV6 */
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+	if (mss <= 0)
+		return -ENOENT;
+
+	return htonl(cookie) | ((u64)mss << 32);
+#else
+	return -ENOTSUPP;
+#endif /* CONFIG_SYN_COOKIES */
+}
+
+static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
+	.func		= bpf_tcp_gen_syncookie,
+	.gpl_only	= true, /* __cookie_v*_init_sequence() is GPL */
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -6135,6 +6193,8 @@  tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_tcp_check_syncookie_proto;
 	case BPF_FUNC_skb_ecn_set_ce:
 		return &bpf_skb_ecn_set_ce_proto;
+	case BPF_FUNC_tcp_gen_syncookie:
+		return &bpf_tcp_gen_syncookie_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
@@ -6174,6 +6234,8 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_skc_lookup_tcp_proto;
 	case BPF_FUNC_tcp_check_syncookie:
 		return &bpf_tcp_check_syncookie_proto;
+	case BPF_FUNC_tcp_gen_syncookie:
+		return &bpf_tcp_gen_syncookie_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);