diff mbox series

[1/3] bpf: add helper to check for a valid SYN cookie

Message ID 20190222095057.9442-2-lmb@cloudflare.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series Allow checking SYN cookies from XDP and tc cls act | expand

Commit Message

Lorenz Bauer Feb. 22, 2019, 9:50 a.m. UTC
Using bpf_sk_lookup_tcp it's possible to ascertain whether a packet belongs
to a known connection. However, there is one corner case: no sockets are
created if SYN cookies are active. This means that the final ACK in the
3WHS is misclassified.

Using the helper, we can look up the listening socket via bpf_sk_lookup_tcp
and then check whether a packet is a valid SYN cookie ACK.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 include/uapi/linux/bpf.h | 18 ++++++++++-
 net/core/filter.c        | 68 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)

Comments

Martin KaFai Lau Feb. 23, 2019, 12:44 a.m. UTC | #1
On Fri, Feb 22, 2019 at 09:50:55AM +0000, Lorenz Bauer wrote:
> Using bpf_sk_lookup_tcp it's possible to ascertain whether a packet belongs
> to a known connection. However, there is one corner case: no sockets are
> created if SYN cookies are active. This means that the final ACK in the
> 3WHS is misclassified.
> 
> Using the helper, we can look up the listening socket via bpf_sk_lookup_tcp
> and then check whether a packet is a valid SYN cookie ACK.
> 
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  include/uapi/linux/bpf.h | 18 ++++++++++-
>  net/core/filter.c        | 68 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index bcdd2474eee7..bc2af87e9621 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2359,6 +2359,21 @@ union bpf_attr {
>   *	Return
>   *		A **struct bpf_tcp_sock** pointer on success, or NULL in
>   *		case of failure.
> + *
> + * int bpf_sk_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
> + * 	Description
> + * 		Check whether iph and th contain a valid SYN cookie ACK for
> + * 		the listening socket in sk.
> + *
> + * 		iph points to the start of the IPv4 or IPv6 header, while
> + * 		iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
> + *
> + * 		th points to the start of the TCP header, while th_len contains
> + * 		sizeof(struct tcphdr).
> + *
> + * 	Return
> + * 		0 if iph and th are a valid SYN cookie ACK, or a negative error
> + * 		otherwise.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -2457,7 +2472,8 @@ union bpf_attr {
>  	FN(spin_lock),			\
>  	FN(spin_unlock),		\
>  	FN(sk_fullsock),		\
> -	FN(tcp_sock),
> +	FN(tcp_sock),			\
> +	FN(sk_check_syncookie),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 85749f6ec789..9e68897cc7ed 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5426,6 +5426,70 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
>  	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
>  };
>  
> +BPF_CALL_5(bpf_sk_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
s/bpf_sk_check_syncookie/bpf_tcp_check_syncookie/

> +	   struct tcphdr *, th, u32, th_len)
> +{
> +#if IS_ENABLED(CONFIG_SYN_COOKIES)
nit. "#ifdef CONFIG_SYN_COOKIES" such that it is clear it is a bool kconfig.

> +	u32 cookie;
> +	int ret;
> +
> +	if (unlikely(th_len < sizeof(*th)))
> +		return -EINVAL;
> +
> +	/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
> +	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
From the test program in patch 3, the "sk" here is obtained from
bpf_sk_lookup_tcp() which does a sk_to_full_sk() before returning.
AFAICT, meaning bpf_sk_lookup_tcp() will return the listening sk
even if there is a request_sock.  Does it make sense to check
syncookie if there is already a request_sock?

> +		return -EINVAL;
> +
> +	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
Should tcp_synq_no_recent_overflow(tp) be checked also?

> +		return -EINVAL;
> +
> +	if (!th->ack || th->rst)
How about th->syn?

> +		return -ENOENT;
> +
> +	cookie = ntohl(th->ack_seq) - 1;
> +
> +	switch (sk->sk_family) {
> +	case AF_INET:
> +		if (unlikely(iph_len < sizeof(struct iphdr)))
> +			return -EINVAL;
> +
> +		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
> +		break;
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +	case AF_INET6:
> +		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
> +			return -EINVAL;
> +
> +		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
> +		break;
> +#endif /* CONFIG_IPV6 */
> +
> +	default:
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	if (ret > 0)
> +		return 0;
> +
> +	return -ENOENT;
> +#else
> +	return -ENOTSUP;
> +#endif
> +}
> +
> +static const struct bpf_func_proto bpf_sk_check_syncookie_proto = {
> +	.func		= bpf_sk_check_syncookie,
> +	.gpl_only	= true,
> +	.pkt_access	= true,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_SOCKET,
I think it should be ARG_PTR_TO_TCP_SOCK

> +	.arg2_type	= ARG_PTR_TO_MEM,
> +	.arg3_type	= ARG_CONST_SIZE,
> +	.arg4_type	= ARG_PTR_TO_MEM,
> +	.arg5_type	= ARG_CONST_SIZE,
> +};
> +
>  #endif /* CONFIG_INET */
kernel test robot Feb. 24, 2019, 11:21 a.m. UTC | #2
Hi Lorenz,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]
[also build test ERROR on next-20190222]
[cannot apply to v5.0-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lorenz-Bauer/Allow-checking-SYN-cookies-from-XDP-and-tc-cls-act/20190224-180755
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-kexec (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All error/warnings (new ones prefixed by >>):

   net/core/filter.c: In function '____bpf_sk_check_syncookie':
>> net/core/filter.c:5477:10: error: 'ENOTSUP' undeclared (first use in this function); did you mean 'ENOTSUPP'?
     return -ENOTSUP;
             ^~~~~~~
             ENOTSUPP
   net/core/filter.c:5477:10: note: each undeclared identifier is reported only once for each function it appears in
>> net/core/filter.c:5479:1: warning: control reaches end of non-void function [-Wreturn-type]
    }
    ^

vim +5477 net/core/filter.c

  5467	
  5468		default:
  5469			return -EPROTONOSUPPORT;
  5470		}
  5471	
  5472		if (ret > 0)
  5473			return 0;
  5474	
  5475		return -ENOENT;
  5476	#else
> 5477		return -ENOTSUP;
  5478	#endif
> 5479	}
  5480	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Feb. 24, 2019, 11:37 a.m. UTC | #3
Hi Lorenz,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]
[also build test ERROR on next-20190222]
[cannot apply to v5.0-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lorenz-Bauer/Allow-checking-SYN-cookies-from-XDP-and-tc-cls-act/20190224-180755
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: m68k-sun3_defconfig (attached as .config)
compiler: m68k-linux-gnu-gcc (Debian 8.2.0-11) 8.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=8.2.0 make.cross ARCH=m68k 

All errors (new ones prefixed by >>):

   m68k-linux-gnu-ld: drivers/rtc/proc.o: in function `is_rtc_hctosys.isra.0':
   proc.c:(.text+0x178): undefined reference to `strcmp'
   m68k-linux-gnu-ld: net/core/filter.o: in function `bpf_sk_check_syncookie':
>> filter.c:(.text+0x5a58): undefined reference to `__cookie_v6_check'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Lorenz Bauer Feb. 25, 2019, 6:26 p.m. UTC | #4
On Sat, 23 Feb 2019 at 00:44, Martin Lau <kafai@fb.com> wrote:
>
> On Fri, Feb 22, 2019 at 09:50:55AM +0000, Lorenz Bauer wrote:
> > Using bpf_sk_lookup_tcp it's possible to ascertain whether a packet belongs
> > to a known connection. However, there is one corner case: no sockets are
> > created if SYN cookies are active. This means that the final ACK in the
> > 3WHS is misclassified.
> >
> > Using the helper, we can look up the listening socket via bpf_sk_lookup_tcp
> > and then check whether a packet is a valid SYN cookie ACK.
> >
> > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> > ---
> >  include/uapi/linux/bpf.h | 18 ++++++++++-
> >  net/core/filter.c        | 68 ++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 85 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index bcdd2474eee7..bc2af87e9621 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2359,6 +2359,21 @@ union bpf_attr {
> >   *   Return
> >   *           A **struct bpf_tcp_sock** pointer on success, or NULL in
> >   *           case of failure.
> > + *
> > + * int bpf_sk_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
> > + *   Description
> > + *           Check whether iph and th contain a valid SYN cookie ACK for
> > + *           the listening socket in sk.
> > + *
> > + *           iph points to the start of the IPv4 or IPv6 header, while
> > + *           iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
> > + *
> > + *           th points to the start of the TCP header, while th_len contains
> > + *           sizeof(struct tcphdr).
> > + *
> > + *   Return
> > + *           0 if iph and th are a valid SYN cookie ACK, or a negative error
> > + *           otherwise.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)                \
> >       FN(unspec),                     \
> > @@ -2457,7 +2472,8 @@ union bpf_attr {
> >       FN(spin_lock),                  \
> >       FN(spin_unlock),                \
> >       FN(sk_fullsock),                \
> > -     FN(tcp_sock),
> > +     FN(tcp_sock),                   \
> > +     FN(sk_check_syncookie),
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> >   * function eBPF program intends to call
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 85749f6ec789..9e68897cc7ed 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -5426,6 +5426,70 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
> >       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
> >  };
> >
> > +BPF_CALL_5(bpf_sk_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> s/bpf_sk_check_syncookie/bpf_tcp_check_syncookie/>
>
> > +        struct tcphdr *, th, u32, th_len)
> > +{
> > +#if IS_ENABLED(CONFIG_SYN_COOKIES)
> nit. "#ifdef CONFIG_SYN_COOKIES" such that it is clear it is a bool kconfig.
>
> > +     u32 cookie;
> > +     int ret;
> > +
> > +     if (unlikely(th_len < sizeof(*th)))
> > +             return -EINVAL;
> > +
> > +     /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
> > +     if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> From the test program in patch 3, the "sk" here is obtained from
> bpf_sk_lookup_tcp() which does a sk_to_full_sk() before returning.
> AFAICT, meaning bpf_sk_lookup_tcp() will return the listening sk
> even if there is a request_sock.  Does it make sense to check
> syncookie if there is already a request_sock?

No, that doesn't make a lot of sense. I hadn't realised that
sk_lookup_tcp only returns full sockets.
This means we need a way to detect that there is a request sock for a
given tuple.

* adding a reqsk_exists(tuple) helper means we have to pay the lookup cost twice
* drop the sk argument and do the necessary lookups in the helper
itself, but that also
  wastes a call to __inet_lookup_listener
* skip sk_to_full_sk() in a helper and return RET_PTR_TO_SOCK_COMMON,
  but that violates a bunch of assumptions (e.g. calling bpf_sk_release on them)

For context: ultimately we want to use this to answer the question: does
this (encapsulated)
packet contain a payload destined to a local socket? Amongst the edge
cases we need to
handle are ICMP Packet Too Big messages and SYN cookies. A solution
would be to hide
all this in an "uber" helper that takes pointers to the L3 / L4
headers and returns a verdict,
but that seems a bit gross.

>
> > +             return -EINVAL;
> > +
> > +     if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> Should tcp_synq_no_recent_overflow(tp) be checked also?
>

Yes, not sure how that slipped out.

> > +             return -EINVAL;
> > +
> > +     if (!th->ack || th->rst)
> How about th->syn?
>

Yes, I missed the fact that the callers in tcp_ipv{4,6}.c check this.

> > +             return -ENOENT;
> > +
> > +     cookie = ntohl(th->ack_seq) - 1;
> > +
> > +     switch (sk->sk_family) {
> > +     case AF_INET:
> > +             if (unlikely(iph_len < sizeof(struct iphdr)))
> > +                     return -EINVAL;
> > +
> > +             ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
> > +             break;
> > +
> > +#if IS_ENABLED(CONFIG_IPV6)
> > +     case AF_INET6:
> > +             if (unlikely(iph_len < sizeof(struct ipv6hdr)))
> > +                     return -EINVAL;
> > +
> > +             ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
> > +             break;
> > +#endif /* CONFIG_IPV6 */
> > +
> > +     default:
> > +             return -EPROTONOSUPPORT;
> > +     }
> > +
> > +     if (ret > 0)
> > +             return 0;
> > +
> > +     return -ENOENT;
> > +#else
> > +     return -ENOTSUP;
> > +#endif
> > +}
> > +
> > +static const struct bpf_func_proto bpf_sk_check_syncookie_proto = {
> > +     .func           = bpf_sk_check_syncookie,
> > +     .gpl_only       = true,
> > +     .pkt_access     = true,
> > +     .ret_type       = RET_INTEGER,
> > +     .arg1_type      = ARG_PTR_TO_SOCKET,
> I think it should be ARG_PTR_TO_TCP_SOCK
>
> > +     .arg2_type      = ARG_PTR_TO_MEM,
> > +     .arg3_type      = ARG_CONST_SIZE,
> > +     .arg4_type      = ARG_PTR_TO_MEM,
> > +     .arg5_type      = ARG_CONST_SIZE,
> > +};
> > +
> >  #endif /* CONFIG_INET */
Martin KaFai Lau Feb. 26, 2019, 5:37 a.m. UTC | #5
On Mon, Feb 25, 2019 at 06:26:42PM +0000, Lorenz Bauer wrote:
> On Sat, 23 Feb 2019 at 00:44, Martin Lau <kafai@fb.com> wrote:
> >
> > On Fri, Feb 22, 2019 at 09:50:55AM +0000, Lorenz Bauer wrote:
> > > Using bpf_sk_lookup_tcp it's possible to ascertain whether a packet belongs
> > > to a known connection. However, there is one corner case: no sockets are
> > > created if SYN cookies are active. This means that the final ACK in the
> > > 3WHS is misclassified.
> > >
> > > Using the helper, we can look up the listening socket via bpf_sk_lookup_tcp
> > > and then check whether a packet is a valid SYN cookie ACK.
> > >
> > > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> > > ---
> > >  include/uapi/linux/bpf.h | 18 ++++++++++-
> > >  net/core/filter.c        | 68 ++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 85 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index bcdd2474eee7..bc2af87e9621 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -2359,6 +2359,21 @@ union bpf_attr {
> > >   *   Return
> > >   *           A **struct bpf_tcp_sock** pointer on success, or NULL in
> > >   *           case of failure.
> > > + *
> > > + * int bpf_sk_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
> > > + *   Description
> > > + *           Check whether iph and th contain a valid SYN cookie ACK for
> > > + *           the listening socket in sk.
> > > + *
> > > + *           iph points to the start of the IPv4 or IPv6 header, while
> > > + *           iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
> > > + *
> > > + *           th points to the start of the TCP header, while th_len contains
> > > + *           sizeof(struct tcphdr).
> > > + *
> > > + *   Return
> > > + *           0 if iph and th are a valid SYN cookie ACK, or a negative error
> > > + *           otherwise.
> > >   */
> > >  #define __BPF_FUNC_MAPPER(FN)                \
> > >       FN(unspec),                     \
> > > @@ -2457,7 +2472,8 @@ union bpf_attr {
> > >       FN(spin_lock),                  \
> > >       FN(spin_unlock),                \
> > >       FN(sk_fullsock),                \
> > > -     FN(tcp_sock),
> > > +     FN(tcp_sock),                   \
> > > +     FN(sk_check_syncookie),
> > >
> > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > >   * function eBPF program intends to call
> > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > index 85749f6ec789..9e68897cc7ed 100644
> > > --- a/net/core/filter.c
> > > +++ b/net/core/filter.c
> > > @@ -5426,6 +5426,70 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
> > >       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
> > >  };
> > >
> > > +BPF_CALL_5(bpf_sk_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> > s/bpf_sk_check_syncookie/bpf_tcp_check_syncookie/>
> >
> > > +        struct tcphdr *, th, u32, th_len)
> > > +{
> > > +#if IS_ENABLED(CONFIG_SYN_COOKIES)
> > nit. "#ifdef CONFIG_SYN_COOKIES" such that it is clear it is a bool kconfig.
> >
> > > +     u32 cookie;
> > > +     int ret;
> > > +
> > > +     if (unlikely(th_len < sizeof(*th)))
> > > +             return -EINVAL;
> > > +
> > > +     /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
> > > +     if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> > From the test program in patch 3, the "sk" here is obtained from
> > bpf_sk_lookup_tcp() which does a sk_to_full_sk() before returning.
> > AFAICT, meaning bpf_sk_lookup_tcp() will return the listening sk
> > even if there is a request_sock.  Does it make sense to check
> > syncookie if there is already a request_sock?
> 
> No, that doesn't make a lot of sense. I hadn't realised that
> sk_lookup_tcp only returns full sockets.
> This means we need a way to detect that there is a request sock for a
> given tuple.
> 
> * adding a reqsk_exists(tuple) helper means we have to pay the lookup cost twice
> * drop the sk argument and do the necessary lookups in the helper
> itself, but that also
>   wastes a call to __inet_lookup_listener
> * skip sk_to_full_sk() in a helper and return RET_PTR_TO_SOCK_COMMON,
>   but that violates a bunch of assumptions (e.g. calling bpf_sk_release on them)
How about creating a new lookup helper, bpf_sk"c"_lookup_tcp,
that does not call sk_to_full_sk() before returning?
Its ".ret_type" will be RET_PTR_TO_SOCK_COMMON_OR_NULL, and its
reference(-counting) state also has to be tracked in the verifier,
mainly in check_helper_call(), iirc.

The bpf_prog can then check bpf_sock->state for TCP_LISTEN,
call bpf_tcp_sock() to get the TCP listener sock, and pass it to
bpf_tcp_check_syncookie().

> 
> For context: ultimately we want use this to answer the question: does
> this (encapsulated)
> packet contain a payload destined to a local socket? Amongst the edge
> cases we need to
> handle are ICMP Packet Too Big messages and SYN cookies. A solution
> would be to hide
> all this in an "uber" helper that takes pointers to the L3 / L4
> headers and returns a verdict,
> but that seems a bit gross.
Please include this use case in the commit message.
It is useful.

> 
> >
> > > +             return -EINVAL;
> > > +
> > > +     if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> > Should tcp_synq_no_recent_overflow(tp) be checked also?
> >
> 
> Yes, not sure how that slipped out.
> 
> > > +             return -EINVAL;
> > > +
> > > +     if (!th->ack || th->rst)
> > How about th->syn?
> >
> 
> Yes, I missed the fact that the callers in tcp_ipv{4,6}.c check this.
> 
> > > +             return -ENOENT;
> > > +
> > > +     cookie = ntohl(th->ack_seq) - 1;
> > > +
> > > +     switch (sk->sk_family) {
> > > +     case AF_INET:
> > > +             if (unlikely(iph_len < sizeof(struct iphdr)))
> > > +                     return -EINVAL;
> > > +
> > > +             ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
> > > +             break;
> > > +
> > > +#if IS_ENABLED(CONFIG_IPV6)
> > > +     case AF_INET6:
> > > +             if (unlikely(iph_len < sizeof(struct ipv6hdr)))
> > > +                     return -EINVAL;
> > > +
> > > +             ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
> > > +             break;
> > > +#endif /* CONFIG_IPV6 */
> > > +
> > > +     default:
> > > +             return -EPROTONOSUPPORT;
> > > +     }
> > > +
> > > +     if (ret > 0)
> > > +             return 0;
> > > +
> > > +     return -ENOENT;
> > > +#else
> > > +     return -ENOTSUP;
> > > +#endif
> > > +}
> > > +
> > > +static const struct bpf_func_proto bpf_sk_check_syncookie_proto = {
> > > +     .func           = bpf_sk_check_syncookie,
> > > +     .gpl_only       = true,
> > > +     .pkt_access     = true,
> > > +     .ret_type       = RET_INTEGER,
> > > +     .arg1_type      = ARG_PTR_TO_SOCKET,
> > I think it should be ARG_PTR_TO_TCP_SOCK
> >
> > > +     .arg2_type      = ARG_PTR_TO_MEM,
> > > +     .arg3_type      = ARG_CONST_SIZE,
> > > +     .arg4_type      = ARG_PTR_TO_MEM,
> > > +     .arg5_type      = ARG_CONST_SIZE,
> > > +};
> > > +
> > >  #endif /* CONFIG_INET */
> 
> 
> 
> -- 
> Lorenz Bauer  |  Systems Engineer
> 25 Lavington St., London SE1 0NZ
> 
> https://urldefense.proofpoint.com/v2/url?u=http-3A__www.cloudflare.com&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=VQnoQ7LvghIj0gVEaiQSUw&m=xhDwvX3iD-mbqSrx-L8XQNaZiYFZzMWNo_2Y38Z9j34&s=I4Ag3HflabFppFv7UtMp8WnMVSqCDW0W28ziWIvuwDE&e=
Lorenz Bauer Feb. 28, 2019, 3:11 p.m. UTC | #6
On Tue, 26 Feb 2019 at 05:38, Martin Lau <kafai@fb.com> wrote:
>
> On Mon, Feb 25, 2019 at 06:26:42PM +0000, Lorenz Bauer wrote:
> > On Sat, 23 Feb 2019 at 00:44, Martin Lau <kafai@fb.com> wrote:
> > >
> > > On Fri, Feb 22, 2019 at 09:50:55AM +0000, Lorenz Bauer wrote:
> > > > Using bpf_sk_lookup_tcp it's possible to ascertain whether a packet belongs
> > > > to a known connection. However, there is one corner case: no sockets are
> > > > created if SYN cookies are active. This means that the final ACK in the
> > > > 3WHS is misclassified.
> > > >
> > > > Using the helper, we can look up the listening socket via bpf_sk_lookup_tcp
> > > > and then check whether a packet is a valid SYN cookie ACK.
> > > >
> > > > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> > > > ---
> > > >  include/uapi/linux/bpf.h | 18 ++++++++++-
> > > >  net/core/filter.c        | 68 ++++++++++++++++++++++++++++++++++++++++
> > > >  2 files changed, 85 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index bcdd2474eee7..bc2af87e9621 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -2359,6 +2359,21 @@ union bpf_attr {
> > > >   *   Return
> > > >   *           A **struct bpf_tcp_sock** pointer on success, or NULL in
> > > >   *           case of failure.
> > > > + *
> > > > + * int bpf_sk_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
> > > > + *   Description
> > > > + *           Check whether iph and th contain a valid SYN cookie ACK for
> > > > + *           the listening socket in sk.
> > > > + *
> > > > + *           iph points to the start of the IPv4 or IPv6 header, while
> > > > + *           iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
> > > > + *
> > > > + *           th points to the start of the TCP header, while th_len contains
> > > > + *           sizeof(struct tcphdr).
> > > > + *
> > > > + *   Return
> > > > + *           0 if iph and th are a valid SYN cookie ACK, or a negative error
> > > > + *           otherwise.
> > > >   */
> > > >  #define __BPF_FUNC_MAPPER(FN)                \
> > > >       FN(unspec),                     \
> > > > @@ -2457,7 +2472,8 @@ union bpf_attr {
> > > >       FN(spin_lock),                  \
> > > >       FN(spin_unlock),                \
> > > >       FN(sk_fullsock),                \
> > > > -     FN(tcp_sock),
> > > > +     FN(tcp_sock),                   \
> > > > +     FN(sk_check_syncookie),
> > > >
> > > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > > >   * function eBPF program intends to call
> > > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > > index 85749f6ec789..9e68897cc7ed 100644
> > > > --- a/net/core/filter.c
> > > > +++ b/net/core/filter.c
> > > > @@ -5426,6 +5426,70 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
> > > >       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
> > > >  };
> > > >
> > > > +BPF_CALL_5(bpf_sk_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> > > s/bpf_sk_check_syncookie/bpf_tcp_check_syncookie/>
> > >
> > > > +        struct tcphdr *, th, u32, th_len)
> > > > +{
> > > > +#if IS_ENABLED(CONFIG_SYN_COOKIES)
> > > nit. "#ifdef CONFIG_SYN_COOKIES" such that it is clear it is a bool kconfig.
> > >
> > > > +     u32 cookie;
> > > > +     int ret;
> > > > +
> > > > +     if (unlikely(th_len < sizeof(*th)))
> > > > +             return -EINVAL;
> > > > +
> > > > +     /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
> > > > +     if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> > > From the test program in patch 3, the "sk" here is obtained from
> > > bpf_sk_lookup_tcp() which does a sk_to_full_sk() before returning.
> > > AFAICT, meaning bpf_sk_lookup_tcp() will return the listening sk
> > > even if there is a request_sock.  Does it make sense to check
> > > syncookie if there is already a request_sock?
> >
> > No, that doesn't make a lot of sense. I hadn't realised that
> > sk_lookup_tcp only returns full sockets.
> > This means we need a way to detect that there is a request sock for a
> > given tuple.
> >
> > * adding a reqsk_exists(tuple) helper means we have to pay the lookup cost twice
> > * drop the sk argument and do the necessary lookups in the helper
> > itself, but that also
> >   wastes a call to __inet_lookup_listener
> > * skip sk_to_full_sk() in a helper and return RET_PTR_TO_SOCK_COMMON,
> >   but that violates a bunch of assumptions (e.g. calling bpf_sk_release on them)
> How about creating a new lookup helper, bpf_sk"c"_lookup_tcp,
> that does not call sk_to_full_sk() before returning.
> Its ".ret_type" will be RET_PTR_TO_SOCK_COMMON_OR_NULL which its
> reference(-counting) state has to be tracked in the verifier also.
> Mainly in check_helper_call(), iirc.
>
> The bpf_prog can then check bpf_sock->state for TCP_LISTEN,
> call bpf_tcp_sock() to get the TCP listener sock and pass to
> the bpf_tcp_check_syncookie()

I've started working on this, and I've hit a snag with the reference
tracking behaviour
of bpf_tcp_sock. From what I can tell, the assumption is that a PTR_TO_TCP_SOCK
doesn't need reference tracking, because it's either skb->sk or a TCP listener.
In the former case, the socket is refcounted via the sk_buff, in the
latter we don't need
to worry since the eBPF is called with the RCU read lock held.

However, non-listening sockets returned by bpf_sk_lookup_tcp can be
freed before the
end of the eBPF program. Doing bpf_sk_lookup_tcp, bpf_tcp_sock,
bpf_sk_release allows
eBPF to gain a (read-only) reference to a freed socket. I've attached
a patch with a testcase
which illustrates this issue.

Is this the intended behaviour? If not, maybe it would be the easiest
to make bpf_tcp_sock
increase the refcount if !SOCK_RCU_FREE and require a corresponding
bpf_sk_release?
That would simplify my work to add RET_PTR_TO_SOCK_COMMON as well.

>
> >
> > For context: ultimately we want use this to answer the question: does
> > this (encapsulated)
> > packet contain a payload destined to a local socket? Amongst the edge
> > cases we need to
> > handle are ICMP Packet Too Big messages and SYN cookies. A solution
> > would be to hide
> > all this in an "uber" helper that takes pointers to the L3 / L4
> > headers and returns a verdict,
> > but that seems a bit gross.
> Please include this use case in the commit message.
> It is useful.
>
> >
> > >
> > > > +             return -EINVAL;
> > > > +
> > > > +     if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> > > Should tcp_synq_no_recent_overflow(tp) be checked also?
> > >
> >
> > Yes, not sure how that slipped out.
> >
> > > > +             return -EINVAL;
> > > > +
> > > > +     if (!th->ack || th->rst)
> > > How about th->syn?
> > >
> >
> > Yes, I missed the fact that the callers in tcp_ipv{4,6}.c check this.
> >
> > > > +             return -ENOENT;
> > > > +
> > > > +     cookie = ntohl(th->ack_seq) - 1;
> > > > +
> > > > +     switch (sk->sk_family) {
> > > > +     case AF_INET:
> > > > +             if (unlikely(iph_len < sizeof(struct iphdr)))
> > > > +                     return -EINVAL;
> > > > +
> > > > +             ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
> > > > +             break;
> > > > +
> > > > +#if IS_ENABLED(CONFIG_IPV6)
> > > > +     case AF_INET6:
> > > > +             if (unlikely(iph_len < sizeof(struct ipv6hdr)))
> > > > +                     return -EINVAL;
> > > > +
> > > > +             ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
> > > > +             break;
> > > > +#endif /* CONFIG_IPV6 */
> > > > +
> > > > +     default:
> > > > +             return -EPROTONOSUPPORT;
> > > > +     }
> > > > +
> > > > +     if (ret > 0)
> > > > +             return 0;
> > > > +
> > > > +     return -ENOENT;
> > > > +#else
> > > > +     return -ENOTSUP;
> > > > +#endif
> > > > +}
> > > > +
> > > > +static const struct bpf_func_proto bpf_sk_check_syncookie_proto = {
> > > > +     .func           = bpf_sk_check_syncookie,
> > > > +     .gpl_only       = true,
> > > > +     .pkt_access     = true,
> > > > +     .ret_type       = RET_INTEGER,
> > > > +     .arg1_type      = ARG_PTR_TO_SOCKET,
> > > I think it should be ARG_PTR_TO_TCP_SOCK
> > >
> > > > +     .arg2_type      = ARG_PTR_TO_MEM,
> > > > +     .arg3_type      = ARG_CONST_SIZE,
> > > > +     .arg4_type      = ARG_PTR_TO_MEM,
> > > > +     .arg5_type      = ARG_CONST_SIZE,
> > > > +};
> > > > +
> > > >  #endif /* CONFIG_INET */
> >
> >
> >
> > --
> > Lorenz Bauer  |  Systems Engineer
> > 25 Lavington St., London SE1 0NZ
> >
> > https://urldefense.proofpoint.com/v2/url?u=http-3A__www.cloudflare.com&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=VQnoQ7LvghIj0gVEaiQSUw&m=xhDwvX3iD-mbqSrx-L8XQNaZiYFZzMWNo_2Y38Z9j34&s=I4Ag3HflabFppFv7UtMp8WnMVSqCDW0W28ziWIvuwDE&e=

---
 tools/testing/selftests/bpf/verifier/sock.c | 23 +++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/testing/selftests/bpf/verifier/sock.c
b/tools/testing/selftests/bpf/verifier/sock.c
index 0ddfdf76aba5..3307cca6bdd5 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -382,3 +382,26 @@
        .result = REJECT,
        .errstr = "type=tcp_sock expected=sock",
 },
+{
+       "use bpf_tcp_sock after bpf_sk_release",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct
bpf_tcp_sock, snd_cwnd)),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "bogus",
+},
--
2.19.1
Martin KaFai Lau Feb. 28, 2019, 5:37 p.m. UTC | #7
On Thu, Feb 28, 2019 at 03:11:09PM +0000, Lorenz Bauer wrote:

> I've started working on this, and I've hit a snag with the reference
> tracking behaviour
> of bpf_tcp_sock. From what I can tell, the assumption is that a PTR_TO_TCP_SOCK
> doesn't need reference tracking, because it's either skb->sk or a TCP listener.
> In the former case, the socket is refcounted via the sk_buff, in the
> latter we don't need
> to worry since the eBPF is called with the RCU read lock held.
> 
> However, non-listening sockets returned by bpf_sk_lookup_tcp can be
> freed before the
> end of the eBPF program. Doing bpf_sk_lookup_tcp, bpf_tcp_sock,
> bpf_sk_release allows
> eBPF to gain a (read-only) reference to a freed socket. I've attached
> a patch with a testcase
> which illustrates this issue.
> 
> Is this the intended behaviour? If not, maybe it would be the easiest
> to make bpf_tcp_sock
> increase the refcount if !SOCK_RCU_FREE and require a corresponding
> bpf_sk_release?
Increasing the refcount at runtime may be too big a hammer for this.
Let me think if it can be resolved within the verifier.

> That would simplify my work to add RET_PTR_TO_SOCK_COMMON as well.
> 
> ---
>  tools/testing/selftests/bpf/verifier/sock.c | 23 +++++++++++++++++++++
>  1 file changed, 23 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/verifier/sock.c
> b/tools/testing/selftests/bpf/verifier/sock.c
> index 0ddfdf76aba5..3307cca6bdd5 100644
> --- a/tools/testing/selftests/bpf/verifier/sock.c
> +++ b/tools/testing/selftests/bpf/verifier/sock.c
> @@ -382,3 +382,26 @@
>         .result = REJECT,
>         .errstr = "type=tcp_sock expected=sock",
>  },
> +{
> +       "use bpf_tcp_sock after bpf_sk_release",
> +       .insns = {
> +       BPF_SK_LOOKUP,
> +       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
> +       BPF_EXIT_INSN(),
> +       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
> +       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
> +       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
> +       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
> +       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
> +       BPF_EMIT_CALL(BPF_FUNC_sk_release),
> +       BPF_EXIT_INSN(),
> +       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
> +       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
> +       BPF_EMIT_CALL(BPF_FUNC_sk_release),
> +       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct
> bpf_tcp_sock, snd_cwnd)),
> +       BPF_EXIT_INSN(),
> +       },
> +       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
> +       .result = REJECT,
> +       .errstr = "bogus",
> +},
> --
> 2.19.1
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bcdd2474eee7..bc2af87e9621 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2359,6 +2359,21 @@  union bpf_attr {
  *	Return
  *		A **struct bpf_tcp_sock** pointer on success, or NULL in
  *		case of failure.
+ *
+ * int bpf_sk_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * 	Description
+ * 		Check whether iph and th contain a valid SYN cookie ACK for
+ * 		the listening socket in sk.
+ *
+ * 		iph points to the start of the IPv4 or IPv6 header, while
+ * 		iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr).
+ *
+ * 		th points to the start of the TCP header, while th_len contains
+ * 		sizeof(struct tcphdr).
+ *
+ * 	Return
+ * 		0 if iph and th are a valid SYN cookie ACK, or a negative error
+ * 		otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2457,7 +2472,8 @@  union bpf_attr {
 	FN(spin_lock),			\
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
-	FN(tcp_sock),
+	FN(tcp_sock),			\
+	FN(sk_check_syncookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 85749f6ec789..9e68897cc7ed 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5426,6 +5426,70 @@  static const struct bpf_func_proto bpf_tcp_sock_proto = {
 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
 };
 
+BPF_CALL_5(bpf_sk_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+	   struct tcphdr *, th, u32, th_len)
+{
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+	u32 cookie;
+	int ret;
+
+	if (unlikely(th_len < sizeof(*th)))	/* need a full base TCP header */
+		return -EINVAL;
+
+	/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
+	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)	/* sysctl is 0 */
+		return -EINVAL;
+
+	if (!th->ack || th->rst)	/* only a non-RST ACK is checked */
+		return -ENOENT;
+
+	cookie = ntohl(th->ack_seq) - 1;	/* candidate cookie: ack_seq - 1 */
+
+	switch (sk->sk_family) {
+	case AF_INET:
+		if (unlikely(iph_len < sizeof(struct iphdr)))
+			return -EINVAL;
+
+		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+		break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+
+		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+		break;
+#endif /* CONFIG_IPV6 */
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	if (ret > 0)	/* positive check result: report success as 0 */
+		return 0;
+
+	return -ENOENT;
+#else /* !CONFIG_SYN_COOKIES */
+	return -ENOTSUPP;	/* ENOTSUP is a libc name, not defined in-kernel */
+#endif /* CONFIG_SYN_COOKIES */
+}
+
+static const struct bpf_func_proto bpf_sk_check_syncookie_proto = {
+	.func		= bpf_sk_check_syncookie,
+	.gpl_only	= true,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCKET,	/* sk */
+	.arg2_type	= ARG_PTR_TO_MEM,	/* iph */
+	.arg3_type	= ARG_CONST_SIZE,	/* iph_len */
+	.arg4_type	= ARG_PTR_TO_MEM,	/* th */
+	.arg5_type	= ARG_CONST_SIZE,	/* th_len */
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5678,6 +5742,8 @@  tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_release_proto;
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
+	case BPF_FUNC_sk_check_syncookie:
+		return &bpf_sk_check_syncookie_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
@@ -5713,6 +5779,8 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_sk_lookup_tcp_proto;
 	case BPF_FUNC_sk_release:
 		return &bpf_sk_release_proto;
+	case BPF_FUNC_sk_check_syncookie:
+		return &bpf_sk_check_syncookie_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);