diff mbox series

[bpf,v2,1/2] bpf: sockmap, fix crash when ipv6 sock is added

Message ID 20180608150639.15153.91342.stgit@john-Precision-Tower-5810
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf, sockmap IPv6/TCP state fixes | expand

Commit Message

John Fastabend June 8, 2018, 3:06 p.m. UTC
This fixes a crash where we assign tcp_prot to IPv6 sockets instead
of tcpv6_prot.

Previously we overwrote the sk->prot field with tcp_prot even in the
AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
are used. Further, only allow ESTABLISHED connections to join the
map per note in TLS ULP,

   /* The TLS ulp is currently supported only for TCP sockets
    * in ESTABLISHED state.
    * Supporting sockets in LISTEN state will require us
    * to modify the accept implementation to clone rather then
    * share the ulp context.
    */

Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
crashing case here.

Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c |   24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

Comments

Daniel Borkmann June 11, 2018, 11:14 p.m. UTC | #1
Hi John,

On 06/08/2018 05:06 PM, John Fastabend wrote:
> This fixes a crash where we assign tcp_prot to IPv6 sockets instead
> of tcpv6_prot.
> 
> Previously we overwrote the sk->prot field with tcp_prot even in the
> AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
> are used. Further, only allow ESTABLISHED connections to join the
> map per note in TLS ULP,
> 
>    /* The TLS ulp is currently supported only for TCP sockets
>     * in ESTABLISHED state.
>     * Supporting sockets in LISTEN state will require us
>     * to modify the accept implementation to clone rather then
>     * share the ulp context.
>     */
> 
> Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
> 'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
> crashing case here.
> 
> Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
> Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> Signed-off-by: Wei Wang <weiwan@google.com>
[...]

Still one question for some more clarification below that popped up while
review:

> @@ -162,6 +164,8 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
>  }
>  
>  static struct proto tcp_bpf_proto;
> +static struct proto tcpv6_bpf_proto;

These two are global, w/o locking.

>  static int bpf_tcp_init(struct sock *sk)
>  {
>  	struct smap_psock *psock;
> @@ -181,14 +185,30 @@ static int bpf_tcp_init(struct sock *sk)
>  	psock->save_close = sk->sk_prot->close;
>  	psock->sk_proto = sk->sk_prot;
>  
> +	if (sk->sk_family == AF_INET6) {
> +		tcpv6_bpf_proto = *sk->sk_prot;
> +		tcpv6_bpf_proto.close = bpf_tcp_close;
> +	} else {
> +		tcp_bpf_proto = *sk->sk_prot;
> +		tcp_bpf_proto.close = bpf_tcp_close;
> +	}

And each time we add a BPF ULP to a v4/v6 socket, we override tcp{,v6}_bpf_proto
from scratch.

>  	if (psock->bpf_tx_msg) {
> +		tcpv6_bpf_proto.sendmsg = bpf_tcp_sendmsg;
> +		tcpv6_bpf_proto.sendpage = bpf_tcp_sendpage;
> +		tcpv6_bpf_proto.recvmsg = bpf_tcp_recvmsg;
> +		tcpv6_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
>  		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
>  		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
>  		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
>  		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
>  	}
>  
> -	sk->sk_prot = &tcp_bpf_proto;
> +	if (sk->sk_family == AF_INET6)
> +		sk->sk_prot = &tcpv6_bpf_proto;
> +	else
> +		sk->sk_prot = &tcp_bpf_proto;

Where every active socket would be affected from it as well. Isn't that
generally racy? E.g. existing ones where tcpv6_bpf_proto.sendmsg points
to bpf_tcp_sendmsg would get overridden with earlier assignment on the
tcpv6_bpf_proto = *sk->sk_prot during their lifetime after bpf_tcp_init().

In the kTLS case, the v4 protos are built up in module init via tls_register()
and never change from there. The v6 ones are only reloaded when their addr
changes e.g. module reload would come to mind, which should only be possible
once no active v6 socket is present. What speaks against adapting similar
scheme resp. what am I missing that the above would work? (Would be nice if
there was some discussion in commit log related to it on 'why' this approach
was done differently.)

Thanks,
Daniel

>  	rcu_read_unlock();
>  	return 0;
>  }
> @@ -1111,8 +1131,6 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
>  
>  static int bpf_tcp_ulp_register(void)
>  {
> -	tcp_bpf_proto = tcp_prot;
> -	tcp_bpf_proto.close = bpf_tcp_close;
>  	/* Once BPF TX ULP is registered it is never unregistered. It
>  	 * will be in the ULP list for the lifetime of the system. Doing
>  	 * duplicate registers is not a problem.
>
John Fastabend June 12, 2018, 1:57 p.m. UTC | #2
On 06/11/2018 04:14 PM, Daniel Borkmann wrote:
> Hi John,
> 
> On 06/08/2018 05:06 PM, John Fastabend wrote:
>> This fixes a crash where we assign tcp_prot to IPv6 sockets instead
>> of tcpv6_prot.
>>
>> Previously we overwrote the sk->prot field with tcp_prot even in the
>> AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
>> are used. Further, only allow ESTABLISHED connections to join the
>> map per note in TLS ULP,
>>
>>    /* The TLS ulp is currently supported only for TCP sockets
>>     * in ESTABLISHED state.
>>     * Supporting sockets in LISTEN state will require us
>>     * to modify the accept implementation to clone rather then
>>     * share the ulp context.
>>     */
>>
>> Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
>> 'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
>> crashing case here.
>>
>> Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
>> Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>> Signed-off-by: Wei Wang <weiwan@google.com>
> [...]
> 
> Still one question for some more clarification below that popped up while
> review:
> 
>> @@ -162,6 +164,8 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
>>  }
>>  
>>  static struct proto tcp_bpf_proto;
>> +static struct proto tcpv6_bpf_proto;
> 
> These two are global, w/o locking.
> 
>>  static int bpf_tcp_init(struct sock *sk)
>>  {
>>  	struct smap_psock *psock;
>> @@ -181,14 +185,30 @@ static int bpf_tcp_init(struct sock *sk)
>>  	psock->save_close = sk->sk_prot->close;
>>  	psock->sk_proto = sk->sk_prot;
>>  
>> +	if (sk->sk_family == AF_INET6) {
>> +		tcpv6_bpf_proto = *sk->sk_prot;
>> +		tcpv6_bpf_proto.close = bpf_tcp_close;
>> +	} else {
>> +		tcp_bpf_proto = *sk->sk_prot;
>> +		tcp_bpf_proto.close = bpf_tcp_close;
>> +	}
> 
> And each time we add a BPF ULP to a v4/v6 socket, we override tcp{,v6}_bpf_proto
> from scratch.
> 
>>  	if (psock->bpf_tx_msg) {
>> +		tcpv6_bpf_proto.sendmsg = bpf_tcp_sendmsg;
>> +		tcpv6_bpf_proto.sendpage = bpf_tcp_sendpage;
>> +		tcpv6_bpf_proto.recvmsg = bpf_tcp_recvmsg;
>> +		tcpv6_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
>>  		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
>>  		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
>>  		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
>>  		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
>>  	}
>>  
>> -	sk->sk_prot = &tcp_bpf_proto;
>> +	if (sk->sk_family == AF_INET6)
>> +		sk->sk_prot = &tcpv6_bpf_proto;
>> +	else
>> +		sk->sk_prot = &tcp_bpf_proto;
> 
> Where every active socket would be affected from it as well. Isn't that
> generally racy? E.g. existing ones where tcpv6_bpf_proto.sendmsg points
> to bpf_tcp_sendmsg would get overridden with earlier assignment on the
> tcpv6_bpf_proto = *sk->sk_prot during their lifetime after bpf_tcp_init().
> 

In general yes. At best it does feel fragile.

> In the kTLS case, the v4 protos are built up in module init via tls_register()
> and never change from there. The v6 ones are only reloaded when their addr
> changes e.g. module reload would come to mind, which should only be possible
> once no active v6 socket is present. What speaks against adapting similar
> scheme resp. what am I missing that the above would work? (Would be nice if
> there was some discussion in commit log related to it on 'why' this approach
> was done differently.)

I think its best to use the same scheme. Will post a new version. Also
would be nice to fix the selftests in the same series. Finally, I set
these pointers lazily adding a sendmsg hook for example even if it not
needed. Its harmless but does create an extra call through bpf for
no reason on some socks. To be complete we should avoid that.

> 
> Thanks,
> Daniel
> 
>>  	rcu_read_unlock();
>>  	return 0;
>>  }
>> @@ -1111,8 +1131,6 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
>>  
>>  static int bpf_tcp_ulp_register(void)
>>  {
>> -	tcp_bpf_proto = tcp_prot;
>> -	tcp_bpf_proto.close = bpf_tcp_close;
>>  	/* Once BPF TX ULP is registered it is never unregistered. It
>>  	 * will be in the ULP list for the lifetime of the system. Doing
>>  	 * duplicate registers is not a problem.
>>
>
diff mbox series

Patch

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d8..fa9b7f3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -41,6 +41,7 @@ 
 #include <linux/mm.h>
 #include <net/strparser.h>
 #include <net/tcp.h>
+#include <net/transp_v6.h>
 #include <linux/ptr_ring.h>
 #include <net/inet_common.h>
 #include <linux/sched/signal.h>
@@ -140,6 +141,7 @@  static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
+static void bpf_tcp_close(struct sock *sk, long timeout);
 
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 {
@@ -162,6 +164,8 @@  static bool bpf_tcp_stream_read(const struct sock *sk)
 }
 
 static struct proto tcp_bpf_proto;
+static struct proto tcpv6_bpf_proto;
+
 static int bpf_tcp_init(struct sock *sk)
 {
 	struct smap_psock *psock;
@@ -181,14 +185,30 @@  static int bpf_tcp_init(struct sock *sk)
 	psock->save_close = sk->sk_prot->close;
 	psock->sk_proto = sk->sk_prot;
 
+	if (sk->sk_family == AF_INET6) {
+		tcpv6_bpf_proto = *sk->sk_prot;
+		tcpv6_bpf_proto.close = bpf_tcp_close;
+	} else {
+		tcp_bpf_proto = *sk->sk_prot;
+		tcp_bpf_proto.close = bpf_tcp_close;
+	}
+
 	if (psock->bpf_tx_msg) {
+		tcpv6_bpf_proto.sendmsg = bpf_tcp_sendmsg;
+		tcpv6_bpf_proto.sendpage = bpf_tcp_sendpage;
+		tcpv6_bpf_proto.recvmsg = bpf_tcp_recvmsg;
+		tcpv6_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
 		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
 		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
 		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
 		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
 	}
 
-	sk->sk_prot = &tcp_bpf_proto;
+	if (sk->sk_family == AF_INET6)
+		sk->sk_prot = &tcpv6_bpf_proto;
+	else
+		sk->sk_prot = &tcp_bpf_proto;
+
 	rcu_read_unlock();
 	return 0;
 }
@@ -1111,8 +1131,6 @@  static void bpf_tcp_msg_add(struct smap_psock *psock,
 
 static int bpf_tcp_ulp_register(void)
 {
-	tcp_bpf_proto = tcp_prot;
-	tcp_bpf_proto.close = bpf_tcp_close;
 	/* Once BPF TX ULP is registered it is never unregistered. It
 	 * will be in the ULP list for the lifetime of the system. Doing
 	 * duplicate registers is not a problem.