diff mbox series

[bpf,v2,1/6] bpf: sockmap, fix crash when ipv6 sock is added

Message ID 20180614164446.24994.22118.stgit@john-Precision-Tower-5810
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series BPF fixes for sockhash | expand

Commit Message

John Fastabend June 14, 2018, 4:44 p.m. UTC
This fixes a crash where we assign tcp_prot to IPv6 sockets instead
of tcpv6_prot.

Previously we overwrote the sk->prot field with tcp_prot even in the
AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
are used. Further, only allow ESTABLISHED connections to join the
map per note in TLS ULP,

   /* The TLS ulp is currently supported only for TCP sockets
    * in ESTABLISHED state.
    * Supporting sockets in LISTEN state will require us
    * to modify the accept implementation to clone rather then
    * share the ulp context.
    */

Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
crashing case here.

Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
---
 0 files changed

Comments

Martin KaFai Lau June 14, 2018, 11:53 p.m. UTC | #1
On Thu, Jun 14, 2018 at 09:44:46AM -0700, John Fastabend wrote:
> This fixes a crash where we assign tcp_prot to IPv6 sockets instead
> of tcpv6_prot.
> 
> Previously we overwrote the sk->prot field with tcp_prot even in the
> AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
> are used.

> Further, only allow ESTABLISHED connections to join the
> map per note in TLS ULP,
> 
>    /* The TLS ulp is currently supported only for TCP sockets
>     * in ESTABLISHED state.
>     * Supporting sockets in LISTEN state will require us
>     * to modify the accept implementation to clone rather then
>     * share the ulp context.
>     */
This bit has been moved to patch 2.

> 
> Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
> 'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
> crashing case here.
> 
> Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
> Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> Signed-off-by: Wei Wang <weiwan@google.com>
> ---
>  0 files changed
> 
> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
> index 52a91d8..f6dd4cd 100644
> --- a/kernel/bpf/sockmap.c
> +++ b/kernel/bpf/sockmap.c
> @@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>  static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
>  static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
>  			    int offset, size_t size, int flags);
> +static void bpf_tcp_close(struct sock *sk, long timeout);
>  
>  static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
>  {
> @@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
>  	return !empty;
>  }
>  
> -static struct proto tcp_bpf_proto;
> +enum {
> +	SOCKMAP_IPV4,
> +	SOCKMAP_IPV6,
> +	SOCKMAP_NUM_PROTS,
> +};
> +
> +enum {
> +	SOCKMAP_BASE,
> +	SOCKMAP_TX,
> +	SOCKMAP_NUM_CONFIGS,
> +};
> +
> +static struct proto *saved_tcpv6_prot;
__read_mostly

> +static DEFINE_MUTEX(tcpv6_prot_mutex);
> +static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
> +static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
> +			 struct proto *base)
> +{
> +	prot[SOCKMAP_BASE]			= *base;
> +	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
> +	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
> +	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
> +
> +	prot[SOCKMAP_TX]			= prot[SOCKMAP_BASE];
> +	prot[SOCKMAP_TX].sendmsg		= bpf_tcp_sendmsg;
> +	prot[SOCKMAP_TX].sendpage		= bpf_tcp_sendpage;
> +}
> +
> +static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
> +{
> +	int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
> +	int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
> +
> +	sk->sk_prot = &bpf_tcp_prots[family][conf];
> +}
> +
>  static int bpf_tcp_init(struct sock *sk)
>  {
>  	struct smap_psock *psock;
> @@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk)
>  	psock->save_close = sk->sk_prot->close;
>  	psock->sk_proto = sk->sk_prot;
>  
> -	if (psock->bpf_tx_msg) {
> -		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
> -		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
> -		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
> -		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
> +	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
> +	if (sk->sk_family == AF_INET6 &&
> +	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
> +		mutex_lock(&tcpv6_prot_mutex);
bpf_tcp_init() can be called by skops?
Can mutex_lock() be used here?

> +		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
> +			build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
> +			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
> +		}
> +		mutex_unlock(&tcpv6_prot_mutex);
>  	}
> -
> -	sk->sk_prot = &tcp_bpf_proto;
> +	update_sk_prot(sk, psock);
>  	rcu_read_unlock();
>  	return 0;
>  }
> @@ -1111,8 +1150,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
>  
>  static int bpf_tcp_ulp_register(void)
>  {
> -	tcp_bpf_proto = tcp_prot;
> -	tcp_bpf_proto.close = bpf_tcp_close;
> +	build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
>  	/* Once BPF TX ULP is registered it is never unregistered. It
>  	 * will be in the ULP list for the lifetime of the system. Doing
>  	 * duplicate registers is not a problem.
>
John Fastabend June 15, 2018, 4:46 a.m. UTC | #2
On 06/14/2018 04:53 PM, Martin KaFai Lau wrote:
> On Thu, Jun 14, 2018 at 09:44:46AM -0700, John Fastabend wrote:
>> This fixes a crash where we assign tcp_prot to IPv6 sockets instead
>> of tcpv6_prot.
>>
>> Previously we overwrote the sk->prot field with tcp_prot even in the
>> AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
>> are used.
> 
>> Further, only allow ESTABLISHED connections to join the
>> map per note in TLS ULP,
>>
>>    /* The TLS ulp is currently supported only for TCP sockets
>>     * in ESTABLISHED state.
>>     * Supporting sockets in LISTEN state will require us
>>     * to modify the accept implementation to clone rather then
>>     * share the ulp context.
>>     */
> This bit has been moved to patch 2.

Yep better cut the comment as well.

> 
>>
>> Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
>> 'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
>> crashing case here.
>>
>> Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
>> Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>> Signed-off-by: Wei Wang <weiwan@google.com>
>> ---
>>  0 files changed
>>

0 files changed will fix that as well.

>> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
>> index 52a91d8..f6dd4cd 100644
>> --- a/kernel/bpf/sockmap.c
>> +++ b/kernel/bpf/sockmap.c
>> @@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>>  static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
>>  static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
>>  			    int offset, size_t size, int flags);
>> +static void bpf_tcp_close(struct sock *sk, long timeout);
>>  
>>  static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
>>  {
>> @@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
>>  	return !empty;
>>  }
>>  
>> -static struct proto tcp_bpf_proto;
>> +enum {
>> +	SOCKMAP_IPV4,
>> +	SOCKMAP_IPV6,
>> +	SOCKMAP_NUM_PROTS,
>> +};
>> +
>> +enum {
>> +	SOCKMAP_BASE,
>> +	SOCKMAP_TX,
>> +	SOCKMAP_NUM_CONFIGS,
>> +};
>> +
>> +static struct proto *saved_tcpv6_prot;
> __read_mostly
> 

Sure makes sense.

>> +static DEFINE_MUTEX(tcpv6_prot_mutex);
>> +static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
>> +static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
>> +			 struct proto *base)
>> +{
>> +	prot[SOCKMAP_BASE]			= *base;
>> +	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
>> +	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
>> +	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
>> +
>> +	prot[SOCKMAP_TX]			= prot[SOCKMAP_BASE];
>> +	prot[SOCKMAP_TX].sendmsg		= bpf_tcp_sendmsg;
>> +	prot[SOCKMAP_TX].sendpage		= bpf_tcp_sendpage;
>> +}
>> +
>> +static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
>> +{
>> +	int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
>> +	int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
>> +
>> +	sk->sk_prot = &bpf_tcp_prots[family][conf];
>> +}
>> +
>>  static int bpf_tcp_init(struct sock *sk)
>>  {
>>  	struct smap_psock *psock;
>> @@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk)
>>  	psock->save_close = sk->sk_prot->close;
>>  	psock->sk_proto = sk->sk_prot;
>>  
>> -	if (psock->bpf_tx_msg) {
>> -		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
>> -		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
>> -		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
>> -		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
>> +	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
>> +	if (sk->sk_family == AF_INET6 &&
>> +	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
>> +		mutex_lock(&tcpv6_prot_mutex);
> bpf_tcp_init() can be called by skops?
> Can mutex_lock() be used here?
> 

No mutex lock can not be used here. Both are called
with rcu_read_lock() and we can not sleep. Thanks
for catching. Also this will give a kernel splat now
that I have the right config options. Guess we need
a v3 :/

Thanks,
John
diff mbox series

Patch

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d8..f6dd4cd 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -140,6 +140,7 @@  static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
+static void bpf_tcp_close(struct sock *sk, long timeout);
 
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 {
@@ -161,7 +162,42 @@  static bool bpf_tcp_stream_read(const struct sock *sk)
 	return !empty;
 }
 
-static struct proto tcp_bpf_proto;
+enum {
+	SOCKMAP_IPV4,
+	SOCKMAP_IPV6,
+	SOCKMAP_NUM_PROTS,
+};
+
+enum {
+	SOCKMAP_BASE,
+	SOCKMAP_TX,
+	SOCKMAP_NUM_CONFIGS,
+};
+
+static struct proto *saved_tcpv6_prot;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
+static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
+static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
+			 struct proto *base)
+{
+	prot[SOCKMAP_BASE]			= *base;
+	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
+	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
+	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
+
+	prot[SOCKMAP_TX]			= prot[SOCKMAP_BASE];
+	prot[SOCKMAP_TX].sendmsg		= bpf_tcp_sendmsg;
+	prot[SOCKMAP_TX].sendpage		= bpf_tcp_sendpage;
+}
+
+static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
+{
+	int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
+	int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
+
+	sk->sk_prot = &bpf_tcp_prots[family][conf];
+}
+
 static int bpf_tcp_init(struct sock *sk)
 {
 	struct smap_psock *psock;
@@ -181,14 +217,17 @@  static int bpf_tcp_init(struct sock *sk)
 	psock->save_close = sk->sk_prot->close;
 	psock->sk_proto = sk->sk_prot;
 
-	if (psock->bpf_tx_msg) {
-		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
-		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
-		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
-		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
+	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
+	if (sk->sk_family == AF_INET6 &&
+	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+		mutex_lock(&tcpv6_prot_mutex);
+		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+			build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
+			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+		}
+		mutex_unlock(&tcpv6_prot_mutex);
 	}
-
-	sk->sk_prot = &tcp_bpf_proto;
+	update_sk_prot(sk, psock);
 	rcu_read_unlock();
 	return 0;
 }
@@ -1111,8 +1150,7 @@  static void bpf_tcp_msg_add(struct smap_psock *psock,
 
 static int bpf_tcp_ulp_register(void)
 {
-	tcp_bpf_proto = tcp_prot;
-	tcp_bpf_proto.close = bpf_tcp_close;
+	build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
 	/* Once BPF TX ULP is registered it is never unregistered. It
 	 * will be in the ULP list for the lifetime of the system. Doing
 	 * duplicate registers is not a problem.