diff mbox series

[bpf,v2,2/6] bpf: sockmap only allow ESTABLISHED sock state

Message ID 20180614164451.24994.31096.stgit@john-Precision-Tower-5810
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series BPF fixes for sockhash | expand

Commit Message

John Fastabend June 14, 2018, 4:44 p.m. UTC
Per the note in the TLS ULP (which is actually a generic statement
regarding ULPs)

 /* The TLS ulp is currently supported only for TCP sockets
  * in ESTABLISHED state.
  * Supporting sockets in LISTEN state will require us
  * to modify the accept implementation to clone rather then
  * share the ulp context.
  */

After this patch we only allow socks that are in ESTABLISHED state or
are being added via a sock_ops event that is transitioning into an
ESTABLISHED state. By allowing sock_ops events we allow users to
manage sockmaps directly from sock ops programs. The two supported
sock_ops ops are BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB and
BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB.

>From the userspace BPF update API the sock lock is also taken now
to ensure we don't race with state changes after the ESTABLISHED
check. The BPF program sock ops hook already has the sock lock
taken.

Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
'netperf -H [IPv4]'.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 0 files changed

Comments

Martin KaFai Lau June 15, 2018, 12:18 a.m. UTC | #1
On Thu, Jun 14, 2018 at 09:44:52AM -0700, John Fastabend wrote:
> Per the note in the TLS ULP (which is actually a generic statement
> regarding ULPs)
> 
>  /* The TLS ulp is currently supported only for TCP sockets
>   * in ESTABLISHED state.
>   * Supporting sockets in LISTEN state will require us
>   * to modify the accept implementation to clone rather then
>   * share the ulp context.
>   */
Can you explain how that apply to bpf_tcp ulp?

My understanding is the "ulp context" referred in TLS ulp is
the tls_context stored in icsk_ulp_data but I don't see bpf_tcp's
ulp is using icsk_ulp_data.

Others LGTM.

> 
> After this patch we only allow socks that are in ESTABLISHED state or
> are being added via a sock_ops event that is transitioning into an
> ESTABLISHED state. By allowing sock_ops events we allow users to
> manage sockmaps directly from sock ops programs. The two supported
> sock_ops ops are BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB and
> BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB.
> 
> >From the userspace BPF update API the sock lock is also taken now
> to ensure we don't race with state changes after the ESTABLISHED
> check. The BPF program sock ops hook already has the sock lock
> taken.
> 
> Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
> 'netperf -H [IPv4]'.
> 
> Reported-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  0 files changed
> 
> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
> index f6dd4cd..f1ab52d 100644
> --- a/kernel/bpf/sockmap.c
> +++ b/kernel/bpf/sockmap.c
> @@ -1976,13 +1976,20 @@ static int sock_map_update_elem(struct bpf_map *map,
>  		return -EINVAL;
>  	}
>  
> +	lock_sock(skops.sk);
> +	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
> +	 * state.
> +	 */
>  	if (skops.sk->sk_type != SOCK_STREAM ||
> -	    skops.sk->sk_protocol != IPPROTO_TCP) {
> -		fput(socket->file);
> -		return -EOPNOTSUPP;
> +	    skops.sk->sk_protocol != IPPROTO_TCP ||
> +	    skops.sk->sk_state != TCP_ESTABLISHED) {
> +		err = -EOPNOTSUPP;
> +		goto out;
>  	}
>  
>  	err = sock_map_ctx_update_elem(&skops, map, key, flags);
> +out:
> +	release_sock(skops.sk);
>  	fput(socket->file);
>  	return err;
>  }
> @@ -2247,10 +2254,6 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
>  
>  	sock = skops->sk;
>  
> -	if (sock->sk_type != SOCK_STREAM ||
> -	    sock->sk_protocol != IPPROTO_TCP)
> -		return -EOPNOTSUPP;
> -
>  	if (unlikely(map_flags > BPF_EXIST))
>  		return -EINVAL;
>  
> @@ -2338,7 +2341,20 @@ static int sock_hash_update_elem(struct bpf_map *map,
>  		return -EINVAL;
>  	}
>  
> +	lock_sock(skops.sk);
> +	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
> +	 * state.
> +	 */
> +	if (skops.sk->sk_type != SOCK_STREAM ||
> +	    skops.sk->sk_protocol != IPPROTO_TCP ||
> +	    skops.sk->sk_state != TCP_ESTABLISHED) {
> +		err = -EOPNOTSUPP;
> +		goto out;
> +	}
> +
>  	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
> +out:
> +	release_sock(skops.sk);
>  	fput(socket->file);
>  	return err;
>  }
> @@ -2423,10 +2439,19 @@ struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
>  	.map_delete_elem = sock_hash_delete_elem,
>  };
>  
> +static bool bpf_is_valid_sock(struct bpf_sock_ops_kern *ops)
> +{
> +	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
> +	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
> +}
> +
>  BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
>  	   struct bpf_map *, map, void *, key, u64, flags)
>  {
>  	WARN_ON_ONCE(!rcu_read_lock_held());
> +
> +	if (!bpf_is_valid_sock(bpf_sock))
> +		return -EOPNOTSUPP;
>  	return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
>  }
>  
> @@ -2445,6 +2470,9 @@ struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
>  	   struct bpf_map *, map, void *, key, u64, flags)
>  {
>  	WARN_ON_ONCE(!rcu_read_lock_held());
> +
> +	if (!bpf_is_valid_sock(bpf_sock))
> +		return -EOPNOTSUPP;
>  	return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
>  }
>  
>
John Fastabend June 18, 2018, 2:50 p.m. UTC | #2
On 06/14/2018 05:18 PM, Martin KaFai Lau wrote:
> On Thu, Jun 14, 2018 at 09:44:52AM -0700, John Fastabend wrote:
>> Per the note in the TLS ULP (which is actually a generic statement
>> regarding ULPs)
>>
>>  /* The TLS ulp is currently supported only for TCP sockets
>>   * in ESTABLISHED state.
>>   * Supporting sockets in LISTEN state will require us
>>   * to modify the accept implementation to clone rather then
>>   * share the ulp context.
>>   */
> Can you explain how that apply to bpf_tcp ulp?
> 
> My understanding is the "ulp context" referred in TLS ulp is
> the tls_context stored in icsk_ulp_data but I don't see bpf_tcp's
> ulp is using icsk_ulp_data.
> 
> Others LGTM.
> 

So I think you are right we could probably allow it
here but I am thinking I'll leave the check for now
anyways for a couple reasons. First, we will shortly
add support to allow ULP types to coexist. At the moment
the two ULP types can not coexist. When this happens it
looks like we will need to restrict to only ESTABLISHED
types or somehow make all ULPs work in all states.

Second, I don't have any use cases (nor can I think of
any) for the sock{map|hash} ULP to be running on a non
ESTABLISHED socket. Its not clear to me that having the
sendmsg/sendpage hooks for a LISTEN socket makes sense.
I would rather restrict it now and if we add something
later where it makes sense to run on non-ESTABLISHED
socks we can remove the check.

Thanks for reviewing,
John
Martin KaFai Lau June 18, 2018, 9:17 p.m. UTC | #3
On Mon, Jun 18, 2018 at 07:50:19AM -0700, John Fastabend wrote:
> On 06/14/2018 05:18 PM, Martin KaFai Lau wrote:
> > On Thu, Jun 14, 2018 at 09:44:52AM -0700, John Fastabend wrote:
> >> Per the note in the TLS ULP (which is actually a generic statement
> >> regarding ULPs)
> >>
> >>  /* The TLS ulp is currently supported only for TCP sockets
> >>   * in ESTABLISHED state.
> >>   * Supporting sockets in LISTEN state will require us
> >>   * to modify the accept implementation to clone rather then
> >>   * share the ulp context.
> >>   */
> > Can you explain how that apply to bpf_tcp ulp?
> > 
> > My understanding is the "ulp context" referred in TLS ulp is
> > the tls_context stored in icsk_ulp_data but I don't see bpf_tcp's
> > ulp is using icsk_ulp_data.
> > 
> > Others LGTM.
> > 
> 
> So I think you are right we could probably allow it
> here but I am thinking I'll leave the check for now
> anyways for a couple reasons. First, we will shortly
> add support to allow ULP types to coexist. At the moment
> the two ULP types can not coexist. When this happens it
> looks like we will need to restrict to only ESTABLISHED
> types or somehow make all ULPs work in all states.
> 
> Second, I don't have any use cases (nor can I think of
> any) for the sock{map|hash} ULP to be running on a non
> ESTABLISHED socket. Its not clear to me that having the
> sendmsg/sendpage hooks for a LISTEN socket makes sense.
> I would rather restrict it now and if we add something
> later where it makes sense to run on non-ESTABLISHED
> socks we can remove the check.
Make sense if there is no use case.  It will be helpful if the commit log
is updated accordingly.  Thanks!

Acked-by: Martin KaFai Lau <kafai@fb.com>
John Fastabend June 20, 2018, 10:15 p.m. UTC | #4
On 06/18/2018 02:17 PM, Martin KaFai Lau wrote:
> On Mon, Jun 18, 2018 at 07:50:19AM -0700, John Fastabend wrote:
>> On 06/14/2018 05:18 PM, Martin KaFai Lau wrote:
>>> On Thu, Jun 14, 2018 at 09:44:52AM -0700, John Fastabend wrote:
>>>> Per the note in the TLS ULP (which is actually a generic statement
>>>> regarding ULPs)
>>>>
>>>>  /* The TLS ulp is currently supported only for TCP sockets
>>>>   * in ESTABLISHED state.
>>>>   * Supporting sockets in LISTEN state will require us
>>>>   * to modify the accept implementation to clone rather then
>>>>   * share the ulp context.
>>>>   */
>>> Can you explain how that apply to bpf_tcp ulp?
>>>
>>> My understanding is the "ulp context" referred in TLS ulp is
>>> the tls_context stored in icsk_ulp_data but I don't see bpf_tcp's
>>> ulp is using icsk_ulp_data.
>>>
>>> Others LGTM.
>>>
>>
>> So I think you are right we could probably allow it
>> here but I am thinking I'll leave the check for now
>> anyways for a couple reasons. First, we will shortly
>> add support to allow ULP types to coexist. At the moment
>> the two ULP types can not coexist. When this happens it
>> looks like we will need to restrict to only ESTABLISHED
>> types or somehow make all ULPs work in all states.
>>
>> Second, I don't have any use cases (nor can I think of
>> any) for the sock{map|hash} ULP to be running on a non
>> ESTABLISHED socket. Its not clear to me that having the
>> sendmsg/sendpage hooks for a LISTEN socket makes sense.
>> I would rather restrict it now and if we add something
>> later where it makes sense to run on non-ESTABLISHED
>> socks we can remove the check.
> Make sense if there is no use case.  It will be helpful if the commit log
> is updated accordingly.  Thanks!
> 
> Acked-by: Martin KaFai Lau <kafai@fb.com>
> 

The fall-out of this patch got a bit ugly. It doesn't
make much sense to me to allow transitioning into
ESTABLISH state (via tcp_disconnect) and not allow adding
the socks up front. But the fix via unhash callback, subsequent
patch, ended up causing a few issues. First to avoid racing
with transitions through update logic we had to use
sock_lock(sk) in the update handler. Which means we
can't use the normal ./kernel/bpf/syscall.c map update
logic and had to special case it so that preempt and
rcu were not used until after the lock was taken because
sock_lock can sleep. Then after running over night I noticed
a couple WARNINGS related to sk_forward_alloc not being
zero'd correctly on sock teardown. The issue is unhash
doesn't have sock_lock either and can be done while a
sendmsg/sendpage are running resulting in incorrectly
removing scatterlists. :(

All in all the "fix" got ugly so lets stay with the minimal
required set and allow non-established socks. It shouldn't
hurt anything even if from a use case perspective its a bit
useless. Then in bpf-next when we allow ULPs to coexist we
need to have some fix to handle this. But I would rather do
that in *next branches instead of bpf/net branches.

Thanks for all the useful feedback. I'm going to send a
set of three patches now to address the specific issues,
(a) IPV6 socks, (b) bucket lock missing and (c) uref release
missing.

Thanks,
John
diff mbox series

Patch

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index f6dd4cd..f1ab52d 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1976,13 +1976,20 @@  static int sock_map_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	lock_sock(skops.sk);
+	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
+	 * state.
+	 */
 	if (skops.sk->sk_type != SOCK_STREAM ||
-	    skops.sk->sk_protocol != IPPROTO_TCP) {
-		fput(socket->file);
-		return -EOPNOTSUPP;
+	    skops.sk->sk_protocol != IPPROTO_TCP ||
+	    skops.sk->sk_state != TCP_ESTABLISHED) {
+		err = -EOPNOTSUPP;
+		goto out;
 	}
 
 	err = sock_map_ctx_update_elem(&skops, map, key, flags);
+out:
+	release_sock(skops.sk);
 	fput(socket->file);
 	return err;
 }
@@ -2247,10 +2254,6 @@  static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 
 	sock = skops->sk;
 
-	if (sock->sk_type != SOCK_STREAM ||
-	    sock->sk_protocol != IPPROTO_TCP)
-		return -EOPNOTSUPP;
-
 	if (unlikely(map_flags > BPF_EXIST))
 		return -EINVAL;
 
@@ -2338,7 +2341,20 @@  static int sock_hash_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	lock_sock(skops.sk);
+	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
+	 * state.
+	 */
+	if (skops.sk->sk_type != SOCK_STREAM ||
+	    skops.sk->sk_protocol != IPPROTO_TCP ||
+	    skops.sk->sk_state != TCP_ESTABLISHED) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
 	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
+out:
+	release_sock(skops.sk);
 	fput(socket->file);
 	return err;
 }
@@ -2423,10 +2439,19 @@  struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
 	.map_delete_elem = sock_hash_delete_elem,
 };
 
+static bool bpf_is_valid_sock(struct bpf_sock_ops_kern *ops)
+{
+	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
+	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
+}
+
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
 	   struct bpf_map *, map, void *, key, u64, flags)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (!bpf_is_valid_sock(bpf_sock))
+		return -EOPNOTSUPP;
 	return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
 }
 
@@ -2445,6 +2470,9 @@  struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
 	   struct bpf_map *, map, void *, key, u64, flags)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (!bpf_is_valid_sock(bpf_sock))
+		return -EOPNOTSUPP;
 	return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
 }