Message ID | 20180614164446.24994.22118.stgit@john-Precision-Tower-5810 |
---|---|
State | Changes Requested, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | BPF fixes for sockhash | expand |
On Thu, Jun 14, 2018 at 09:44:46AM -0700, John Fastabend wrote: > This fixes a crash where we assign tcp_prot to IPv6 sockets instead > of tcpv6_prot. > > Previously we overwrote the sk->prot field with tcp_prot even in the > AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot > are used. > Further, only allow ESTABLISHED connections to join the > map per note in TLS ULP, > > /* The TLS ulp is currently supported only for TCP sockets > * in ESTABLISHED state. > * Supporting sockets in LISTEN state will require us > * to modify the accept implementation to clone rather then > * share the ulp context. > */ This bit has been moved to patch 2. > > Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as > 'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously > crashing case here. > > Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") > Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com > Signed-off-by: John Fastabend <john.fastabend@gmail.com> > Signed-off-by: Wei Wang <weiwan@google.com> > --- > 0 files changed > > diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c > index 52a91d8..f6dd4cd 100644 > --- a/kernel/bpf/sockmap.c > +++ b/kernel/bpf/sockmap.c > @@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, > static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); > static int bpf_tcp_sendpage(struct sock *sk, struct page *page, > int offset, size_t size, int flags); > +static void bpf_tcp_close(struct sock *sk, long timeout); > > static inline struct smap_psock *smap_psock_sk(const struct sock *sk) > { > @@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk) > return !empty; > } > > -static struct proto tcp_bpf_proto; > +enum { > + SOCKMAP_IPV4, > + SOCKMAP_IPV6, > + SOCKMAP_NUM_PROTS, > +}; > + > +enum { > + SOCKMAP_BASE, > + SOCKMAP_TX, > + SOCKMAP_NUM_CONFIGS, > +}; > + > +static struct proto 
*saved_tcpv6_prot; __read_mostly > +static DEFINE_MUTEX(tcpv6_prot_mutex); > +static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS]; > +static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], > + struct proto *base) > +{ > + prot[SOCKMAP_BASE] = *base; > + prot[SOCKMAP_BASE].close = bpf_tcp_close; > + prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; > + prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; > + > + prot[SOCKMAP_TX] = prot[SOCKMAP_BASE]; > + prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg; > + prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage; > +} > + > +static void update_sk_prot(struct sock *sk, struct smap_psock *psock) > +{ > + int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4; > + int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE; > + > + sk->sk_prot = &bpf_tcp_prots[family][conf]; > +} > + > static int bpf_tcp_init(struct sock *sk) > { > struct smap_psock *psock; > @@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk) > psock->save_close = sk->sk_prot->close; > psock->sk_proto = sk->sk_prot; > > - if (psock->bpf_tx_msg) { > - tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; > - tcp_bpf_proto.sendpage = bpf_tcp_sendpage; > - tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg; > - tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read; > + /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */ > + if (sk->sk_family == AF_INET6 && > + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { > + mutex_lock(&tcpv6_prot_mutex); bpf_tcp_init() can be called by skops? Can mutex_lock() be used here? 
> + if (likely(sk->sk_prot != saved_tcpv6_prot)) { > + build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot); > + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); > + } > + mutex_unlock(&tcpv6_prot_mutex); > } > - > - sk->sk_prot = &tcp_bpf_proto; > + update_sk_prot(sk, psock); > rcu_read_unlock(); > return 0; > } > @@ -1111,8 +1150,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock, > > static int bpf_tcp_ulp_register(void) > { > - tcp_bpf_proto = tcp_prot; > - tcp_bpf_proto.close = bpf_tcp_close; > + build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot); > /* Once BPF TX ULP is registered it is never unregistered. It > * will be in the ULP list for the lifetime of the system. Doing > * duplicate registers is not a problem. >
On 06/14/2018 04:53 PM, Martin KaFai Lau wrote: > On Thu, Jun 14, 2018 at 09:44:46AM -0700, John Fastabend wrote: >> This fixes a crash where we assign tcp_prot to IPv6 sockets instead >> of tcpv6_prot. >> >> Previously we overwrote the sk->prot field with tcp_prot even in the >> AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot >> are used. > >> Further, only allow ESTABLISHED connections to join the >> map per note in TLS ULP, >> >> /* The TLS ulp is currently supported only for TCP sockets >> * in ESTABLISHED state. >> * Supporting sockets in LISTEN state will require us >> * to modify the accept implementation to clone rather then >> * share the ulp context. >> */ > This bit has been moved to patch 2. Yep better cut the comment as well. > >> >> Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as >> 'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously >> crashing case here. >> >> Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") >> Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com >> Signed-off-by: John Fastabend <john.fastabend@gmail.com> >> Signed-off-by: Wei Wang <weiwan@google.com> >> --- >> 0 files changed >> 0 files changed will fix that as well. 
>> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c >> index 52a91d8..f6dd4cd 100644 >> --- a/kernel/bpf/sockmap.c >> +++ b/kernel/bpf/sockmap.c >> @@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, >> static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); >> static int bpf_tcp_sendpage(struct sock *sk, struct page *page, >> int offset, size_t size, int flags); >> +static void bpf_tcp_close(struct sock *sk, long timeout); >> >> static inline struct smap_psock *smap_psock_sk(const struct sock *sk) >> { >> @@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk) >> return !empty; >> } >> >> -static struct proto tcp_bpf_proto; >> +enum { >> + SOCKMAP_IPV4, >> + SOCKMAP_IPV6, >> + SOCKMAP_NUM_PROTS, >> +}; >> + >> +enum { >> + SOCKMAP_BASE, >> + SOCKMAP_TX, >> + SOCKMAP_NUM_CONFIGS, >> +}; >> + >> +static struct proto *saved_tcpv6_prot; > __read_mostly > Sure makes sense. >> +static DEFINE_MUTEX(tcpv6_prot_mutex); >> +static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS]; >> +static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], >> + struct proto *base) >> +{ >> + prot[SOCKMAP_BASE] = *base; >> + prot[SOCKMAP_BASE].close = bpf_tcp_close; >> + prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; >> + prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; >> + >> + prot[SOCKMAP_TX] = prot[SOCKMAP_BASE]; >> + prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg; >> + prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage; >> +} >> + >> +static void update_sk_prot(struct sock *sk, struct smap_psock *psock) >> +{ >> + int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4; >> + int conf = psock->bpf_tx_msg ? 
SOCKMAP_TX : SOCKMAP_BASE; >> + >> + sk->sk_prot = &bpf_tcp_prots[family][conf]; >> +} >> + >> static int bpf_tcp_init(struct sock *sk) >> { >> struct smap_psock *psock; >> @@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk) >> psock->save_close = sk->sk_prot->close; >> psock->sk_proto = sk->sk_prot; >> >> - if (psock->bpf_tx_msg) { >> - tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; >> - tcp_bpf_proto.sendpage = bpf_tcp_sendpage; >> - tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg; >> - tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read; >> + /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */ >> + if (sk->sk_family == AF_INET6 && >> + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { >> + mutex_lock(&tcpv6_prot_mutex); > bpf_tcp_init() can be called by skops? > Can mutex_lock() be used here? > No, mutex_lock() cannot be used here. Both are called with rcu_read_lock() held and we cannot sleep. Thanks for catching. Also, this will give a kernel splat now that I have the right config options. Guess we need a v3 :/ Thanks, John
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 52a91d8..f6dd4cd 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); static int bpf_tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +static void bpf_tcp_close(struct sock *sk, long timeout); static inline struct smap_psock *smap_psock_sk(const struct sock *sk) { @@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk) return !empty; } -static struct proto tcp_bpf_proto; +enum { + SOCKMAP_IPV4, + SOCKMAP_IPV6, + SOCKMAP_NUM_PROTS, +}; + +enum { + SOCKMAP_BASE, + SOCKMAP_TX, + SOCKMAP_NUM_CONFIGS, +}; + +static struct proto *saved_tcpv6_prot; +static DEFINE_MUTEX(tcpv6_prot_mutex); +static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS]; +static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], + struct proto *base) +{ + prot[SOCKMAP_BASE] = *base; + prot[SOCKMAP_BASE].close = bpf_tcp_close; + prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; + prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; + + prot[SOCKMAP_TX] = prot[SOCKMAP_BASE]; + prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg; + prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage; +} + +static void update_sk_prot(struct sock *sk, struct smap_psock *psock) +{ + int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4; + int conf = psock->bpf_tx_msg ? 
SOCKMAP_TX : SOCKMAP_BASE; + + sk->sk_prot = &bpf_tcp_prots[family][conf]; +} + static int bpf_tcp_init(struct sock *sk) { struct smap_psock *psock; @@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk) psock->save_close = sk->sk_prot->close; psock->sk_proto = sk->sk_prot; - if (psock->bpf_tx_msg) { - tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; - tcp_bpf_proto.sendpage = bpf_tcp_sendpage; - tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg; - tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read; + /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */ + if (sk->sk_family == AF_INET6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + mutex_lock(&tcpv6_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + mutex_unlock(&tcpv6_prot_mutex); } - - sk->sk_prot = &tcp_bpf_proto; + update_sk_prot(sk, psock); rcu_read_unlock(); return 0; } @@ -1111,8 +1150,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock, static int bpf_tcp_ulp_register(void) { - tcp_bpf_proto = tcp_prot; - tcp_bpf_proto.close = bpf_tcp_close; + build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot); /* Once BPF TX ULP is registered it is never unregistered. It * will be in the ULP list for the lifetime of the system. Doing * duplicate registers is not a problem.