diff mbox series

[bpf-next,v2,2/4] xsk: add proper barriers and {READ, WRITE}_ONCE-correctness for state

Message ID 20190826061053.15996-3-bjorn.topel@gmail.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series xsk: various CPU barrier and {READ, WRITE}_ONCE fixes | expand

Commit Message

Björn Töpel Aug. 26, 2019, 6:10 a.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

The state variable was read, and written outside the control mutex
(struct xdp_sock, mutex), without proper barriers and {READ,
WRITE}_ONCE correctness.

In this commit this issue is addressed, and the state member is now
used a point of synchronization whether the socket is setup correctly
or not.

This also fixes a race, found by syzcaller, in xsk_poll() where umem
could be accessed when stale.

Suggested-by: Hillf Danton <hdanton@sina.com>
Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com
Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 net/xdp/xsk.c | 57 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 17 deletions(-)

Comments

Ilya Maximets Aug. 26, 2019, 3:24 p.m. UTC | #1
On 26.08.2019 9:10, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> The state variable was read, and written outside the control mutex
> (struct xdp_sock, mutex), without proper barriers and {READ,
> WRITE}_ONCE correctness.
> 
> In this commit this issue is addressed, and the state member is now
> used a point of synchronization whether the socket is setup correctly
> or not.
> 
> This also fixes a race, found by syzcaller, in xsk_poll() where umem
> could be accessed when stale.
> 
> Suggested-by: Hillf Danton <hdanton@sina.com>
> Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com
> Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings")
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>  net/xdp/xsk.c | 57 ++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 40 insertions(+), 17 deletions(-)
> 
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index f3351013c2a5..8fafa3ce3ae6 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -162,10 +162,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
>  	return err;
>  }
>  
> +static bool xsk_is_bound(struct xdp_sock *xs)
> +{
> +	if (READ_ONCE(xs->state) == XSK_BOUND) {
> +		/* Matches smp_wmb() in bind(). */
> +		smp_rmb();
> +		return true;
> +	}
> +	return false;
> +}
> +
>  int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
>  {
>  	u32 len;
>  
> +	if (!xsk_is_bound(xs))
> +		return -EINVAL;
> +
>  	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
>  		return -EINVAL;
>  
> @@ -362,7 +375,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>  	struct sock *sk = sock->sk;
>  	struct xdp_sock *xs = xdp_sk(sk);
>  
> -	if (unlikely(!xs->dev))
> +	if (unlikely(!xsk_is_bound(xs)))
>  		return -ENXIO;
>  	if (unlikely(!(xs->dev->flags & IFF_UP)))
>  		return -ENETDOWN;
> @@ -378,10 +391,15 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
>  			     struct poll_table_struct *wait)
>  {
>  	unsigned int mask = datagram_poll(file, sock, wait);
> -	struct sock *sk = sock->sk;
> -	struct xdp_sock *xs = xdp_sk(sk);
> -	struct net_device *dev = xs->dev;
> -	struct xdp_umem *umem = xs->umem;
> +	struct xdp_sock *xs = xdp_sk(sock->sk);
> +	struct net_device *dev;
> +	struct xdp_umem *umem;
> +
> +	if (unlikely(!xsk_is_bound(xs)))
> +		return mask;
> +
> +	dev = xs->dev;
> +	umem = xs->umem;
>  
>  	if (umem->need_wakeup)
>  		dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
> @@ -417,10 +435,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
>  {
>  	struct net_device *dev = xs->dev;
>  
> -	if (!dev || xs->state != XSK_BOUND)
> +	if (xs->state != XSK_BOUND)
>  		return;
> -
> -	xs->state = XSK_UNBOUND;
> +	WRITE_ONCE(xs->state, XSK_UNBOUND);
>  
>  	/* Wait for driver to stop using the xdp socket. */
>  	xdp_del_sk_umem(xs->umem, xs);
> @@ -495,7 +512,9 @@ static int xsk_release(struct socket *sock)
>  	local_bh_enable();
>  
>  	xsk_delete_from_maps(xs);
> +	mutex_lock(&xs->mutex);
>  	xsk_unbind_dev(xs);
> +	mutex_unlock(&xs->mutex);
>  
>  	xskq_destroy(xs->rx);
>  	xskq_destroy(xs->tx);
> @@ -589,19 +608,18 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>  		}
>  
>  		umem_xs = xdp_sk(sock->sk);
> -		if (!umem_xs->umem) {
> -			/* No umem to inherit. */
> +		if (!xsk_is_bound(umem_xs)) {

This changes the error code a bit.
Previously:
   umem exists + xs unbound    --> EINVAL
   no umem     + xs unbound    --> EBADF
   xs bound to different dev/q --> EINVAL

With this change:
   umem exists + xs unbound    --> EBADF
   no umem     + xs unbound    --> EBADF
   xs bound to different dev/q --> EINVAL

Just a note. Not sure if this is important.


>  			err = -EBADF;
>  			sockfd_put(sock);
>  			goto out_unlock;
> -		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
> +		}
> +		if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
>  			err = -EINVAL;
>  			sockfd_put(sock);
>  			goto out_unlock;
>  		}
> -
>  		xdp_get_umem(umem_xs->umem);
> -		xs->umem = umem_xs->umem;
> +		WRITE_ONCE(xs->umem, umem_xs->umem);
>  		sockfd_put(sock);
>  	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
>  		err = -EINVAL;
> @@ -626,10 +644,15 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>  	xdp_add_sk_umem(xs->umem, xs);
>  
>  out_unlock:
> -	if (err)
> +	if (err) {
>  		dev_put(dev);
> -	else
> -		xs->state = XSK_BOUND;
> +	} else {
> +		/* Matches smp_rmb() in bind() for shared umem
> +		 * sockets, and xsk_is_bound().
> +		 */
> +		smp_wmb();
> +		WRITE_ONCE(xs->state, XSK_BOUND);
> +	}
>  out_release:
>  	mutex_unlock(&xs->mutex);
>  	rtnl_unlock();
> @@ -869,7 +892,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
>  	unsigned long pfn;
>  	struct page *qpg;
>  
> -	if (xs->state != XSK_READY)
> +	if (READ_ONCE(xs->state) != XSK_READY)
>  		return -EBUSY;
>  
>  	if (offset == XDP_PGOFF_RX_RING) {
>
Björn Töpel Aug. 26, 2019, 4:34 p.m. UTC | #2
On 2019-08-26 17:24, Ilya Maximets wrote:
> This changes the error code a bit.
> Previously:
>     umem exists + xs unbound    --> EINVAL
>     no umem     + xs unbound    --> EBADF
>     xs bound to different dev/q --> EINVAL
> 
> With this change:
>     umem exists + xs unbound    --> EBADF
>     no umem     + xs unbound    --> EBADF
>     xs bound to different dev/q --> EINVAL
> 
> Just a note. Not sure if this is important.
> 

Note that this is for *shared* umem, so it's very seldom used. Still,
you're right, that strictly this is an uapi break, but I'd vote for the
change still. I find it hard to see that anyone relies on EINVAL/EBADF
for shared umem bind.

Opinions? :-)


Björn
Jonathan Lemon Aug. 26, 2019, 5:54 p.m. UTC | #3
On 25 Aug 2019, at 23:10, Björn Töpel wrote:

> From: Björn Töpel <bjorn.topel@intel.com>
>
> The state variable was read, and written outside the control mutex
> (struct xdp_sock, mutex), without proper barriers and {READ,
> WRITE}_ONCE correctness.
>
> In this commit this issue is addressed, and the state member is now
> used a point of synchronization whether the socket is setup correctly
> or not.
>
> This also fixes a race, found by syzcaller, in xsk_poll() where umem
> could be accessed when stale.
>
> Suggested-by: Hillf Danton <hdanton@sina.com>
> Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com
> Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP 
> rings")
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>

Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Jonathan Lemon Aug. 26, 2019, 5:57 p.m. UTC | #4
On 26 Aug 2019, at 9:34, Björn Töpel wrote:

> On 2019-08-26 17:24, Ilya Maximets wrote:
>> This changes the error code a bit.
>> Previously:
>>     umem exists + xs unbound    --> EINVAL
>>     no umem     + xs unbound    --> EBADF
>>     xs bound to different dev/q --> EINVAL
>>
>> With this change:
>>     umem exists + xs unbound    --> EBADF
>>     no umem     + xs unbound    --> EBADF
>>     xs bound to different dev/q --> EINVAL
>>
>> Just a note. Not sure if this is important.
>>
>
> Note that this is for *shared* umem, so it's very seldom used. Still,
> you're right, that strictly this is an uapi break, but I'd vote for the
> change still. I find it hard to see that anyone relies on EINVAL/EBADF
> for shared umem bind.
>
> Opinions? :-)

I'd agree - if it isn't documented somewhere, it's not an API break. :)
Daniel Borkmann Sept. 3, 2019, 3:22 p.m. UTC | #5
On 8/26/19 8:10 AM, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> The state variable was read, and written outside the control mutex
> (struct xdp_sock, mutex), without proper barriers and {READ,
> WRITE}_ONCE correctness.
> 
> In this commit this issue is addressed, and the state member is now
> used a point of synchronization whether the socket is setup correctly
> or not.
> 
> This also fixes a race, found by syzcaller, in xsk_poll() where umem
> could be accessed when stale.
> 
> Suggested-by: Hillf Danton <hdanton@sina.com>
> Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com
> Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings")
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>

Sorry for the delay.

> ---
>   net/xdp/xsk.c | 57 ++++++++++++++++++++++++++++++++++++---------------
>   1 file changed, 40 insertions(+), 17 deletions(-)
> 
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index f3351013c2a5..8fafa3ce3ae6 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -162,10 +162,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
>   	return err;
>   }
>   
> +static bool xsk_is_bound(struct xdp_sock *xs)
> +{
> +	if (READ_ONCE(xs->state) == XSK_BOUND) {
> +		/* Matches smp_wmb() in bind(). */
> +		smp_rmb();
> +		return true;
> +	}
> +	return false;
> +}
> +
>   int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
>   {
>   	u32 len;
>   
> +	if (!xsk_is_bound(xs))
> +		return -EINVAL;
> +
>   	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
>   		return -EINVAL;
>   
> @@ -362,7 +375,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>   	struct sock *sk = sock->sk;
>   	struct xdp_sock *xs = xdp_sk(sk);
>   
> -	if (unlikely(!xs->dev))
> +	if (unlikely(!xsk_is_bound(xs)))
>   		return -ENXIO;
>   	if (unlikely(!(xs->dev->flags & IFF_UP)))
>   		return -ENETDOWN;
> @@ -378,10 +391,15 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
>   			     struct poll_table_struct *wait)
>   {
>   	unsigned int mask = datagram_poll(file, sock, wait);
> -	struct sock *sk = sock->sk;
> -	struct xdp_sock *xs = xdp_sk(sk);
> -	struct net_device *dev = xs->dev;
> -	struct xdp_umem *umem = xs->umem;
> +	struct xdp_sock *xs = xdp_sk(sock->sk);
> +	struct net_device *dev;
> +	struct xdp_umem *umem;
> +
> +	if (unlikely(!xsk_is_bound(xs)))
> +		return mask;
> +
> +	dev = xs->dev;
> +	umem = xs->umem;
>   
>   	if (umem->need_wakeup)
>   		dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
> @@ -417,10 +435,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
>   {
>   	struct net_device *dev = xs->dev;
>   
> -	if (!dev || xs->state != XSK_BOUND)
> +	if (xs->state != XSK_BOUND)
>   		return;
> -
> -	xs->state = XSK_UNBOUND;
> +	WRITE_ONCE(xs->state, XSK_UNBOUND);
>   
>   	/* Wait for driver to stop using the xdp socket. */
>   	xdp_del_sk_umem(xs->umem, xs);
> @@ -495,7 +512,9 @@ static int xsk_release(struct socket *sock)
>   	local_bh_enable();
>   
>   	xsk_delete_from_maps(xs);
> +	mutex_lock(&xs->mutex);
>   	xsk_unbind_dev(xs);
> +	mutex_unlock(&xs->mutex);
>   
>   	xskq_destroy(xs->rx);
>   	xskq_destroy(xs->tx);
> @@ -589,19 +608,18 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>   		}
>   
>   		umem_xs = xdp_sk(sock->sk);
> -		if (!umem_xs->umem) {
> -			/* No umem to inherit. */
> +		if (!xsk_is_bound(umem_xs)) {
>   			err = -EBADF;
>   			sockfd_put(sock);
>   			goto out_unlock;
> -		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
> +		}
> +		if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
>   			err = -EINVAL;
>   			sockfd_put(sock);
>   			goto out_unlock;
>   		}
> -
>   		xdp_get_umem(umem_xs->umem);
> -		xs->umem = umem_xs->umem;
> +		WRITE_ONCE(xs->umem, umem_xs->umem);
>   		sockfd_put(sock);
>   	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
>   		err = -EINVAL;
> @@ -626,10 +644,15 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>   	xdp_add_sk_umem(xs->umem, xs);
>   
>   out_unlock:
> -	if (err)
> +	if (err) {
>   		dev_put(dev);
> -	else
> -		xs->state = XSK_BOUND;
> +	} else {
> +		/* Matches smp_rmb() in bind() for shared umem
> +		 * sockets, and xsk_is_bound().
> +		 */
> +		smp_wmb();

You write with what this barrier matches/pairs, but would be useful for readers
to have an explanation against what it protects. I presume to have things like
xs->umem public as you seem to guard it behind xsk_is_bound() in xsk_poll() and
other cases? Would be great to have a detailed analysis of all this e.g. in the
commit message so one wouldn't need to guess; right now it feels this is doing
many things at once and w/o further explanation of why READ_ONCE() or others are
omitted sometimes. Would be great to get a lot more clarity into this, perhaps
splitting it up a bit might also help.

> +		WRITE_ONCE(xs->state, XSK_BOUND);
> +	}

Thanks,
Daniel
Björn Töpel Sept. 3, 2019, 3:26 p.m. UTC | #6
On Tue, 3 Sep 2019 at 17:22, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 8/26/19 8:10 AM, Björn Töpel wrote:
> > From: Björn Töpel <bjorn.topel@intel.com>
> >
> > The state variable was read, and written outside the control mutex
> > (struct xdp_sock, mutex), without proper barriers and {READ,
> > WRITE}_ONCE correctness.
> >
> > In this commit this issue is addressed, and the state member is now
> > used a point of synchronization whether the socket is setup correctly
> > or not.
> >
> > This also fixes a race, found by syzcaller, in xsk_poll() where umem
> > could be accessed when stale.
> >
> > Suggested-by: Hillf Danton <hdanton@sina.com>
> > Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com
> > Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings")
> > Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
>
> Sorry for the delay.
>
> > ---
> >   net/xdp/xsk.c | 57 ++++++++++++++++++++++++++++++++++++---------------
> >   1 file changed, 40 insertions(+), 17 deletions(-)
> >
> > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> > index f3351013c2a5..8fafa3ce3ae6 100644
> > --- a/net/xdp/xsk.c
> > +++ b/net/xdp/xsk.c
> > @@ -162,10 +162,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
> >       return err;
> >   }
> >
> > +static bool xsk_is_bound(struct xdp_sock *xs)
> > +{
> > +     if (READ_ONCE(xs->state) == XSK_BOUND) {
> > +             /* Matches smp_wmb() in bind(). */
> > +             smp_rmb();
> > +             return true;
> > +     }
> > +     return false;
> > +}
> > +
> >   int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
> >   {
> >       u32 len;
> >
> > +     if (!xsk_is_bound(xs))
> > +             return -EINVAL;
> > +
> >       if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
> >               return -EINVAL;
> >
> > @@ -362,7 +375,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> >       struct sock *sk = sock->sk;
> >       struct xdp_sock *xs = xdp_sk(sk);
> >
> > -     if (unlikely(!xs->dev))
> > +     if (unlikely(!xsk_is_bound(xs)))
> >               return -ENXIO;
> >       if (unlikely(!(xs->dev->flags & IFF_UP)))
> >               return -ENETDOWN;
> > @@ -378,10 +391,15 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
> >                            struct poll_table_struct *wait)
> >   {
> >       unsigned int mask = datagram_poll(file, sock, wait);
> > -     struct sock *sk = sock->sk;
> > -     struct xdp_sock *xs = xdp_sk(sk);
> > -     struct net_device *dev = xs->dev;
> > -     struct xdp_umem *umem = xs->umem;
> > +     struct xdp_sock *xs = xdp_sk(sock->sk);
> > +     struct net_device *dev;
> > +     struct xdp_umem *umem;
> > +
> > +     if (unlikely(!xsk_is_bound(xs)))
> > +             return mask;
> > +
> > +     dev = xs->dev;
> > +     umem = xs->umem;
> >
> >       if (umem->need_wakeup)
> >               dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
> > @@ -417,10 +435,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
> >   {
> >       struct net_device *dev = xs->dev;
> >
> > -     if (!dev || xs->state != XSK_BOUND)
> > +     if (xs->state != XSK_BOUND)
> >               return;
> > -
> > -     xs->state = XSK_UNBOUND;
> > +     WRITE_ONCE(xs->state, XSK_UNBOUND);
> >
> >       /* Wait for driver to stop using the xdp socket. */
> >       xdp_del_sk_umem(xs->umem, xs);
> > @@ -495,7 +512,9 @@ static int xsk_release(struct socket *sock)
> >       local_bh_enable();
> >
> >       xsk_delete_from_maps(xs);
> > +     mutex_lock(&xs->mutex);
> >       xsk_unbind_dev(xs);
> > +     mutex_unlock(&xs->mutex);
> >
> >       xskq_destroy(xs->rx);
> >       xskq_destroy(xs->tx);
> > @@ -589,19 +608,18 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> >               }
> >
> >               umem_xs = xdp_sk(sock->sk);
> > -             if (!umem_xs->umem) {
> > -                     /* No umem to inherit. */
> > +             if (!xsk_is_bound(umem_xs)) {
> >                       err = -EBADF;
> >                       sockfd_put(sock);
> >                       goto out_unlock;
> > -             } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
> > +             }
> > +             if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
> >                       err = -EINVAL;
> >                       sockfd_put(sock);
> >                       goto out_unlock;
> >               }
> > -
> >               xdp_get_umem(umem_xs->umem);
> > -             xs->umem = umem_xs->umem;
> > +             WRITE_ONCE(xs->umem, umem_xs->umem);
> >               sockfd_put(sock);
> >       } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
> >               err = -EINVAL;
> > @@ -626,10 +644,15 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> >       xdp_add_sk_umem(xs->umem, xs);
> >
> >   out_unlock:
> > -     if (err)
> > +     if (err) {
> >               dev_put(dev);
> > -     else
> > -             xs->state = XSK_BOUND;
> > +     } else {
> > +             /* Matches smp_rmb() in bind() for shared umem
> > +              * sockets, and xsk_is_bound().
> > +              */
> > +             smp_wmb();
>
> You write with what this barrier matches/pairs, but would be useful for readers
> to have an explanation against what it protects. I presume to have things like
> xs->umem public as you seem to guard it behind xsk_is_bound() in xsk_poll() and
> other cases? Would be great to have a detailed analysis of all this e.g. in the
> commit message so one wouldn't need to guess; right now it feels this is doing
> many things at once and w/o further explanation of why READ_ONCE() or others are
> omitted sometimes. Would be great to get a lot more clarity into this, perhaps
> splitting it up a bit might also help.
>

I'll address that. Thanks for the review!


Björn

> > +             WRITE_ONCE(xs->state, XSK_BOUND);
> > +     }
>
> Thanks,
> Daniel
diff mbox series

Patch

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f3351013c2a5..8fafa3ce3ae6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -162,10 +162,23 @@  static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	return err;
 }
 
+static bool xsk_is_bound(struct xdp_sock *xs)
+{
+	if (READ_ONCE(xs->state) == XSK_BOUND) {
+		/* Matches smp_wmb() in bind(). */
+		smp_rmb();
+		return true;
+	}
+	return false;
+}
+
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 len;
 
+	if (!xsk_is_bound(xs))
+		return -EINVAL;
+
 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
@@ -362,7 +375,7 @@  static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
 
-	if (unlikely(!xs->dev))
+	if (unlikely(!xsk_is_bound(xs)))
 		return -ENXIO;
 	if (unlikely(!(xs->dev->flags & IFF_UP)))
 		return -ENETDOWN;
@@ -378,10 +391,15 @@  static unsigned int xsk_poll(struct file *file, struct socket *sock,
 			     struct poll_table_struct *wait)
 {
 	unsigned int mask = datagram_poll(file, sock, wait);
-	struct sock *sk = sock->sk;
-	struct xdp_sock *xs = xdp_sk(sk);
-	struct net_device *dev = xs->dev;
-	struct xdp_umem *umem = xs->umem;
+	struct xdp_sock *xs = xdp_sk(sock->sk);
+	struct net_device *dev;
+	struct xdp_umem *umem;
+
+	if (unlikely(!xsk_is_bound(xs)))
+		return mask;
+
+	dev = xs->dev;
+	umem = xs->umem;
 
 	if (umem->need_wakeup)
 		dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
@@ -417,10 +435,9 @@  static void xsk_unbind_dev(struct xdp_sock *xs)
 {
 	struct net_device *dev = xs->dev;
 
-	if (!dev || xs->state != XSK_BOUND)
+	if (xs->state != XSK_BOUND)
 		return;
-
-	xs->state = XSK_UNBOUND;
+	WRITE_ONCE(xs->state, XSK_UNBOUND);
 
 	/* Wait for driver to stop using the xdp socket. */
 	xdp_del_sk_umem(xs->umem, xs);
@@ -495,7 +512,9 @@  static int xsk_release(struct socket *sock)
 	local_bh_enable();
 
 	xsk_delete_from_maps(xs);
+	mutex_lock(&xs->mutex);
 	xsk_unbind_dev(xs);
+	mutex_unlock(&xs->mutex);
 
 	xskq_destroy(xs->rx);
 	xskq_destroy(xs->tx);
@@ -589,19 +608,18 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		}
 
 		umem_xs = xdp_sk(sock->sk);
-		if (!umem_xs->umem) {
-			/* No umem to inherit. */
+		if (!xsk_is_bound(umem_xs)) {
 			err = -EBADF;
 			sockfd_put(sock);
 			goto out_unlock;
-		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
+		}
+		if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
 			err = -EINVAL;
 			sockfd_put(sock);
 			goto out_unlock;
 		}
-
 		xdp_get_umem(umem_xs->umem);
-		xs->umem = umem_xs->umem;
+		WRITE_ONCE(xs->umem, umem_xs->umem);
 		sockfd_put(sock);
 	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
 		err = -EINVAL;
@@ -626,10 +644,15 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
-	if (err)
+	if (err) {
 		dev_put(dev);
-	else
-		xs->state = XSK_BOUND;
+	} else {
+		/* Matches smp_rmb() in bind() for shared umem
+		 * sockets, and xsk_is_bound().
+		 */
+		smp_wmb();
+		WRITE_ONCE(xs->state, XSK_BOUND);
+	}
 out_release:
 	mutex_unlock(&xs->mutex);
 	rtnl_unlock();
@@ -869,7 +892,7 @@  static int xsk_mmap(struct file *file, struct socket *sock,
 	unsigned long pfn;
 	struct page *qpg;
 
-	if (xs->state != XSK_READY)
+	if (READ_ONCE(xs->state) != XSK_READY)
 		return -EBUSY;
 
 	if (offset == XDP_PGOFF_RX_RING) {