Message ID | 201002111655.40349.arnd@arndb.de |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
On Thu, 2010-02-11 at 16:55 +0100, Arnd Bergmann wrote: > The RCU usage in the original code was broken because > there are cases where we possibly sleep with rcu_read_lock > held. As a fix, change the macvtap_file_get_queue to > get a reference on the socket and the netdev instead of > taking the full rcu_read_lock. > > Also, change macvtap_file_get_queue failure case to > not require a subsequent macvtap_file_put_queue, as > pointed out by Ed Swierk. Looks good. Acked-by: Sridhar Samudrala <sri@us.ibm.com> > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > Cc: Ed Swierk <eswierk@aristanetworks.com> > Cc: Sridhar Samudrala <sri@us.ibm.com> > --- > drivers/net/macvtap.c | 35 ++++++++++++++++++++++------------- > 1 files changed, 22 insertions(+), 13 deletions(-) > > Please disregard v1 of this patch, I accidentally sent Sridhar's > patch... > > diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c > index ad1f6ef..fe7656b 100644 > --- a/drivers/net/macvtap.c > +++ b/drivers/net/macvtap.c > @@ -70,7 +70,8 @@ static struct cdev macvtap_cdev; > * exists. > * > * The callbacks from macvlan are always done with rcu_read_lock held > - * already, while in the file_operations, we get it ourselves. > + * already. For calls from file_operations, we use the rcu_read_lock_bh > + * to get a reference count on the socket and the device. > * > * When destroying a queue, we remove the pointers from the file and > * from the dev and then synchronize_rcu to make sure no thread is > @@ -159,13 +160,21 @@ static void macvtap_del_queues(struct net_device *dev) > > static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file) > { > + struct macvtap_queue *q; > rcu_read_lock_bh(); > - return rcu_dereference(file->private_data); > + q = rcu_dereference(file->private_data); > + if (q) { > + sock_hold(&q->sk); > + dev_hold(q->vlan->dev); > + } > + rcu_read_unlock_bh(); > + return q; > } > > -static inline void macvtap_file_put_queue(void) > +static inline void macvtap_file_put_queue(struct macvtap_queue *q) > { > - rcu_read_unlock_bh(); > + sock_put(&q->sk); > + dev_put(q->vlan->dev); > } > > /* > @@ -314,8 +323,8 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) > sock_writeable(&q->sk))) > mask |= POLLOUT | POLLWRNORM; > > + macvtap_file_put_queue(q); > out: > - macvtap_file_put_queue(); > return mask; > } > > @@ -366,8 +375,8 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv, > > result = macvtap_get_user(q, iv, iov_length(iv, count), > file->f_flags & O_NONBLOCK); > + macvtap_file_put_queue(q); > out: > - macvtap_file_put_queue(); > return result; > } > > @@ -398,10 +407,8 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, > struct sk_buff *skb; > ssize_t len, ret = 0; > > - if (!q) { > - ret = -ENOLINK; > - goto out; > - } > + if (!q) > + return -ENOLINK; > > len = iov_length(iv, count); > if (len < 0) { > @@ -437,7 +444,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, > remove_wait_queue(q->sk.sk_sleep, &wait); > > out: > - macvtap_file_put_queue(); > + macvtap_file_put_queue(q); > return ret; > } > > @@ -468,7 +475,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, > if (!q) > return -ENOLINK; > memcpy(devname, q->vlan->dev->name, sizeof(devname)); > - macvtap_file_put_queue(); > + macvtap_file_put_queue(q); > > if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || > put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags)) > @@ -485,8 +492,10 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, > return -EFAULT; > > q = macvtap_file_get_queue(file); > + if (!q) > + return -ENOLINK; > q->sk.sk_sndbuf = u; > - macvtap_file_put_queue(); > + macvtap_file_put_queue(q); > return 0; > > case TUNSETOFFLOAD: -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Feb 11, 2010 at 7:55 AM, Arnd Bergmann <arnd@arndb.de> wrote: > The RCU usage in the original code was broken because > there are cases where we possibly sleep with rcu_read_lock > held. As a fix, change the macvtap_file_get_queue to > get a reference on the socket and the netdev instead of > taking the full rcu_read_lock. > > Also, change macvtap_file_get_queue failure case to > not require a subsequent macvtap_file_put_queue, as > pointed out by Ed Swierk. Works for me. Thanks. Acked-by: Ed Swierk <eswierk@aristanetworks.com> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Sridhar Samudrala <sri@us.ibm.com> Date: Thu, 11 Feb 2010 13:09:37 -0800 > On Thu, 2010-02-11 at 16:55 +0100, Arnd Bergmann wrote: >> The RCU usage in the original code was broken because >> there are cases where we possibly sleep with rcu_read_lock >> held. As a fix, change the macvtap_file_get_queue to >> get a reference on the socket and the netdev instead of >> taking the full rcu_read_lock. >> >> Also, change macvtap_file_get_queue failure case to >> not require a subsequent macvtap_file_put_queue, as >> pointed out by Ed Swierk. > > Looks good. > > Acked-by: Sridhar Samudrala <sri@us.ibm.com> Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tuesday 16 February 2010, David Miller wrote: > From: Sridhar Samudrala <sri@us.ibm.com> > > Acked-by: Sridhar Samudrala <sri@us.ibm.com> > > Applied. Thanks for applying this one, but unfortunately I had reworked the patch in a different way in the meantime and Sridhar has added another patch conflicting with my rework. I think I've got it all sorted out now, so I'll send a new series with the three patches I was intending to get merged, but rebased on this one and mostly reverting it. Sridhar, please have a look at this series and send an Ack if you're fine with it. Arnd -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index ad1f6ef..fe7656b 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -70,7 +70,8 @@ static struct cdev macvtap_cdev; * exists. * * The callbacks from macvlan are always done with rcu_read_lock held - * already, while in the file_operations, we get it ourselves. + * already. For calls from file_operations, we use the rcu_read_lock_bh + * to get a reference count on the socket and the device. * * When destroying a queue, we remove the pointers from the file and * from the dev and then synchronize_rcu to make sure no thread is @@ -159,13 +160,21 @@ static void macvtap_del_queues(struct net_device *dev) static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file) { + struct macvtap_queue *q; rcu_read_lock_bh(); - return rcu_dereference(file->private_data); + q = rcu_dereference(file->private_data); + if (q) { + sock_hold(&q->sk); + dev_hold(q->vlan->dev); + } + rcu_read_unlock_bh(); + return q; } -static inline void macvtap_file_put_queue(void) +static inline void macvtap_file_put_queue(struct macvtap_queue *q) { - rcu_read_unlock_bh(); + sock_put(&q->sk); + dev_put(q->vlan->dev); } /* @@ -314,8 +323,8 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) sock_writeable(&q->sk))) mask |= POLLOUT | POLLWRNORM; + macvtap_file_put_queue(q); out: - macvtap_file_put_queue(); return mask; } @@ -366,8 +375,8 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv, result = macvtap_get_user(q, iv, iov_length(iv, count), file->f_flags & O_NONBLOCK); + macvtap_file_put_queue(q); out: - macvtap_file_put_queue(); return result; } @@ -398,10 +407,8 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, struct sk_buff *skb; ssize_t len, ret = 0; - if (!q) { - ret = -ENOLINK; - goto out; - } + if (!q) + return -ENOLINK; len = iov_length(iv, count); if (len < 0) { @@ -437,7 +444,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, remove_wait_queue(q->sk.sk_sleep, &wait); out: - macvtap_file_put_queue(); + macvtap_file_put_queue(q); return ret; } @@ -468,7 +475,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, if (!q) return -ENOLINK; memcpy(devname, q->vlan->dev->name, sizeof(devname)); - macvtap_file_put_queue(); + macvtap_file_put_queue(q); if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags)) @@ -485,8 +492,10 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return -EFAULT; q = macvtap_file_get_queue(file); + if (!q) + return -ENOLINK; q->sk.sk_sndbuf = u; - macvtap_file_put_queue(); + macvtap_file_put_queue(q); return 0; case TUNSETOFFLOAD:
The RCU usage in the original code was broken because there are cases where we possibly sleep with rcu_read_lock held. As a fix, change the macvtap_file_get_queue to get a reference on the socket and the netdev instead of taking the full rcu_read_lock. Also, change macvtap_file_get_queue failure case to not require a subsequent macvtap_file_put_queue, as pointed out by Ed Swierk. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Cc: Ed Swierk <eswierk@aristanetworks.com> Cc: Sridhar Samudrala <sri@us.ibm.com> --- drivers/net/macvtap.c | 35 ++++++++++++++++++++++------------- 1 files changed, 22 insertions(+), 13 deletions(-) Please disregard v1 of this patch, I accidentally sent Sridhar's patch...