diff mbox

[v2] net/macvtap: fix reference counting

Message ID 201002111655.40349.arnd@arndb.de
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Arnd Bergmann Feb. 11, 2010, 3:55 p.m. UTC
The RCU usage in the original code was broken because
there are cases where we possibly sleep with rcu_read_lock
held. As a fix, change the macvtap_file_get_queue to
get a reference on the socket and the netdev instead of
taking the full rcu_read_lock.

Also, change macvtap_file_get_queue failure case to
not require a subsequent macvtap_file_put_queue, as
pointed out by Ed Swierk.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ed Swierk <eswierk@aristanetworks.com>
Cc: Sridhar Samudrala <sri@us.ibm.com>
---
 drivers/net/macvtap.c |   35 ++++++++++++++++++++++-------------
 1 files changed, 22 insertions(+), 13 deletions(-)

Please disregard v1 of this patch, I accidentally sent Sridhar's
patch...

Comments

Sridhar Samudrala Feb. 11, 2010, 9:09 p.m. UTC | #1
On Thu, 2010-02-11 at 16:55 +0100, Arnd Bergmann wrote:
> The RCU usage in the original code was broken because
> there are cases where we possibly sleep with rcu_read_lock
> held. As a fix, change the macvtap_file_get_queue to
> get a reference on the socket and the netdev instead of
> taking the full rcu_read_lock.
> 
> Also, change macvtap_file_get_queue failure case to
> not require a subsequent macvtap_file_put_queue, as
> pointed out by Ed Swierk.

Looks good.

Acked-by: Sridhar Samudrala <sri@us.ibm.com>

> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> Cc: Ed Swierk <eswierk@aristanetworks.com>
> Cc: Sridhar Samudrala <sri@us.ibm.com>
> ---
>  drivers/net/macvtap.c |   35 ++++++++++++++++++++++-------------
>  1 files changed, 22 insertions(+), 13 deletions(-)
> 
> Please disregard v1 of this patch, I accidentally sent Sridhar's
> patch...
> 
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index ad1f6ef..fe7656b 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -70,7 +70,8 @@ static struct cdev macvtap_cdev;
>   * exists.
>   *
>   * The callbacks from macvlan are always done with rcu_read_lock held
> - * already, while in the file_operations, we get it ourselves.
> + * already. For calls from file_operations, we use the rcu_read_lock_bh
> + * to get a reference count on the socket and the device.
>   *
>   * When destroying a queue, we remove the pointers from the file and
>   * from the dev and then synchronize_rcu to make sure no thread is
> @@ -159,13 +160,21 @@ static void macvtap_del_queues(struct net_device *dev)
> 
>  static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file)
>  {
> +	struct macvtap_queue *q;
>  	rcu_read_lock_bh();
> -	return rcu_dereference(file->private_data);
> +	q = rcu_dereference(file->private_data);
> +	if (q) {
> +		sock_hold(&q->sk);
> +		dev_hold(q->vlan->dev);
> +	}
> +	rcu_read_unlock_bh();
> +	return q;
>  }
> 
> -static inline void macvtap_file_put_queue(void)
> +static inline void macvtap_file_put_queue(struct macvtap_queue *q)
>  {
> -	rcu_read_unlock_bh();
> +	sock_put(&q->sk);
> +	dev_put(q->vlan->dev);
>  }
> 
>  /*
> @@ -314,8 +323,8 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
>  	     sock_writeable(&q->sk)))
>  		mask |= POLLOUT | POLLWRNORM;
> 
> +	macvtap_file_put_queue(q);
>  out:
> -	macvtap_file_put_queue();
>  	return mask;
>  }
> 
> @@ -366,8 +375,8 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
> 
>  	result = macvtap_get_user(q, iv, iov_length(iv, count),
>  			      file->f_flags & O_NONBLOCK);
> +	macvtap_file_put_queue(q);
>  out:
> -	macvtap_file_put_queue();
>  	return result;
>  }
> 
> @@ -398,10 +407,8 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
>  	struct sk_buff *skb;
>  	ssize_t len, ret = 0;
> 
> -	if (!q) {
> -		ret = -ENOLINK;
> -		goto out;
> -	}
> +	if (!q)
> +		return -ENOLINK;
> 
>  	len = iov_length(iv, count);
>  	if (len < 0) {
> @@ -437,7 +444,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
>  	remove_wait_queue(q->sk.sk_sleep, &wait);
> 
>  out:
> -	macvtap_file_put_queue();
> +	macvtap_file_put_queue(q);
>  	return ret;
>  }
> 
> @@ -468,7 +475,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
>  		if (!q)
>  			return -ENOLINK;
>  		memcpy(devname, q->vlan->dev->name, sizeof(devname));
> -		macvtap_file_put_queue();
> +		macvtap_file_put_queue(q);
> 
>  		if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) ||
>  		    put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags))
> @@ -485,8 +492,10 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
>  			return -EFAULT;
> 
>  		q = macvtap_file_get_queue(file);
> +		if (!q)
> +			return -ENOLINK;
>  		q->sk.sk_sndbuf = u;
> -		macvtap_file_put_queue();
> +		macvtap_file_put_queue(q);
>  		return 0;
> 
>  	case TUNSETOFFLOAD:

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ed Swierk Feb. 12, 2010, 8:58 p.m. UTC | #2
On Thu, Feb 11, 2010 at 7:55 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> The RCU usage in the original code was broken because
> there are cases where we possibly sleep with rcu_read_lock
> held. As a fix, change the macvtap_file_get_queue to
> get a reference on the socket and the netdev instead of
> taking the full rcu_read_lock.
>
> Also, change macvtap_file_get_queue failure case to
> not require a subsequent macvtap_file_put_queue, as
> pointed out by Ed Swierk.

Works for me. Thanks.

Acked-by: Ed Swierk <eswierk@aristanetworks.com>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Feb. 16, 2010, 5:53 a.m. UTC | #3
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Thu, 11 Feb 2010 13:09:37 -0800

> On Thu, 2010-02-11 at 16:55 +0100, Arnd Bergmann wrote:
>> The RCU usage in the original code was broken because
>> there are cases where we possibly sleep with rcu_read_lock
>> held. As a fix, change the macvtap_file_get_queue to
>> get a reference on the socket and the netdev instead of
>> taking the full rcu_read_lock.
>> 
>> Also, change macvtap_file_get_queue failure case to
>> not require a subsequent macvtap_file_put_queue, as
>> pointed out by Ed Swierk.
> 
> Looks good.
> 
> Acked-by: Sridhar Samudrala <sri@us.ibm.com>

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Feb. 18, 2010, 3:44 p.m. UTC | #4
On Tuesday 16 February 2010, David Miller wrote:
> From: Sridhar Samudrala <sri@us.ibm.com>
> > Acked-by: Sridhar Samudrala <sri@us.ibm.com>
> 
> Applied.

Thanks for applying this one, but unfortunately I had reworked
the patch in a different way in the meantime and Sridhar has
added another patch conflicting with my rework. I think I've
got it all sorted out now, so I'll send a new series with
the three patches I was intending to get merged, but rebased
on this one and mostly reverting it.

Sridhar, please have a look at this series and send an Ack
if you're fine with it.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index ad1f6ef..fe7656b 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -70,7 +70,8 @@  static struct cdev macvtap_cdev;
  * exists.
  *
  * The callbacks from macvlan are always done with rcu_read_lock held
- * already, while in the file_operations, we get it ourselves.
+ * already. For calls from file_operations, we use the rcu_read_lock_bh
+ * to get a reference count on the socket and the device.
  *
  * When destroying a queue, we remove the pointers from the file and
  * from the dev and then synchronize_rcu to make sure no thread is
@@ -159,13 +160,21 @@  static void macvtap_del_queues(struct net_device *dev)
 
 static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file)
 {
+	struct macvtap_queue *q;
 	rcu_read_lock_bh();
-	return rcu_dereference(file->private_data);
+	q = rcu_dereference(file->private_data);
+	if (q) {
+		sock_hold(&q->sk);
+		dev_hold(q->vlan->dev);
+	}
+	rcu_read_unlock_bh();
+	return q;
 }
 
-static inline void macvtap_file_put_queue(void)
+static inline void macvtap_file_put_queue(struct macvtap_queue *q)
 {
-	rcu_read_unlock_bh();
+	sock_put(&q->sk);
+	dev_put(q->vlan->dev);
 }
 
 /*
@@ -314,8 +323,8 @@  static unsigned int macvtap_poll(struct file *file, poll_table * wait)
 	     sock_writeable(&q->sk)))
 		mask |= POLLOUT | POLLWRNORM;
 
+	macvtap_file_put_queue(q);
 out:
-	macvtap_file_put_queue();
 	return mask;
 }
 
@@ -366,8 +375,8 @@  static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
 
 	result = macvtap_get_user(q, iv, iov_length(iv, count),
 			      file->f_flags & O_NONBLOCK);
+	macvtap_file_put_queue(q);
 out:
-	macvtap_file_put_queue();
 	return result;
 }
 
@@ -398,10 +407,8 @@  static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
 	struct sk_buff *skb;
 	ssize_t len, ret = 0;
 
-	if (!q) {
-		ret = -ENOLINK;
-		goto out;
-	}
+	if (!q)
+		return -ENOLINK;
 
 	len = iov_length(iv, count);
 	if (len < 0) {
@@ -437,7 +444,7 @@  static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
 	remove_wait_queue(q->sk.sk_sleep, &wait);
 
 out:
-	macvtap_file_put_queue();
+	macvtap_file_put_queue(q);
 	return ret;
 }
 
@@ -468,7 +475,7 @@  static long macvtap_ioctl(struct file *file, unsigned int cmd,
 		if (!q)
 			return -ENOLINK;
 		memcpy(devname, q->vlan->dev->name, sizeof(devname));
-		macvtap_file_put_queue();
+		macvtap_file_put_queue(q);
 
 		if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) ||
 		    put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags))
@@ -485,8 +492,10 @@  static long macvtap_ioctl(struct file *file, unsigned int cmd,
 			return -EFAULT;
 
 		q = macvtap_file_get_queue(file);
+		if (!q)
+			return -ENOLINK;
 		q->sk.sk_sndbuf = u;
-		macvtap_file_put_queue();
+		macvtap_file_put_queue(q);
 		return 0;
 
 	case TUNSETOFFLOAD: