
[Bugme-new,Bug,12515] New: possible circular locking #0: (sk_lock-AF_PACKET){--..}, at: [<c1279838>] sock_setsockopt+0x12b/0x4a4

Message ID 20090130124947.GA6886@gondor.apana.org.au
State Accepted, archived
Delegated to: David Miller

Commit Message

Herbert Xu Jan. 30, 2009, 12:49 p.m. UTC
On Fri, Jan 30, 2009 at 05:12:50PM +1100, Herbert Xu wrote:
> 
> Well, doing the copy under sk_lock is pretty common through all
> protocols.  So I think it'd be safer to change the other path,
> which is doing the odd thing here, i.e., ->mmap() grabbing the
> socket lock while holding mmap_sem.
> 
> In fact, it would appear that we don't really need the socket lock
> in ->mmap() since it only needs to ensure that pg_vec* doesn't
> get yanked or changed.  So this patch should work:
> 
> packet: Avoid lock_sock in mmap handler

Dave pointed out that a spin lock is illegal for this purpose
as vm_insert_page can do a GFP_KERNEL allocation.  So I've added
a mutex for this.
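
To make the constraint concrete, here is an illustrative fragment (not
part of the patch; "example_lock" is a made-up name).  vm_insert_page()
may allocate page tables with GFP_KERNEL, i.e. it can sleep, which is
forbidden in the atomic context a spinlock creates but is fine under a
mutex:

	/* Broken: vm_insert_page() may sleep, a spinlock holder may not. */
	spin_lock(&example_lock);
	err = vm_insert_page(vma, start, page);
	spin_unlock(&example_lock);

	/* OK: a mutex holder is allowed to sleep. */
	mutex_lock(&po->pg_vec_lock);
	err = vm_insert_page(vma, start, page);
	mutex_unlock(&po->pg_vec_lock);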

I've also widened the critical section in packet_set_ring since
we need the mapped check to be within it.

packet: Avoid lock_sock in mmap handler

As the mmap handler gets called under mmap_sem, and we may grab
mmap_sem elsewhere under the socket lock to access user data, we
should avoid grabbing the socket lock in the mmap handler.
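
To illustrate the inversion outside the kernel, here is a minimal
standalone sketch with pthread mutexes standing in for sk_lock and
mmap_sem (not kernel code, not part of the patch; build with
gcc -pthread):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-ins for the two locks involved in the report. */
static pthread_mutex_t fake_sk_lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fake_mmap_sem = PTHREAD_MUTEX_INITIALIZER;

/* setsockopt()-like path: socket lock first, then mmap_sem
 * (the way a copy_from_user() fault would take it). */
static void *setsockopt_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&fake_sk_lock);
	usleep(10000);			/* widen the race window */
	pthread_mutex_lock(&fake_mmap_sem);
	pthread_mutex_unlock(&fake_mmap_sem);
	pthread_mutex_unlock(&fake_sk_lock);
	return NULL;
}

/* Old packet_mmap()-like path: entered with mmap_sem already held,
 * then takes the socket lock. */
static void *mmap_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&fake_mmap_sem);
	usleep(10000);
	pthread_mutex_lock(&fake_sk_lock);
	pthread_mutex_unlock(&fake_sk_lock);
	pthread_mutex_unlock(&fake_mmap_sem);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, setsockopt_path, NULL);
	pthread_create(&b, NULL, mmap_path, NULL);

	/* With the sleeps above, each thread ends up holding one lock
	 * while waiting forever for the other: the AB-BA cycle lockdep
	 * warned about.  Expect this to hang. */
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	puts("finished without deadlocking (lucky timing)");
	return 0;
}

Dropping the socket lock from the mmap path (as the patch does, by
switching to pg_vec_lock) removes one edge of the cycle, so no ordering
loop remains.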

Since the only thing we care about in the mmap handler is for
pg_vec* to be invariant, i.e., to exclude packet_set_ring, we
can achieve this by simply using sk_receive_queue.lock.

I resisted the temptation to create a new spin lock because the
mmap path isn't exactly common.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>


Cheers,

Comments

Jarek Poplawski Jan. 30, 2009, 1:56 p.m. UTC | #1
On Fri, Jan 30, 2009 at 11:49:47PM +1100, Herbert Xu wrote:
...
> Dave pointed out that a spin lock is illegal for this purpose
> as vm_insert_page can do a GFP_KERNEL allocation.  So I've added
> a mutex for this.
...
> I resisted the temptation to create a new spin lock because the
> mmap path isn't exactly common.

So, Dave is stronger than the temptation...

Jarek P.
Martin MOKREJŠ Jan. 30, 2009, 5:17 p.m. UTC | #2
This patch works for me.
Thanks
M.

Herbert Xu wrote:
> On Fri, Jan 30, 2009 at 05:12:50PM +1100, Herbert Xu wrote:
>> Well, doing the copy under sk_lock is pretty common through all
>> protocols.  So I think it'd be safer to change the other path,
>> which is doing the odd thing here, i.e., ->mmap() grabbing the
>> socket lock while holding mmap_sem.
>>
>> In fact, it would appear that we don't really need the socket lock
>> in ->mmap() since it only needs to ensure that pg_vec* doesn't
>> get yanked or changed.  So this patch should work:
>>
>> packet: Avoid lock_sock in mmap handler
> 
> Dave pointed out that a spin lock is illegal for this purpose
> as vm_insert_page can do a GFP_KERNEL allocation.  So I've added
> a mutex for this.
> 
> I've also widened the critical section in packet_set_ring since
> we need the mapped check to be within it.
> 
> packet: Avoid lock_sock in mmap handler
> 
> As the mmap handler gets called under mmap_sem, and we may grab
> mmap_sem elsewhere under the socket lock to access user data, we
> should avoid grabbing the socket lock in the mmap handler.
> 
> Since the only thing we care about in the mmap handler is for
> pg_vec* to be invariant, i.e., to exclude packet_set_ring, we
> can achieve this by simply using sk_receive_queue.lock.
> 
> I resisted the temptation to create a new spin lock because the
> mmap path isn't exactly common.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
> 
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 5f94db2..9454d4a 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -77,6 +77,7 @@
>  #include <linux/poll.h>
>  #include <linux/module.h>
>  #include <linux/init.h>
> +#include <linux/mutex.h>
>  
>  #ifdef CONFIG_INET
>  #include <net/inet_common.h>
> @@ -175,6 +176,7 @@ struct packet_sock {
>  #endif
>  	struct packet_type	prot_hook;
>  	spinlock_t		bind_lock;
> +	struct mutex		pg_vec_lock;
>  	unsigned int		running:1,	/* prot_hook is attached*/
>  				auxdata:1,
>  				origdev:1;
> @@ -1069,6 +1071,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
>  	 */
>  
>  	spin_lock_init(&po->bind_lock);
> +	mutex_init(&po->pg_vec_lock);
>  	po->prot_hook.func = packet_rcv;
>  
>  	if (sock->type == SOCK_PACKET)
> @@ -1865,6 +1868,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
>  	synchronize_net();
>  
>  	err = -EBUSY;
> +	mutex_lock(&po->pg_vec_lock);
>  	if (closing || atomic_read(&po->mapped) == 0) {
>  		err = 0;
>  #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
> @@ -1886,6 +1890,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
>  		if (atomic_read(&po->mapped))
>  			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
>  	}
> +	mutex_unlock(&po->pg_vec_lock);
>  
>  	spin_lock(&po->bind_lock);
>  	if (was_running && !po->running) {
> @@ -1918,7 +1923,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
>  
>  	size = vma->vm_end - vma->vm_start;
>  
> -	lock_sock(sk);
> +	mutex_lock(&po->pg_vec_lock);
>  	if (po->pg_vec == NULL)
>  		goto out;
>  	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
> @@ -1941,7 +1946,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
>  	err = 0;
>  
>  out:
> -	release_sock(sk);
> +	mutex_unlock(&po->pg_vec_lock);
>  	return err;
>  }
>  #endif
Herbert Xu Jan. 30, 2009, 10:01 p.m. UTC | #3
On Fri, Jan 30, 2009 at 01:56:17PM +0000, Jarek Poplawski wrote:
>
> > I resisted the temptation to create a new spin lock because the
> > mmap path isn't exactly common.
> 
> So, Dave is stronger than the temptation...

Heh, that sentence should be removed from the description.

Cheers,
David Miller Jan. 30, 2009, 10:05 p.m. UTC | #4
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 31 Jan 2009 09:01:28 +1100

> On Fri, Jan 30, 2009 at 01:56:17PM +0000, Jarek Poplawski wrote:
> >
> > > I resisted the temptation to create a new spin lock because the
> > > mmap path isn't exactly common.
> > 
> > So, Dave is stronger than the temptation...
> 
> Heh, that sentence should be removed from the description.

I might leave it in there for laughs :-)
David Miller Jan. 30, 2009, 10:13 p.m. UTC | #5
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 30 Jan 2009 23:49:47 +1100

> packet: Avoid lock_sock in mmap handler
> 
> As the mmap handler gets called under mmap_sem, and we may grab
> mmap_sem elsewhere under the socket lock to access user data, we
> should avoid grabbing the socket lock in the mmap handler.
> 
> Since the only thing we care about in the mmap handler is for
> pg_vec* to be invariant, i.e., to exclude packet_set_ring, we
> can achieve this by simply using sk_receive_queue.lock.
> 
> I resisted the temptation to create a new spin lock because the
> mmap path isn't exactly common.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Looks good, applied.

I rewrote the commit message as follows so that it actually
matches what happens in the fix ;-)

--------------------
packet: Avoid lock_sock in mmap handler

As the mmap handler gets called under mmap_sem, and we may grab
mmap_sem elsewhere under the socket lock to access user data, we
should avoid grabbing the socket lock in the mmap handler.

Since the only thing we care about in the mmap handler is for
pg_vec* to be invariant, i.e., to exclude packet_set_ring, we
can achieve this by simply using a new mutex.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

Patch

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5f94db2..9454d4a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -77,6 +77,7 @@ 
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/mutex.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -175,6 +176,7 @@ struct packet_sock {
 #endif
 	struct packet_type	prot_hook;
 	spinlock_t		bind_lock;
+	struct mutex		pg_vec_lock;
 	unsigned int		running:1,	/* prot_hook is attached*/
 				auxdata:1,
 				origdev:1;
@@ -1069,6 +1071,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
 	 */
 
 	spin_lock_init(&po->bind_lock);
+	mutex_init(&po->pg_vec_lock);
 	po->prot_hook.func = packet_rcv;
 
 	if (sock->type == SOCK_PACKET)
@@ -1865,6 +1868,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 	synchronize_net();
 
 	err = -EBUSY;
+	mutex_lock(&po->pg_vec_lock);
 	if (closing || atomic_read(&po->mapped) == 0) {
 		err = 0;
 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
@@ -1886,6 +1890,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 		if (atomic_read(&po->mapped))
 			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
 	}
+	mutex_unlock(&po->pg_vec_lock);
 
 	spin_lock(&po->bind_lock);
 	if (was_running && !po->running) {
@@ -1918,7 +1923,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
 
 	size = vma->vm_end - vma->vm_start;
 
-	lock_sock(sk);
+	mutex_lock(&po->pg_vec_lock);
 	if (po->pg_vec == NULL)
 		goto out;
 	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
@@ -1941,7 +1946,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
 	err = 0;
 
 out:
-	release_sock(sk);
+	mutex_unlock(&po->pg_vec_lock);
 	return err;
 }
 #endif