diff mbox

use-after-free in sock_wake_async

Message ID 1448418495.22599.320.camel@edumazet-glaptop2.roam.corp.google.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Nov. 25, 2015, 2:28 a.m. UTC
Dmitry, could you test following patch with your setup ?

( I tried to reproduce the error you reported but could not )

Inode can be freed (without RCU grace period), but not the socket or
sk_wq

By using sk_wq in the critical paths, we do not dereference the inode,



Thanks !

 include/linux/net.h |    2 +-
 include/net/sock.h  |    8 ++++++--
 net/core/stream.c   |    2 +-
 net/sctp/socket.c   |    6 +++++-
 net/socket.c        |   16 +++++-----------
 5 files changed, 18 insertions(+), 16 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet Nov. 25, 2015, 5:43 a.m. UTC | #1
On Tue, 2015-11-24 at 18:28 -0800, Eric Dumazet wrote:
> Dmitry, could you test following patch with your setup ?
> 
> ( I tried to reproduce the error you reported but could not )
> 
> Inode can be freed (without RCU grace period), but not the socket or
> sk_wq
> 
> By using sk_wq in the critical paths, we do not dereference the inode,
> 
> 

I finally was able to reproduce the warning (with more instances running
in parallel), and apparently this patch solves the problem.

> 
> Thanks !
> 
>  include/linux/net.h |    2 +-
>  include/net/sock.h  |    8 ++++++--
>  net/core/stream.c   |    2 +-
>  net/sctp/socket.c   |    6 +++++-
>  net/socket.c        |   16 +++++-----------
>  5 files changed, 18 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 70ac5e28e6b7..6b93ec234ce8 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -202,7 +202,7 @@ enum {
>  	SOCK_WAKE_URG,
>  };
>  
> -int sock_wake_async(struct socket *sk, int how, int band);
> +int sock_wake_async(struct socket *sock, struct socket_wq *wq, int how, int band);
>  int sock_register(const struct net_proto_family *fam);
>  void sock_unregister(int family);
>  int __sock_create(struct net *net, int family, int type, int proto,
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 7f89e4ba18d1..af78f9e7a218 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2007,8 +2007,12 @@ static inline unsigned long sock_wspace(struct sock *sk)
>  
>  static inline void sk_wake_async(struct sock *sk, int how, int band)
>  {
> -	if (sock_flag(sk, SOCK_FASYNC))
> -		sock_wake_async(sk->sk_socket, how, band);
> +	if (sock_flag(sk, SOCK_FASYNC)) {
> +		rcu_read_lock();
> +		sock_wake_async(sk->sk_socket, rcu_dereference(sk->sk_wq),
> +				how, band);
> +		rcu_read_unlock();
> +	}
>  }
>  
>  /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
> diff --git a/net/core/stream.c b/net/core/stream.c
> index d70f77a0c889..92682228919d 100644
> --- a/net/core/stream.c
> +++ b/net/core/stream.c
> @@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk)
>  			wake_up_interruptible_poll(&wq->wait, POLLOUT |
>  						POLLWRNORM | POLLWRBAND);
>  		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
> -			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
> +			sock_wake_async(sock, wq, SOCK_WAKE_SPACE, POLL_OUT);
>  		rcu_read_unlock();
>  	}
>  }
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 897c01c029ca..6ab04866a1e7 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -6817,9 +6817,13 @@ static void __sctp_write_space(struct sctp_association *asoc)
>  			 * here by modeling from the current TCP/UDP code.
>  			 * We have not tested with it yet.
>  			 */
> -			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
> +			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
> +				rcu_read_lock();
>  				sock_wake_async(sock,
> +						rcu_dereference(sk->sk_wq),
>  						SOCK_WAKE_SPACE, POLL_OUT);
> +				rcu_read_unlock();
> +			}
>  		}
>  	}
>  }
> diff --git a/net/socket.c b/net/socket.c
> index dd2c247c99e3..8df62c8bef90 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -1058,18 +1058,12 @@ static int sock_fasync(int fd, struct file *filp, int on)
>  
>  /* This function may be called only under socket lock or callback_lock or rcu_lock */
>  
> -int sock_wake_async(struct socket *sock, int how, int band)
> +int sock_wake_async(struct socket *sock, struct socket_wq *wq,
> +		    int how, int band)
>  {
> -	struct socket_wq *wq;
> -
> -	if (!sock)
> -		return -1;
> -	rcu_read_lock();
> -	wq = rcu_dereference(sock->wq);
> -	if (!wq || !wq->fasync_list) {
> -		rcu_read_unlock();
> +	if (!sock || !wq || !wq->fasync_list)
>  		return -1;
> -	}
> +
>  	switch (how) {
>  	case SOCK_WAKE_WAITD:
>  		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
> @@ -1086,7 +1080,7 @@ call_kill:
>  	case SOCK_WAKE_URG:
>  		kill_fasync(&wq->fasync_list, SIGURG, band);
>  	}
> -	rcu_read_unlock();
> +
>  	return 0;
>  }
>  EXPORT_SYMBOL(sock_wake_async);
> 


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Nov. 25, 2015, 2:18 p.m. UTC | #2
On Tue, 2015-11-24 at 21:43 -0800, Eric Dumazet wrote:
> On Tue, 2015-11-24 at 18:28 -0800, Eric Dumazet wrote:
> > Dmitry, could you test following patch with your setup ?
> > 
> > ( I tried to reproduce the error you reported but could not )
> > 
> > Inode can be freed (without RCU grace period), but not the socket or
> > sk_wq
> > 
> > By using sk_wq in the critical paths, we do not dereference the inode,
> > 
> > 
> 
> I finally was able to reproduce the warning (with more instances running
> in parallel), and apparently this patch solves the problem.

OK I got another one :

This means we also need to move SOCK_ASYNC_WAITDATA and
SOCK_ASYNC_NOSPACE from sock->flags into storage protected by RCU (in
struct socket_wq)

Patch will be much bigger, as we have many 

set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags)
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
and similar calls.

[ 7040.574519] ==================================================================
[ 7040.581838] AddressSanitizer: heap-use-after-free in sock_wake_async
[ 7040.588229] Read of size 8 by thread T977831:
[ 7040.592647]  [<ffffffff8effddd1>] sock_wake_async+0xd1/0xf0
[ 7040.598264]  [<ffffffff8f006084>] sock_def_readable+0xa4/0xb0
[ 7040.604071]  [<ffffffff8f13fec6>] unix_stream_sendmsg+0x326/0x730
[ 7040.611561]  [<ffffffff8effee33>] sock_aio_write+0x253/0x280
[ 7040.617412]  [<ffffffff8eb09493>] do_sync_write+0xe3/0x130
[ 7040.622945]  [<ffffffff8eb09ded>] vfs_write+0x2dd/0x300
[ 7040.628267]  [<ffffffff8eb0ad92>] SyS_write+0x72/0xd0
[ 7040.633380]  [<ffffffff8f1db322>] system_call_fastpath+0x16/0x1b
[ 7040.639442] 
[ 7040.640969] Freed by thread T977830:
[ 7040.644587]  [<ffffffff8effe22a>] sock_destroy_inode+0x4a/0x50
[ 7040.650472]  [<ffffffff8eb34584>] destroy_inode+0x64/0xa0
[ 7040.655921]  [<ffffffff8eb3478d>] evict+0x1cd/0x2a0
[ 7040.660850]  [<ffffffff8eb3549e>] iput+0x2ae/0x340
[ 7040.665689]  [<ffffffff8eb2f7c0>] dput.part.20+0x3f0/0x660
[ 7040.671235]  [<ffffffff8eb2fa49>] dput+0x19/0x20
[ 7040.675915]  [<ffffffff8eb0c419>] __fput+0x219/0x350
[ 7040.680940]  [<ffffffff8eb0c59e>] ____fput+0xe/0x10
[ 7040.685881]  [<ffffffff8e92c10e>] task_work_run+0xee/0x100
[ 7040.691424]  [<ffffffff8e8790bd>] do_notify_resume+0x12d/0x140
[ 7040.697314]  [<ffffffff8f1db5bf>] int_signal+0x12/0x17
[ 7040.702507] 
[ 7040.704039] Allocated by thread T977733:
[ 7040.708008]  [<ffffffff8effe25b>] sock_alloc_inode+0x2b/0x140
[ 7040.713796]  [<ffffffff8eb33392>] alloc_inode+0x32/0xf0
[ 7040.719080]  [<ffffffff8eb36431>] new_inode_pseudo+0x11/0x80
[ 7040.724803]  [<ffffffff8effdae7>] sock_alloc+0x37/0x110
[ 7040.730102]  [<ffffffff8efff365>] __sock_create+0x95/0x340
[ 7040.735652]  [<ffffffff8efff6c8>] sock_create+0x88/0xc0
[ 7040.740935]  [<ffffffff8f001701>] SyS_socketpair+0x51/0x2c0
[ 7040.746574]  [<ffffffff8f1db322>] system_call_fastpath+0x16/0x1b
[ 7040.752626] 
[ 7040.754156] The buggy address ffff880987ed2c08 is located 8 bytes inside
[ 7040.754156]  of 624-byte region [ffff880987ed2c00, ffff880987ed2e70)
[ 7040.767219] 
[ 7040.768771] Memory state around the buggy address:
[ 7040.773640]  ffff880987ed2700: ffffffff ffffffff ffffffff ffffffrr
[ 7040.779867]  ffff880987ed2800: rrrrrrrr rrrrrrrr rrrrrrrr ffffffff
[ 7040.786127]  ffff880987ed2900: ffffffff ffffffff ffffffff ffffffff
[ 7040.792380]  ffff880987ed2a00: ffffffff ffffffff ffffffff ffffffff
[ 7040.798631]  ffff880987ed2b00: ffffffrr rrrrrrrr rrrrrrrr rrrrrrrr
[ 7040.804890] >ffff880987ed2c00: ffffffff ffffffff ffffffff ffffffff
[ 7040.811114]                     ^
[ 7040.814519]  ffff880987ed2d00: ffffffff ffffffff ffffffff ffffffff
[ 7040.820786]  ffff880987ed2e00: ffffffff ffffffrr rrrrrrrr rrrrrrrr
[ 7040.827053]  ffff880987ed2f00: rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr
[ 7040.833315]  ffff880987ed3000: rrrrrrrr ffffffff ffffffff ffffffff
[ 7040.839579]  ffff880987ed3100: ffffffff ffffffff ffffffff ffffffff
[ 7040.845803] Legend:
[ 7040.847937]  f - 8 freed bytes
[ 7040.851026]  r - 8 redzone bytes
[ 7040.854298]  . - 8 allocated bytes
[ 7040.857752]  x=1..7 - x allocated bytes + (8-x) redzone bytes
[ 7040.863537] ==================================================================



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/net.h b/include/linux/net.h
index 70ac5e28e6b7..6b93ec234ce8 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -202,7 +202,7 @@  enum {
 	SOCK_WAKE_URG,
 };
 
-int sock_wake_async(struct socket *sk, int how, int band);
+int sock_wake_async(struct socket *sock, struct socket_wq *wq, int how, int band);
 int sock_register(const struct net_proto_family *fam);
 void sock_unregister(int family);
 int __sock_create(struct net *net, int family, int type, int proto,
diff --git a/include/net/sock.h b/include/net/sock.h
index 7f89e4ba18d1..af78f9e7a218 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2007,8 +2007,12 @@  static inline unsigned long sock_wspace(struct sock *sk)
 
 static inline void sk_wake_async(struct sock *sk, int how, int band)
 {
-	if (sock_flag(sk, SOCK_FASYNC))
-		sock_wake_async(sk->sk_socket, how, band);
+	if (sock_flag(sk, SOCK_FASYNC)) {
+		rcu_read_lock();
+		sock_wake_async(sk->sk_socket, rcu_dereference(sk->sk_wq),
+				how, band);
+		rcu_read_unlock();
+	}
 }
 
 /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
diff --git a/net/core/stream.c b/net/core/stream.c
index d70f77a0c889..92682228919d 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -39,7 +39,7 @@  void sk_stream_write_space(struct sock *sk)
 			wake_up_interruptible_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
-			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+			sock_wake_async(sock, wq, SOCK_WAKE_SPACE, POLL_OUT);
 		rcu_read_unlock();
 	}
 }
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 897c01c029ca..6ab04866a1e7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6817,9 +6817,13 @@  static void __sctp_write_space(struct sctp_association *asoc)
 			 * here by modeling from the current TCP/UDP code.
 			 * We have not tested with it yet.
 			 */
-			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+				rcu_read_lock();
 				sock_wake_async(sock,
+						rcu_dereference(sk->sk_wq),
 						SOCK_WAKE_SPACE, POLL_OUT);
+				rcu_read_unlock();
+			}
 		}
 	}
 }
diff --git a/net/socket.c b/net/socket.c
index dd2c247c99e3..8df62c8bef90 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1058,18 +1058,12 @@  static int sock_fasync(int fd, struct file *filp, int on)
 
 /* This function may be called only under socket lock or callback_lock or rcu_lock */
 
-int sock_wake_async(struct socket *sock, int how, int band)
+int sock_wake_async(struct socket *sock, struct socket_wq *wq,
+		    int how, int band)
 {
-	struct socket_wq *wq;
-
-	if (!sock)
-		return -1;
-	rcu_read_lock();
-	wq = rcu_dereference(sock->wq);
-	if (!wq || !wq->fasync_list) {
-		rcu_read_unlock();
+	if (!sock || !wq || !wq->fasync_list)
 		return -1;
-	}
+
 	switch (how) {
 	case SOCK_WAKE_WAITD:
 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
@@ -1086,7 +1080,7 @@  call_kill:
 	case SOCK_WAKE_URG:
 		kill_fasync(&wq->fasync_list, SIGURG, band);
 	}
-	rcu_read_unlock();
+
 	return 0;
 }
 EXPORT_SYMBOL(sock_wake_async);