Message ID | 1448418495.22599.320.camel@edumazet-glaptop2.roam.corp.google.com |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, 2015-11-24 at 18:28 -0800, Eric Dumazet wrote: > Dmitry, could you test following patch with your setup ? > > ( I tried to reproduce the error you reported but could not ) > > Inode can be freed (without RCU grace period), but not the socket or > sk_wq > > By using sk_wq in the critical paths, we do not dereference the inode, > > I finally was able to reproduce the warning (with more instances running in parallel), and apparently this patch solves the problem. > > Thanks ! > > include/linux/net.h | 2 +- > include/net/sock.h | 8 ++++++-- > net/core/stream.c | 2 +- > net/sctp/socket.c | 6 +++++- > net/socket.c | 16 +++++----------- > 5 files changed, 18 insertions(+), 16 deletions(-) > > diff --git a/include/linux/net.h b/include/linux/net.h > index 70ac5e28e6b7..6b93ec234ce8 100644 > --- a/include/linux/net.h > +++ b/include/linux/net.h > @@ -202,7 +202,7 @@ enum { > SOCK_WAKE_URG, > }; > > -int sock_wake_async(struct socket *sk, int how, int band); > +int sock_wake_async(struct socket *sock, struct socket_wq *wq, int how, int band); > int sock_register(const struct net_proto_family *fam); > void sock_unregister(int family); > int __sock_create(struct net *net, int family, int type, int proto, > diff --git a/include/net/sock.h b/include/net/sock.h > index 7f89e4ba18d1..af78f9e7a218 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -2007,8 +2007,12 @@ static inline unsigned long sock_wspace(struct sock *sk) > > static inline void sk_wake_async(struct sock *sk, int how, int band) > { > - if (sock_flag(sk, SOCK_FASYNC)) > - sock_wake_async(sk->sk_socket, how, band); > + if (sock_flag(sk, SOCK_FASYNC)) { > + rcu_read_lock(); > + sock_wake_async(sk->sk_socket, rcu_dereference(sk->sk_wq), > + how, band); > + rcu_read_unlock(); > + } > } > > /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might > diff --git a/net/core/stream.c b/net/core/stream.c > index d70f77a0c889..92682228919d 100644 > --- a/net/core/stream.c > +++ b/net/core/stream.c > @@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk) > wake_up_interruptible_poll(&wq->wait, POLLOUT | > POLLWRNORM | POLLWRBAND); > if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) > - sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); > + sock_wake_async(sock, wq, SOCK_WAKE_SPACE, POLL_OUT); > rcu_read_unlock(); > } > } > diff --git a/net/sctp/socket.c b/net/sctp/socket.c > index 897c01c029ca..6ab04866a1e7 100644 > --- a/net/sctp/socket.c > +++ b/net/sctp/socket.c > @@ -6817,9 +6817,13 @@ static void __sctp_write_space(struct sctp_association *asoc) > * here by modeling from the current TCP/UDP code. > * We have not tested with it yet. > */ > - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) > + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { > + rcu_read_lock(); > sock_wake_async(sock, > + rcu_dereference(sk->sk_wq), > SOCK_WAKE_SPACE, POLL_OUT); > + rcu_read_unlock(); > + } > } > } > } > diff --git a/net/socket.c b/net/socket.c > index dd2c247c99e3..8df62c8bef90 100644 > --- a/net/socket.c > +++ b/net/socket.c > @@ -1058,18 +1058,12 @@ static int sock_fasync(int fd, struct file *filp, int on) > > /* This function may be called only under socket lock or callback_lock or rcu_lock */ > > -int sock_wake_async(struct socket *sock, int how, int band) > +int sock_wake_async(struct socket *sock, struct socket_wq *wq, > + int how, int band) > { > - struct socket_wq *wq; > - > - if (!sock) > - return -1; > - rcu_read_lock(); > - wq = rcu_dereference(sock->wq); > - if (!wq || !wq->fasync_list) { > - rcu_read_unlock(); > + if (!sock || !wq || !wq->fasync_list) > return -1; > - } > + > switch (how) { > case SOCK_WAKE_WAITD: > if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) > @@ -1086,7 +1080,7 @@ call_kill: > case SOCK_WAKE_URG: > kill_fasync(&wq->fasync_list, SIGURG, band); > } > - rcu_read_unlock(); > + > return 0; > } > EXPORT_SYMBOL(sock_wake_async); > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2015-11-24 at 21:43 -0800, Eric Dumazet wrote: > On Tue, 2015-11-24 at 18:28 -0800, Eric Dumazet wrote: > > Dmitry, could you test following patch with your setup ? > > > > ( I tried to reproduce the error you reported but could not ) > > > > Inode can be freed (without RCU grace period), but not the socket or > > sk_wq > > > > By using sk_wq in the critical paths, we do not dereference the inode, > > > > > > I finally was able to reproduce the warning (with more instances running > in parallel), and apparently this patch solves the problem. OK I got another one : This means we also need to move SOCK_ASYNC_WAITDATA and SOCK_ASYNC_NOSPACE from sock->flags into storage protected by RCU (in struct socket_wq) Patch will be much bigger, as we have many set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags) clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); and similar calls. [ 7040.574519] ================================================================== [ 7040.581838] AddressSanitizer: heap-use-after-free in sock_wake_async [ 7040.588229] Read of size 8 by thread T977831: [ 7040.592647] [<ffffffff8effddd1>] sock_wake_async+0xd1/0xf0 [ 7040.598264] [<ffffffff8f006084>] sock_def_readable+0xa4/0xb0 [ 7040.604071] [<ffffffff8f13fec6>] unix_stream_sendmsg+0x326/0x730 [ 7040.611561] [<ffffffff8effee33>] sock_aio_write+0x253/0x280 [ 7040.617412] [<ffffffff8eb09493>] do_sync_write+0xe3/0x130 [ 7040.622945] [<ffffffff8eb09ded>] vfs_write+0x2dd/0x300 [ 7040.628267] [<ffffffff8eb0ad92>] SyS_write+0x72/0xd0 [ 7040.633380] [<ffffffff8f1db322>] system_call_fastpath+0x16/0x1b [ 7040.639442] [ 7040.640969] Freed by thread T977830: [ 7040.644587] [<ffffffff8effe22a>] sock_destroy_inode+0x4a/0x50 [ 7040.650472] [<ffffffff8eb34584>] destroy_inode+0x64/0xa0 [ 7040.655921] [<ffffffff8eb3478d>] evict+0x1cd/0x2a0 [ 7040.660850] [<ffffffff8eb3549e>] iput+0x2ae/0x340 [ 7040.665689] [<ffffffff8eb2f7c0>] dput.part.20+0x3f0/0x660 [ 7040.671235] [<ffffffff8eb2fa49>] dput+0x19/0x20 [ 7040.675915] [<ffffffff8eb0c419>] __fput+0x219/0x350 [ 7040.680940] [<ffffffff8eb0c59e>] ____fput+0xe/0x10 [ 7040.685881] [<ffffffff8e92c10e>] task_work_run+0xee/0x100 [ 7040.691424] [<ffffffff8e8790bd>] do_notify_resume+0x12d/0x140 [ 7040.697314] [<ffffffff8f1db5bf>] int_signal+0x12/0x17 [ 7040.702507] [ 7040.704039] Allocated by thread T977733: [ 7040.708008] [<ffffffff8effe25b>] sock_alloc_inode+0x2b/0x140 [ 7040.713796] [<ffffffff8eb33392>] alloc_inode+0x32/0xf0 [ 7040.719080] [<ffffffff8eb36431>] new_inode_pseudo+0x11/0x80 [ 7040.724803] [<ffffffff8effdae7>] sock_alloc+0x37/0x110 [ 7040.730102] [<ffffffff8efff365>] __sock_create+0x95/0x340 [ 7040.735652] [<ffffffff8efff6c8>] sock_create+0x88/0xc0 [ 7040.740935] [<ffffffff8f001701>] SyS_socketpair+0x51/0x2c0 [ 7040.746574] [<ffffffff8f1db322>] system_call_fastpath+0x16/0x1b [ 7040.752626] [ 7040.754156] The buggy address ffff880987ed2c08 is located 8 bytes inside [ 7040.754156] of 624-byte region [ffff880987ed2c00, ffff880987ed2e70) [ 7040.767219] [ 7040.768771] Memory state around the buggy address: [ 7040.773640] ffff880987ed2700: ffffffff ffffffff ffffffff ffffffrr [ 7040.779867] ffff880987ed2800: rrrrrrrr rrrrrrrr rrrrrrrr ffffffff [ 7040.786127] ffff880987ed2900: ffffffff ffffffff ffffffff ffffffff [ 7040.792380] ffff880987ed2a00: ffffffff ffffffff ffffffff ffffffff [ 7040.798631] ffff880987ed2b00: ffffffrr rrrrrrrr rrrrrrrr rrrrrrrr [ 7040.804890] >ffff880987ed2c00: ffffffff ffffffff ffffffff ffffffff [ 7040.811114] ^ [ 7040.814519] ffff880987ed2d00: ffffffff ffffffff ffffffff ffffffff [ 7040.820786] ffff880987ed2e00: ffffffff ffffffrr rrrrrrrr rrrrrrrr [ 7040.827053] ffff880987ed2f00: rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr [ 7040.833315] ffff880987ed3000: rrrrrrrr ffffffff ffffffff ffffffff [ 7040.839579] ffff880987ed3100: ffffffff ffffffff ffffffff ffffffff [ 7040.845803] Legend: [ 7040.847937] f - 8 freed bytes [ 7040.851026] r - 8 redzone bytes [ 7040.854298] . - 8 allocated bytes [ 7040.857752] x=1..7 - x allocated bytes + (8-x) redzone bytes [ 7040.863537] ================================================================== -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/net.h b/include/linux/net.h index 70ac5e28e6b7..6b93ec234ce8 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -202,7 +202,7 @@ enum { SOCK_WAKE_URG, }; -int sock_wake_async(struct socket *sk, int how, int band); +int sock_wake_async(struct socket *sock, struct socket_wq *wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..af78f9e7a218 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2007,8 +2007,12 @@ static inline unsigned long sock_wspace(struct sock *sk) static inline void sk_wake_async(struct sock *sk, int how, int band) { - if (sock_flag(sk, SOCK_FASYNC)) - sock_wake_async(sk->sk_socket, how, band); + if (sock_flag(sk, SOCK_FASYNC)) { + rcu_read_lock(); + sock_wake_async(sk->sk_socket, rcu_dereference(sk->sk_wq), + how, band); + rcu_read_unlock(); + } } /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0c889..92682228919d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); + sock_wake_async(sock, wq, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c029ca..6ab04866a1e7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6817,9 +6817,13 @@ static void __sctp_write_space(struct sctp_association *asoc) * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + rcu_read_lock(); sock_wake_async(sock, + rcu_dereference(sk->sk_wq), SOCK_WAKE_SPACE, POLL_OUT); + rcu_read_unlock(); + } } } } diff --git a/net/socket.c b/net/socket.c index dd2c247c99e3..8df62c8bef90 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1058,18 +1058,12 @@ static int sock_fasync(int fd, struct file *filp, int on) /* This function may be called only under socket lock or callback_lock or rcu_lock */ -int sock_wake_async(struct socket *sock, int how, int band) +int sock_wake_async(struct socket *sock, struct socket_wq *wq, + int how, int band) { - struct socket_wq *wq; - - if (!sock) - return -1; - rcu_read_lock(); - wq = rcu_dereference(sock->wq); - if (!wq || !wq->fasync_list) { - rcu_read_unlock(); + if (!sock || !wq || !wq->fasync_list) return -1; - } + switch (how) { case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) @@ -1086,7 +1080,7 @@ call_kill: case SOCK_WAKE_URG: kill_fasync(&wq->fasync_list, SIGURG, band); } - rcu_read_unlock(); + return 0; } EXPORT_SYMBOL(sock_wake_async);