[net] net: use a work queue to defer net_disable_timestamp() work
diff mbox

Message ID 1486060295.21871.19.camel@edumazet-glaptop3.roam.corp.google.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Feb. 2, 2017, 6:31 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

Dmitry reported a warning [1] showing that we were calling 
net_disable_timestamp() -> static_key_slow_dec() from a non
process context.

Grabbing a mutex while holding a spinlock or rcu_read_lock()
is not allowed.

As Cong suggested, we now use a work queue.

It is possible netstamp_clear() exits while netstamp_needed_deferred
is not zero, but it is probably not worth trying to do better than that.

netstamp_needed_deferred atomic tracks the exact number of deferred
decrements.

[1]
[ INFO: suspicious RCU usage. ]
4.10.0-rc5+ #192 Not tainted
-------------------------------
./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side
critical section!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 0
2 locks held by syz-executor14/23111:
 #0:  (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] lock_sock
include/net/sock.h:1454 [inline]
 #0:  (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>]
rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919
 #1:  (rcu_read_lock){......}, at: [<ffffffff83ae2678>] nf_hook
include/linux/netfilter.h:201 [inline]
 #1:  (rcu_read_lock){......}, at: [<ffffffff83ae2678>]
__ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160

stack backtrace:
CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
 lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452
 rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline]
 ___might_sleep+0x560/0x650 kernel/sched/core.c:7748
 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739
 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752
 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060
 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149
 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174
 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728
 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403
 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441
 sk_destruct+0x47/0x80 net/core/sock.c:1460
 __sk_free+0x57/0x230 net/core/sock.c:1468
 sock_wfree+0xae/0x120 net/core/sock.c:1645
 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655
 skb_release_all+0x15/0x60 net/core/skbuff.c:668
 __kfree_skb+0x15/0x20 net/core/skbuff.c:684
 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705
 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304
 inet_frag_put include/net/inet_frag.h:133 [inline]
 nf_ct_frag6_gather+0x1106/0x3840
net/ipv6/netfilter/nf_conntrack_reasm.c:617
 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline]
 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310
 nf_hook include/linux/netfilter.h:212 [inline]
 __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160
 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170
 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722
 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742
 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline]
 rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 sock_write_iter+0x326/0x600 net/socket.c:848
 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695
 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872
 vfs_writev+0x87/0xc0 fs/read_write.c:911
 do_writev+0x110/0x2c0 fs/read_write.c:944
 SYSC_writev fs/read_write.c:1017 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1014
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x445559
RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559
RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005
RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000
R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400
BUG: sleeping function called from invalid context at
kernel/locking/mutex.c:752
in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14
INFO: lockdep is turned off.
CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
 ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780
 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739
 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752
 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060
 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149
 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174
 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728
 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403
 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441
 sk_destruct+0x47/0x80 net/core/sock.c:1460
 __sk_free+0x57/0x230 net/core/sock.c:1468
 sock_wfree+0xae/0x120 net/core/sock.c:1645
 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655
 skb_release_all+0x15/0x60 net/core/skbuff.c:668
 __kfree_skb+0x15/0x20 net/core/skbuff.c:684
 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705
 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304
 inet_frag_put include/net/inet_frag.h:133 [inline]
 nf_ct_frag6_gather+0x1106/0x3840
net/ipv6/netfilter/nf_conntrack_reasm.c:617
 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline]
 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310
 nf_hook include/linux/netfilter.h:212 [inline]
 __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160
 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170
 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722
 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742
 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline]
 rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 sock_write_iter+0x326/0x600 net/socket.c:848
 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695
 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872
 vfs_writev+0x87/0xc0 fs/read_write.c:911
 do_writev+0x110/0x2c0 fs/read_write.c:944
 SYSC_writev fs/read_write.c:1017 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1014
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x445559

Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/dev.c |   31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

Comments

David Miller Feb. 3, 2017, 9:18 p.m. UTC | #1
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 02 Feb 2017 10:31:35 -0800

> From: Eric Dumazet <edumazet@google.com>
> 
> Dmitry reported a warning [1] showing that we were calling 
> net_disable_timestamp() -> static_key_slow_dec() from a non
> process context.
> 
> Grabbing a mutex while holding a spinlock or rcu_read_lock()
> is not allowed.
> 
> As Cong suggested, we now use a work queue.
> 
> It is possible netstamp_clear() exits while netstamp_needed_deferred
> is not zero, but it is probably not worth trying to do better than that.
> 
> netstamp_needed_deferred atomic tracks the exact number of deferred
> decrements.
 ...
> Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context")
> Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied and queued up for -stable, thanks Eric.

Patch
diff mbox

diff --git a/net/core/dev.c b/net/core/dev.c
index 7f218e095361520d11c243d650e053321ea7274f..29101c98399f40b6b8e42c31a255d8f1fb6bd7a1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1695,24 +1695,19 @@  EXPORT_SYMBOL_GPL(net_dec_egress_queue);
 
 static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
-/* We are not allowed to call static_key_slow_dec() from irq context
- * If net_disable_timestamp() is called from irq context, defer the
- * static_key_slow_dec() calls.
- */
 static atomic_t netstamp_needed_deferred;
-#endif
-
-void net_enable_timestamp(void)
+static void netstamp_clear(struct work_struct *work)
 {
-#ifdef HAVE_JUMP_LABEL
 	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
 
-	if (deferred) {
-		while (--deferred)
-			static_key_slow_dec(&netstamp_needed);
-		return;
-	}
+	while (deferred--)
+		static_key_slow_dec(&netstamp_needed);
+}
+static DECLARE_WORK(netstamp_work, netstamp_clear);
 #endif
+
+void net_enable_timestamp(void)
+{
 	static_key_slow_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
@@ -1720,12 +1715,12 @@  EXPORT_SYMBOL(net_enable_timestamp);
 void net_disable_timestamp(void)
 {
 #ifdef HAVE_JUMP_LABEL
-	if (in_interrupt()) {
-		atomic_inc(&netstamp_needed_deferred);
-		return;
-	}
-#endif
+	/* net_disable_timestamp() can be called from non process context */
+	atomic_inc(&netstamp_needed_deferred);
+	schedule_work(&netstamp_work);
+#else
 	static_key_slow_dec(&netstamp_needed);
+#endif
 }
 EXPORT_SYMBOL(net_disable_timestamp);