[tip:sched/core] sched/core: Add debugging code to catch missing update_rq_clock() calls

Message ID 20170202155506.GX6515@twins.programming.kicks-ass.net (mailing list archive)
State Not Applicable

Commit Message

Peter Zijlstra Feb. 2, 2017, 3:55 p.m. UTC
On Tue, Jan 31, 2017 at 10:22:47AM -0700, Ross Zwisler wrote:
> On Tue, Jan 31, 2017 at 4:48 AM, Mike Galbraith <efault@gmx.de> wrote:
> > On Tue, 2017-01-31 at 16:30 +0530, Sachin Sant wrote:


Could some of you test this? It seems to cure things in my (very)
limited testing.

---

Comments

Matt Fleming Feb. 2, 2017, 10:01 p.m. UTC | #1
On Thu, 02 Feb, at 04:55:06PM, Peter Zijlstra wrote:
> On Tue, Jan 31, 2017 at 10:22:47AM -0700, Ross Zwisler wrote:
> > On Tue, Jan 31, 2017 at 4:48 AM, Mike Galbraith <efault@gmx.de> wrote:
> > > On Tue, 2017-01-31 at 16:30 +0530, Sachin Sant wrote:
> 
> 
> Could some of you test this? It seems to cure things in my (very)
> limited testing.

I haven't tested it but this looks like the correct fix to me.

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Mike Galbraith Feb. 3, 2017, 3:05 a.m. UTC | #2
On Thu, 2017-02-02 at 16:55 +0100, Peter Zijlstra wrote:
> On Tue, Jan 31, 2017 at 10:22:47AM -0700, Ross Zwisler wrote:
> > On Tue, Jan 31, 2017 at 4:48 AM, Mike Galbraith <efault@gmx.de>
> > wrote:
> > > On Tue, 2017-01-31 at 16:30 +0530, Sachin Sant wrote:
> 
> 
> Could some of you test this? It seems to cure things in my (very)
> limited testing.

Hotplug stress gripe is gone here.

	-Mike
Sachin Sant Feb. 3, 2017, 4:33 a.m. UTC | #3
> On 02-Feb-2017, at 9:25 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> 
> On Tue, Jan 31, 2017 at 10:22:47AM -0700, Ross Zwisler wrote:
>> On Tue, Jan 31, 2017 at 4:48 AM, Mike Galbraith <efault@gmx.de> wrote:
>>> On Tue, 2017-01-31 at 16:30 +0530, Sachin Sant wrote:
> 
> 
> Could some of you test this? It seems to cure things in my (very)
> limited testing.
> 

I ran a few cycles of CPU hot(un)plug tests. In most cases it works, except one
where I ran into an RCU stall:

[  173.493453] INFO: rcu_sched detected stalls on CPUs/tasks:
[  173.493473] 	8-...: (2 GPs behind) idle=006/140000000000000/0 softirq=0/0 fqs=2996 
[  173.493476] 	(detected by 0, t=6002 jiffies, g=885, c=884, q=6350)
[  173.493482] Task dump for CPU 8:
[  173.493484] cpuhp/8         R  running task        0  3416      2 0x00000884
[  173.493489] Call Trace:
[  173.493492] [c0000004f7b834a0] [c0000004f7b83560] 0xc0000004f7b83560 (unreliable)
[  173.493498] [c0000004f7b83670] [c000000000008d28] alignment_common+0x128/0x130
[  173.493503] --- interrupt: 600 at _raw_spin_lock+0x2c/0xc0
[  173.493503]     LR = try_to_wake_up+0x204/0x5c0
[  173.493507] [c0000004f7b83960] [c0000004f4d8084c] 0xc0000004f4d8084c (unreliable)
[  173.493511] [c0000004f7b83990] [c0000000000fef54] try_to_wake_up+0x204/0x5c0
[  173.493515] [c0000004f7b83a10] [c0000000000e2b88] create_worker+0x148/0x250
[  173.493519] [c0000004f7b83ab0] [c0000000000e6e1c] alloc_unbound_pwq+0x3bc/0x4c0
[  173.493522] [c0000004f7b83b10] [c0000000000e7084] wq_update_unbound_numa+0x164/0x270
[  173.493526] [c0000004f7b83bb0] [c0000000000e8990] workqueue_online_cpu+0x250/0x3b0
[  173.493529] [c0000004f7b83c70] [c0000000000c2758] cpuhp_invoke_callback+0x148/0x5b0
[  173.493533] [c0000004f7b83ce0] [c0000000000c2df8] cpuhp_up_callbacks+0x48/0x140
[  173.493536] [c0000004f7b83d30] [c0000000000c3e98] cpuhp_thread_fun+0x148/0x180
[  173.493540] [c0000004f7b83d60] [c0000000000f3930] smpboot_thread_fn+0x290/0x2a0
[  173.493544] [c0000004f7b83dc0] [c0000000000edb3c] kthread+0x14c/0x190
[  173.493547] [c0000004f7b83e30] [c00000000000b4e8] ret_from_kernel_thread+0x5c/0x74
[  243.913715] INFO: task kworker/0:2:380 blocked for more than 120 seconds.
[  243.913732]       Not tainted 4.10.0-rc6-next-20170202 #6
[  243.913735] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  243.913738] kworker/0:2     D    0   380      2 0x00000800
[  243.913746] Workqueue: events vmstat_shepherd
[  243.913748] Call Trace:
[  243.913752] [c0000000ff07f820] [c00000000011135c] enqueue_entity+0x81c/0x1200 (unreliable)
[  243.913757] [c0000000ff07f9f0] [c00000000001a660] __switch_to+0x300/0x400
[  243.913762] [c0000000ff07fa50] [c0000000008df4f4] __schedule+0x314/0xb10
[  243.913766] [c0000000ff07fb20] [c0000000008dfd30] schedule+0x40/0xb0
[  243.913769] [c0000000ff07fb50] [c0000000008e02b8] schedule_preempt_disabled+0x18/0x30
[  243.913773] [c0000000ff07fb70] [c0000000008e1654] __mutex_lock.isra.6+0x1a4/0x660
[  243.913777] [c0000000ff07fc00] [c0000000000c3828] get_online_cpus+0x48/0x90
[  243.913780] [c0000000ff07fc30] [c00000000025fd78] vmstat_shepherd+0x38/0x150
[  243.913784] [c0000000ff07fc80] [c0000000000e5794] process_one_work+0x1a4/0x4d0
[  243.913788] [c0000000ff07fd20] [c0000000000e5b58] worker_thread+0x98/0x5a0
[  243.913791] [c0000000ff07fdc0] [c0000000000edb3c] kthread+0x14c/0x190
[  243.913795] [c0000000ff07fe30] [c00000000000b4e8] ret_from_kernel_thread+0x5c/0x74
[  243.913824] INFO: task drmgr:3413 blocked for more than 120 seconds.
[  243.913826]       Not tainted 4.10.0-rc6-next-20170202 #6
[  243.913829] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  243.913831] drmgr           D    0  3413   3114 0x00040080
[  243.913834] Call Trace:
[  243.913836] [c000000257ff3380] [c000000257ff3440] 0xc000000257ff3440 (unreliable)
[  243.913840] [c000000257ff3550] [c00000000001a660] __switch_to+0x300/0x400
[  243.913844] [c000000257ff35b0] [c0000000008df4f4] __schedule+0x314/0xb10
[  243.913847] [c000000257ff3680] [c0000000008dfd30] schedule+0x40/0xb0
[  243.913851] [c000000257ff36b0] [c0000000008e4594] schedule_timeout+0x274/0x470
[  243.913855] [c000000257ff37b0] [c0000000008e0efc] wait_for_common+0x1ac/0x2c0
[  243.913858] [c000000257ff3830] [c0000000000c50e4] bringup_cpu+0x84/0xe0
[  243.913862] [c000000257ff3860] [c0000000000c2758] cpuhp_invoke_callback+0x148/0x5b0
[  243.913865] [c000000257ff38d0] [c0000000000c2df8] cpuhp_up_callbacks+0x48/0x140
[  243.913868] [c000000257ff3920] [c0000000000c5438] _cpu_up+0xe8/0x1c0
[  243.913872] [c000000257ff3980] [c0000000000c5630] do_cpu_up+0x120/0x150
[  243.913876] [c000000257ff3a00] [c0000000005c005c] cpu_subsys_online+0x5c/0xe0
[  243.913879] [c000000257ff3a50] [c0000000005b7d84] device_online+0xb4/0x120
[  243.913883] [c000000257ff3a90] [c000000000093424] dlpar_online_cpu+0x144/0x1e0
[  243.913887] [c000000257ff3b50] [c000000000093c08] dlpar_cpu_add+0x108/0x2f0
[  243.913891] [c000000257ff3be0] [c0000000000948dc] dlpar_cpu_probe+0x3c/0x80
[  243.913894] [c000000257ff3c20] [c0000000000207a8] arch_cpu_probe+0x38/0x60
[  243.913898] [c000000257ff3c40] [c0000000005c0880] cpu_probe_store+0x40/0x70
[  243.913902] [c000000257ff3c70] [c0000000005b2e94] dev_attr_store+0x34/0x60
[  243.913906] [c000000257ff3c90] [c0000000003b0fc4] sysfs_kf_write+0x64/0xa0
[  243.913910] [c000000257ff3cb0] [c0000000003afd10] kernfs_fop_write+0x170/0x250
[  243.913914] [c000000257ff3d00] [c0000000002fb0f0] __vfs_write+0x40/0x1c0
[  243.913917] [c000000257ff3d90] [c0000000002fcba8] vfs_write+0xc8/0x240
[  243.913921] [c000000257ff3de0] [c0000000002fe790] SyS_write+0x60/0x110
[  243.913924] [c000000257ff3e30] [c00000000000b184] system_call+0x38/0xe0
[  243.913929] INFO: task ppc64_cpu:3423 blocked for more than 120 seconds.
[  243.913931]       Not tainted 4.10.0-rc6-next-20170202 #6
[  243.913933] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.

Thanks
-Sachin
Peter Zijlstra Feb. 3, 2017, 8:53 a.m. UTC | #4
On Fri, Feb 03, 2017 at 10:03:14AM +0530, Sachin Sant wrote:
> 
> > On 02-Feb-2017, at 9:25 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> > On Tue, Jan 31, 2017 at 10:22:47AM -0700, Ross Zwisler wrote:
> >> On Tue, Jan 31, 2017 at 4:48 AM, Mike Galbraith <efault@gmx.de> wrote:
> >>> On Tue, 2017-01-31 at 16:30 +0530, Sachin Sant wrote:
> > 
> > 
> > Could some of you test this? It seems to cure things in my (very)
> > limited testing.
> > 
> 
> I ran a few cycles of CPU hot(un)plug tests. In most cases it works, except one
> where I ran into an RCU stall:
> 
> [  173.493453] INFO: rcu_sched detected stalls on CPUs/tasks:
> [  173.493473] 	8-...: (2 GPs behind) idle=006/140000000000000/0 softirq=0/0 fqs=2996 
> [  173.493476] 	(detected by 0, t=6002 jiffies, g=885, c=884, q=6350)

Right, I actually saw that too, but I don't think that would be related
to my patch. I'll see if I can dig into this though, ought to get fixed
regardless.

Thanks for testing!
Mike Galbraith Feb. 3, 2017, 12:59 p.m. UTC | #5
On Fri, 2017-02-03 at 09:53 +0100, Peter Zijlstra wrote:
> On Fri, Feb 03, 2017 at 10:03:14AM +0530, Sachin Sant wrote:

> > I ran a few cycles of CPU hot(un)plug tests. In most cases it works, except one
> > where I ran into an RCU stall:
> > 
> > [  173.493453] INFO: rcu_sched detected stalls on CPUs/tasks:
> > [  173.493473] 	8-...: (2 GPs behind) idle=006/140000000000000/0 softirq=0/0 fqs=2996 
> > [  173.493476] 	(detected by 0, t=6002 jiffies, g=885, c=884, q=6350)
> 
> Right, I actually saw that too, but I don't think that would be related
> to my patch. I'll see if I can dig into this though, ought to get fixed
> regardless.

FWIW, I'm not seeing stalls/hangs while beating hotplug up in tip. (so
next grew a wart?)

	-Mike
Borislav Petkov Feb. 3, 2017, 1:04 p.m. UTC | #6
On Thu, Feb 02, 2017 at 04:55:06PM +0100, Peter Zijlstra wrote:
> On Tue, Jan 31, 2017 at 10:22:47AM -0700, Ross Zwisler wrote:
> > On Tue, Jan 31, 2017 at 4:48 AM, Mike Galbraith <efault@gmx.de> wrote:
> > > On Tue, 2017-01-31 at 16:30 +0530, Sachin Sant wrote:
> 
> 
> Could some of you test this? It seems to cure things in my (very)
> limited testing.

Tested-by: Borislav Petkov <bp@suse.de>
Peter Zijlstra Feb. 3, 2017, 1:37 p.m. UTC | #7
On Fri, Feb 03, 2017 at 01:59:34PM +0100, Mike Galbraith wrote:
> On Fri, 2017-02-03 at 09:53 +0100, Peter Zijlstra wrote:
> > On Fri, Feb 03, 2017 at 10:03:14AM +0530, Sachin Sant wrote:
> 
> > > I ran a few cycles of CPU hot(un)plug tests. In most cases it works, except one
> > > where I ran into an RCU stall:
> > > 
> > > [  173.493453] INFO: rcu_sched detected stalls on CPUs/tasks:
> > > [  173.493473] 	8-...: (2 GPs behind) idle=006/140000000000000/0 softirq=0/0 fqs=2996 
> > > [  173.493476] 	(detected by 0, t=6002 jiffies, g=885, c=884, q=6350)
> > 
> > Right, I actually saw that too, but I don't think that would be related
> > to my patch. I'll see if I can dig into this though, ought to get fixed
> > regardless.
> 
> FWIW, I'm not seeing stalls/hangs while beating hotplug up in tip. (so
> next grew a wart?)

I've seen it on tip. It looks like hot unplug goes really slow when
there are running tasks on the CPU being taken down.

What I did was something like:

  taskset -p $((1<<1)) $$
  for ((i=0; i<20; i++)) do while :; do :; done & done

  taskset -p $((1<<0)) $$
  echo 0 > /sys/devices/system/cpu/cpu1/online

And with those 20 tasks stuck sucking cycles on CPU1, the unplug goes
_really_ slow and the RCU stall triggers. What I suspect happens is that
hotplug stops participating in the RCU state machine early, but only
tells RCU about it really late, and in between it gets suspicious it
takes too long.

I've yet to dig through the RCU code to figure out the exact sequence of
events, but found the above to be fairly reliable in triggering the
issue.
Mike Galbraith Feb. 3, 2017, 1:52 p.m. UTC | #8
On Fri, 2017-02-03 at 14:37 +0100, Peter Zijlstra wrote:
> On Fri, Feb 03, 2017 at 01:59:34PM +0100, Mike Galbraith wrote:

> > FWIW, I'm not seeing stalls/hangs while beating hotplug up in tip. (so
> > next grew a wart?)
> 
> I've seen it on tip. It looks like hot unplug goes really slow when
> there are running tasks on the CPU being taken down.
> 
> What I did was something like:
> 
>   taskset -p $((1<<1)) $$
>   for ((i=0; i<20; i++)) do while :; do :; done & done
> 
>   taskset -p $((1<<0)) $$
>   echo 0 > /sys/devices/system/cpu/cpu1/online
> 
> And with those 20 tasks stuck sucking cycles on CPU1, the unplug goes
> _really_ slow and the RCU stall triggers. What I suspect happens is that
> hotplug stops participating in the RCU state machine early, but only
> tells RCU about it really late, and in between it gets suspicious it
> takes too long.

Ah.  I wasn't doing a really hard pounding, just running a couple
instances of Steven's script.  To beat hell out of it, I add futextest,
stockfish and a small kbuild on a big box.

	-Mike
Paul E. McKenney Feb. 3, 2017, 3:44 p.m. UTC | #9
On Fri, Feb 03, 2017 at 02:37:48PM +0100, Peter Zijlstra wrote:
> On Fri, Feb 03, 2017 at 01:59:34PM +0100, Mike Galbraith wrote:
> > On Fri, 2017-02-03 at 09:53 +0100, Peter Zijlstra wrote:
> > > On Fri, Feb 03, 2017 at 10:03:14AM +0530, Sachin Sant wrote:
> > 
> > > > I ran a few cycles of CPU hot(un)plug tests. In most cases it works, except one
> > > > where I ran into an RCU stall:
> > > > 
> > > > [  173.493453] INFO: rcu_sched detected stalls on CPUs/tasks:
> > > > [  173.493473] 	8-...: (2 GPs behind) idle=006/140000000000000/0 softirq=0/0 fqs=2996 
> > > > [  173.493476] 	(detected by 0, t=6002 jiffies, g=885, c=884, q=6350)
> > > 
> > > Right, I actually saw that too, but I don't think that would be related
> > > to my patch. I'll see if I can dig into this though, ought to get fixed
> > > regardless.
> > 
> > FWIW, I'm not seeing stalls/hangs while beating hotplug up in tip. (so
> > next grew a wart?)
> 
> I've seen it on tip. It looks like hot unplug goes really slow when
> there are running tasks on the CPU being taken down.
> 
> What I did was something like:
> 
>   taskset -p $((1<<1)) $$
>   for ((i=0; i<20; i++)) do while :; do :; done & done
> 
>   taskset -p $((1<<0)) $$
>   echo 0 > /sys/devices/system/cpu/cpu1/online
> 
> And with those 20 tasks stuck sucking cycles on CPU1, the unplug goes
> _really_ slow and the RCU stall triggers. What I suspect happens is that
> hotplug stops participating in the RCU state machine early, but only
> tells RCU about it really late, and in between it gets suspicious it
> takes too long.
> 
> I've yet to dig through the RCU code to figure out the exact sequence of
> events, but found the above to be fairly reliable in triggering the
> issue.

If you send me the full splat from the dmesg and the RCU portions of
.config, I will take a look.  Is this new behavior, or a new test?

							Thanx, Paul
Paul E. McKenney Feb. 3, 2017, 3:54 p.m. UTC | #10
On Fri, Feb 03, 2017 at 07:44:57AM -0800, Paul E. McKenney wrote:
> On Fri, Feb 03, 2017 at 02:37:48PM +0100, Peter Zijlstra wrote:
> > On Fri, Feb 03, 2017 at 01:59:34PM +0100, Mike Galbraith wrote:
> > > On Fri, 2017-02-03 at 09:53 +0100, Peter Zijlstra wrote:
> > > > On Fri, Feb 03, 2017 at 10:03:14AM +0530, Sachin Sant wrote:
> > > 
> > > > > I ran a few cycles of CPU hot(un)plug tests. In most cases it works, except one
> > > > > where I ran into an RCU stall:
> > > > > 
> > > > > [  173.493453] INFO: rcu_sched detected stalls on CPUs/tasks:
> > > > > [  173.493473] 	8-...: (2 GPs behind) idle=006/140000000000000/0 softirq=0/0 fqs=2996 
> > > > > [  173.493476] 	(detected by 0, t=6002 jiffies, g=885, c=884, q=6350)
> > > > 
> > > > Right, I actually saw that too, but I don't think that would be related
> > > > to my patch. I'll see if I can dig into this though, ought to get fixed
> > > > regardless.
> > > 
> > > FWIW, I'm not seeing stalls/hangs while beating hotplug up in tip. (so
> > > next grew a wart?)
> > 
> > I've seen it on tip. It looks like hot unplug goes really slow when
> > there are running tasks on the CPU being taken down.
> > 
> > What I did was something like:
> > 
> >   taskset -p $((1<<1)) $$
> >   for ((i=0; i<20; i++)) do while :; do :; done & done
> > 
> >   taskset -p $((1<<0)) $$
> >   echo 0 > /sys/devices/system/cpu/cpu1/online
> > 
> > And with those 20 tasks stuck sucking cycles on CPU1, the unplug goes
> > _really_ slow and the RCU stall triggers. What I suspect happens is that
> > hotplug stops participating in the RCU state machine early, but only
> > tells RCU about it really late, and in between it gets suspicious it
> > takes too long.
> > 
> > I've yet to dig through the RCU code to figure out the exact sequence of
> > events, but found the above to be fairly reliable in triggering the
> > issue.

> If you send me the full splat from the dmesg and the RCU portions of
> .config, I will take a look.  Is this new behavior, or a new test?

If new behavior, I would be most suspicious of these commits in -rcu which
recently entered -tip:

19e4d983cda1 rcu: Place guard on rcu_all_qs() and rcu_note_context_switch() actions
913324b1364f rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle()
fcdcfefafa45 rcu: Pull rcu_qs_ctr into rcu_dynticks structure
0919a0b7e7a5 rcu: Pull rcu_sched_qs_mask into rcu_dynticks structure
caa7c8e34293 rcu: Make rcu_note_context_switch() do deferred NOCB wakeups
41e4b159d516 rcu: Make rcu_all_qs() do deferred NOCB wakeups
b457a3356a68 rcu: Make call_rcu() do deferred NOCB wakeups

Does reverting any of these help?

							Thanx, Paul
Sachin Sant Feb. 6, 2017, 6:23 a.m. UTC | #11
>>> I've seen it on tip. It looks like hot unplug goes really slow when
>>> there are running tasks on the CPU being taken down.
>>> 
>>> What I did was something like:
>>> 
>>>  taskset -p $((1<<1)) $$
>>>  for ((i=0; i<20; i++)) do while :; do :; done & done
>>> 
>>>  taskset -p $((1<<0)) $$
>>>  echo 0 > /sys/devices/system/cpu/cpu1/online
>>> 
>>> And with those 20 tasks stuck sucking cycles on CPU1, the unplug goes
>>> _really_ slow and the RCU stall triggers. What I suspect happens is that
>>> hotplug stops participating in the RCU state machine early, but only
>>> tells RCU about it really late, and in between it gets suspicious it
>>> takes too long.
>>> 
>>> I've yet to dig through the RCU code to figure out the exact sequence of
>>> events, but found the above to be fairly reliable in triggering the
>>> issue.
> 
>> If you send me the full splat from the dmesg and the RCU portions of
>> .config, I will take a look.  Is this new behavior, or a new test?
> 

I have sent the required files to you via separate email.

> If new behavior, I would be most suspicious of these commits in -rcu which
> recently entered -tip:
> 
> 19e4d983cda1 rcu: Place guard on rcu_all_qs() and rcu_note_context_switch() actions
> 913324b1364f rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle()
> fcdcfefafa45 rcu: Pull rcu_qs_ctr into rcu_dynticks structure
> 0919a0b7e7a5 rcu: Pull rcu_sched_qs_mask into rcu_dynticks structure
> caa7c8e34293 rcu: Make rcu_note_context_switch() do deferred NOCB wakeups
> 41e4b159d516 rcu: Make rcu_all_qs() do deferred NOCB wakeups
> b457a3356a68 rcu: Make call_rcu() do deferred NOCB wakeups
> 
> Does reverting any of these help?

I tried reverting the above commits. That does not help. I can still recreate the issue.

Thanks
-Sachin
Paul E. McKenney Feb. 6, 2017, 3:10 p.m. UTC | #12
On Mon, Feb 06, 2017 at 11:53:10AM +0530, Sachin Sant wrote:
> 
> >>> I've seen it on tip. It looks like hot unplug goes really slow when
> >>> there are running tasks on the CPU being taken down.
> >>> 
> >>> What I did was something like:
> >>> 
> >>>  taskset -p $((1<<1)) $$
> >>>  for ((i=0; i<20; i++)) do while :; do :; done & done
> >>> 
> >>>  taskset -p $((1<<0)) $$
> >>>  echo 0 > /sys/devices/system/cpu/cpu1/online
> >>> 
> >>> And with those 20 tasks stuck sucking cycles on CPU1, the unplug goes
> >>> _really_ slow and the RCU stall triggers. What I suspect happens is that
> >>> hotplug stops participating in the RCU state machine early, but only
> >>> tells RCU about it really late, and in between it gets suspicious it
> >>> takes too long.
> >>> 
> >>> I've yet to dig through the RCU code to figure out the exact sequence of
> >>> events, but found the above to be fairly reliable in triggering the
> >>> issue.
> > 
> >> If you send me the full splat from the dmesg and the RCU portions of
> >> .config, I will take a look.  Is this new behavior, or a new test?
> > 
> 
> I have sent the required files to you via separate email.
> 
> > If new behavior, I would be most suspicious of these commits in -rcu which
> > recently entered -tip:
> > 
> > 19e4d983cda1 rcu: Place guard on rcu_all_qs() and rcu_note_context_switch() actions
> > 913324b1364f rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle()
> > fcdcfefafa45 rcu: Pull rcu_qs_ctr into rcu_dynticks structure
> > 0919a0b7e7a5 rcu: Pull rcu_sched_qs_mask into rcu_dynticks structure
> > caa7c8e34293 rcu: Make rcu_note_context_switch() do deferred NOCB wakeups
> > 41e4b159d516 rcu: Make rcu_all_qs() do deferred NOCB wakeups
> > b457a3356a68 rcu: Make call_rcu() do deferred NOCB wakeups
> > 
> > Does reverting any of these help?
> 
> I tried reverting the above commits. That does not help. I can still recreate the issue.

Thank you for testing, Sachin!

Could you please try building and testing with CONFIG_RCU_BOOST=y?
You will need to enable CONFIG_RCU_EXPERT=y to see this Kconfig option.

							Thanx, Paul
Paul E. McKenney Feb. 6, 2017, 3:14 p.m. UTC | #13
On Mon, Feb 06, 2017 at 07:10:48AM -0800, Paul E. McKenney wrote:
> On Mon, Feb 06, 2017 at 11:53:10AM +0530, Sachin Sant wrote:
> > 
> > >>> I've seen it on tip. It looks like hot unplug goes really slow when
> > >>> there are running tasks on the CPU being taken down.
> > >>> 
> > >>> What I did was something like:
> > >>> 
> > >>>  taskset -p $((1<<1)) $$
> > >>>  for ((i=0; i<20; i++)) do while :; do :; done & done
> > >>> 
> > >>>  taskset -p $((1<<0)) $$
> > >>>  echo 0 > /sys/devices/system/cpu/cpu1/online
> > >>> 
> > >>> And with those 20 tasks stuck sucking cycles on CPU1, the unplug goes
> > >>> _really_ slow and the RCU stall triggers. What I suspect happens is that
> > >>> hotplug stops participating in the RCU state machine early, but only
> > >>> tells RCU about it really late, and in between it gets suspicious it
> > >>> takes too long.
> > >>> 
> > >>> I've yet to dig through the RCU code to figure out the exact sequence of
> > >>> events, but found the above to be fairly reliable in triggering the
> > >>> issue.
> > > 
> > >> If you send me the full splat from the dmesg and the RCU portions of
> > >> .config, I will take a look.  Is this new behavior, or a new test?
> > > 
> > 
> > I have sent the required files to you via separate email.
> > 
> > > If new behavior, I would be most suspicious of these commits in -rcu which
> > > recently entered -tip:
> > > 
> > > 19e4d983cda1 rcu: Place guard on rcu_all_qs() and rcu_note_context_switch() actions
> > > 913324b1364f rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle()
> > > fcdcfefafa45 rcu: Pull rcu_qs_ctr into rcu_dynticks structure
> > > 0919a0b7e7a5 rcu: Pull rcu_sched_qs_mask into rcu_dynticks structure
> > > caa7c8e34293 rcu: Make rcu_note_context_switch() do deferred NOCB wakeups
> > > 41e4b159d516 rcu: Make rcu_all_qs() do deferred NOCB wakeups
> > > b457a3356a68 rcu: Make call_rcu() do deferred NOCB wakeups
> > > 
> > > Does reverting any of these help?
> > 
> > I tried reverting the above commits. That does not help. I can still recreate the issue.
> 
> Thank you for testing, Sachin!
> 
> Could you please try building and testing with CONFIG_RCU_BOOST=y?
> You will need to enable CONFIG_RCU_EXPERT=y to see this Kconfig option.

Ah, but looking ahead to your .config file, you have CONFIG_PREEMPT=n,
which means boosting would not help and is not available in any case.

So it looks like there is a very long loop within an RCU read-side
critical section, and that this critical section needs to be broken
up a bit -- 21 seconds in pretty much any kind of critical section is
a bit excessive, after all.

							Thanx, Paul
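
The standard remedy for an over-long RCU read-side critical section is to
do the work in bounded chunks, ending the critical section between chunks
so that grace periods can complete. A minimal sketch of the pattern
(illustrative only, not code from this thread; TABLE_SIZE, table[] and
inspect() are hypothetical names):

  #include <linux/rcupdate.h>	/* rcu_read_lock(), rcu_dereference() */
  #include <linux/sched.h>	/* cond_resched() */

  #define TABLE_SIZE	1024
  #define SCAN_CHUNK	64	/* max entries per critical section */

  struct item { int val; };
  static struct item __rcu *table[TABLE_SIZE];	/* hypothetical */

  static void inspect(struct item *it) { }	/* read-only work */

  static void scan_table(void)
  {
  	int i;

  	rcu_read_lock();
  	for (i = 0; i < TABLE_SIZE; i++) {
  		struct item *it = rcu_dereference(table[i]);

  		if (it)
  			inspect(it);

  		/*
  		 * Every SCAN_CHUNK entries, end the read-side critical
  		 * section so a grace period can make progress, then open
  		 * a new one.  Pointers obtained in the old section must
  		 * not be used afterwards.
  		 */
  		if ((i % SCAN_CHUNK) == SCAN_CHUNK - 1) {
  			rcu_read_unlock();
  			cond_resched();
  			rcu_read_lock();
  		}
  	}
  	rcu_read_unlock();
  }

On a CONFIG_PREEMPT=n kernel like the one in Sachin's report, the
cond_resched() is also what gives RCU a chance to note a quiescent state
on the looping CPU.
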
Wanpeng Li Feb. 22, 2017, 9:03 a.m. UTC | #14
2017-02-02 23:55 GMT+08:00 Peter Zijlstra <peterz@infradead.org>:
> On Tue, Jan 31, 2017 at 10:22:47AM -0700, Ross Zwisler wrote:
>> On Tue, Jan 31, 2017 at 4:48 AM, Mike Galbraith <efault@gmx.de> wrote:
>> > On Tue, 2017-01-31 at 16:30 +0530, Sachin Sant wrote:
>
>
> Could some of you test this? It seems to cure things in my (very)
> limited testing.
>

Tested-by: Wanpeng Li <wanpeng.li@hotmail.com>

> ---
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 96e4ccc..b773821 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5609,7 +5609,7 @@ static void migrate_tasks(struct rq *dead_rq)
>  {
>         struct rq *rq = dead_rq;
>         struct task_struct *next, *stop = rq->stop;
> -       struct rq_flags rf, old_rf;
> +       struct rq_flags rf;
>         int dest_cpu;
>
>         /*
> @@ -5628,7 +5628,9 @@ static void migrate_tasks(struct rq *dead_rq)
>          * class method both need to have an up-to-date
>          * value of rq->clock[_task]
>          */
> +       rq_pin_lock(rq, &rf);
>         update_rq_clock(rq);
> +       rq_unpin_lock(rq, &rf);
>
>         for (;;) {
>                 /*
> @@ -5641,7 +5643,7 @@ static void migrate_tasks(struct rq *dead_rq)
>                 /*
>                  * pick_next_task assumes pinned rq->lock.
>                  */
> -               rq_pin_lock(rq, &rf);
> +               rq_repin_lock(rq, &rf);
>                 next = pick_next_task(rq, &fake_task, &rf);
>                 BUG_ON(!next);
>                 next->sched_class->put_prev_task(rq, next);
> @@ -5670,13 +5672,6 @@ static void migrate_tasks(struct rq *dead_rq)
>                         continue;
>                 }
>
> -               /*
> -                * __migrate_task() may return with a different
> -                * rq->lock held and a new cookie in 'rf', but we need
> -                * to preserve rf::clock_update_flags for 'dead_rq'.
> -                */
> -               old_rf = rf;
> -
>                 /* Find suitable destination for @next, with force if needed. */
>                 dest_cpu = select_fallback_rq(dead_rq->cpu, next);
>
> @@ -5685,7 +5680,6 @@ static void migrate_tasks(struct rq *dead_rq)
>                         raw_spin_unlock(&rq->lock);
>                         rq = dead_rq;
>                         raw_spin_lock(&rq->lock);
> -                       rf = old_rf;
>                 }
>                 raw_spin_unlock(&next->pi_lock);
>         }

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 96e4ccc..b773821 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5609,7 +5609,7 @@ static void migrate_tasks(struct rq *dead_rq)
 {
 	struct rq *rq = dead_rq;
 	struct task_struct *next, *stop = rq->stop;
-	struct rq_flags rf, old_rf;
+	struct rq_flags rf;
 	int dest_cpu;
 
 	/*
@@ -5628,7 +5628,9 @@ static void migrate_tasks(struct rq *dead_rq)
 	 * class method both need to have an up-to-date
 	 * value of rq->clock[_task]
 	 */
+	rq_pin_lock(rq, &rf);
 	update_rq_clock(rq);
+	rq_unpin_lock(rq, &rf);
 
 	for (;;) {
 		/*
@@ -5641,7 +5643,7 @@ static void migrate_tasks(struct rq *dead_rq)
 		/*
 		 * pick_next_task assumes pinned rq->lock.
 		 */
-		rq_pin_lock(rq, &rf);
+		rq_repin_lock(rq, &rf);
 		next = pick_next_task(rq, &fake_task, &rf);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
@@ -5670,13 +5672,6 @@ static void migrate_tasks(struct rq *dead_rq)
 			continue;
 		}
 
-		/*
-		 * __migrate_task() may return with a different
-		 * rq->lock held and a new cookie in 'rf', but we need
-		 * to preserve rf::clock_update_flags for 'dead_rq'.
-		 */
-		old_rf = rf;
-
 		/* Find suitable destination for @next, with force if needed. */
 		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
@@ -5685,7 +5680,6 @@ static void migrate_tasks(struct rq *dead_rq)
 			raw_spin_unlock(&rq->lock);
 			rq = dead_rq;
 			raw_spin_lock(&rq->lock);
-			rf = old_rf;
 		}
 		raw_spin_unlock(&next->pi_lock);
 	}
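
For readers tracing the pin/unpin dance in this patch: under
CONFIG_SCHED_DEBUG, the rq_flags cookie also carries clock_update_flags,
which is what the debug machinery uses to catch a missing
update_rq_clock(). A simplified sketch of the helpers as they appeared in
kernel/sched/sched.h around this time (paraphrased, so consult the tree
for the authoritative definitions):

  struct rq_flags {
  	unsigned long flags;
  	struct pin_cookie cookie;
  #ifdef CONFIG_SCHED_DEBUG
  	unsigned int clock_update_flags;	/* RQCF_* state */
  #endif
  };

  static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
  {
  	rf->cookie = lockdep_pin_lock(&rq->lock);
  #ifdef CONFIG_SCHED_DEBUG
  	/* A fresh pin context starts with no clock update seen. */
  	rf->clock_update_flags = 0;
  #endif
  }

  static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
  {
  #ifdef CONFIG_SCHED_DEBUG
  	/* Stash whether the clock was updated while pinned. */
  	if (rq->clock_update_flags > RQCF_ACT_SKIP)
  		rf->clock_update_flags = RQCF_UPDATED;
  #endif
  	lockdep_unpin_lock(&rq->lock, rf->cookie);
  }

  static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
  {
  	lockdep_repin_lock(&rq->lock, rf->cookie);
  #ifdef CONFIG_SCHED_DEBUG
  	/* Restore the state stashed by rq_unpin_lock(). */
  	rq->clock_update_flags |= rf->clock_update_flags;
  #endif
  }

Because rq_unpin_lock() stashes the clock-update state in rf and
rq_repin_lock() restores it, the old_rf copy removed above is not needed:
the flags already survive rq->lock being dropped and retaken across
__migrate_task().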