Message ID | 20221207213047.1246146-1-bodong@nvidia.com |
---|---|
State | New |
Headers | show |
Series | [SRU,J:linux-bluefield,1/1] tick/rcu: Stop allowing RCU_SOFTIRQ in idle | expand |
On 12/7/22 2:30 PM, Bodong Wang wrote: > From: Frederic Weisbecker <frederic@kernel.org> > > BugLink: https://bugs.launchpad.net/bugs/1998857 > > RCU_SOFTIRQ used to be special in that it could be raised on purpose > within the idle path to prevent from stopping the tick. Some code still > prevents from unnecessary warnings related to this specific behaviour > while entering in dynticks-idle mode. > > However the nohz layout has changed quite a bit in ten years, and the > removal of CONFIG_RCU_FAST_NO_HZ has been the final straw to this > safe-conduct. Now the RCU_SOFTIRQ vector is expected to be raised from > sane places. > > A remaining corner case is admitted though when the vector is invoked > in fragile hotplug path. > > Signed-off-by: Frederic Weisbecker <frederic@kernel.org> > Cc: Thomas Gleixner <tglx@linutronix.de> > Cc: Peter Zijlstra <peterz@infradead.org> > Cc: Paul E. McKenney <paulmck@kernel.org> > Cc: Paul Menzel <pmenzel@molgen.mpg.de> > (cherry picked from commit 0345691b24c076655ce8f0f4bfd24cba3467ccbd) > Signed-off-by: Bodong Wang <bodong@nvidia.com> > > Change-Id: I1d3d2fe0fe8cd6bb6e36f0bb0e7404a19e0392dc > --- > include/linux/interrupt.h | 8 ++++++- > kernel/time/tick-sched.c | 50 +++++++++++++++++++++++++++++++-------- > 2 files changed, 47 insertions(+), 11 deletions(-) > > diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h > index 1f22a30c0963..3406da21e787 100644 > --- a/include/linux/interrupt.h > +++ b/include/linux/interrupt.h > @@ -528,7 +528,13 @@ enum > NR_SOFTIRQS > }; > > -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) > +/* > + * Ignoring the RCU vector after ksoftirqd is parked is fine > + * because: > + * 1) rcutree_migrate_callbacks() takes care of the queue. > + * 2) rcu_report_dead() reports the final quiescent states. > + */ > +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) > > /* map softirq index to softirq name. update 'softirq_to_name' in > * kernel/softirq.c when adding a new softirq. > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c > index 9c6f661fb436..69ff819ddbbb 100644 > --- a/kernel/time/tick-sched.c > +++ b/kernel/time/tick-sched.c > @@ -983,6 +983,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) > __tick_nohz_full_update_tick(ts, ktime_get()); > } > > +/* > + * A pending softirq outside an IRQ (or softirq disabled section) context > + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't > + * reach here due to the need_resched() early check in can_stop_idle_tick(). > + * > + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the > + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, > + * triggering the below since wakep_softirqd() is ignored. > + * > + */ > +static bool report_idle_softirq(void) > +{ > + static int ratelimit; > + unsigned int pending = local_softirq_pending(); > + > + if (likely(!pending)) > + return false; > + > + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ > + if (!cpu_active(smp_processor_id())) { > + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; > + if (!pending) > + return false; > + } > + > + if (ratelimit < 10) > + return false; > + > + /* On RT, softirqs handling may be waiting on some lock */ > + if (!local_bh_blocked()) > + return false; > + > + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", > + pending); > + ratelimit++; > + > + return true; > +} > + > static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) > { > /* > @@ -1009,17 +1048,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) > if (need_resched()) > return false; > > - if (unlikely(local_softirq_pending())) { > - static int ratelimit; > - > - if (ratelimit < 10 && !local_bh_blocked() && > - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { > - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", > - (unsigned int) local_softirq_pending()); > - ratelimit++; > - } > + if (unlikely(report_idle_softirq())) > return false; > - } > > if (tick_nohz_full_enabled()) { > /* In the wrong thread.
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 1f22a30c0963..3406da21e787 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -528,7 +528,13 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * Ignoring the RCU vector after ksoftirqd is parked is fine + * because: + * 1) rcutree_migrate_callbacks() takes care of the queue. + * 2) rcu_report_dead() reports the final quiescent states. + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 9c6f661fb436..69ff819ddbbb 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -983,6 +983,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) __tick_nohz_full_update_tick(ts, ktime_get()); } +/* + * A pending softirq outside an IRQ (or softirq disabled section) context + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't + * reach here due to the need_resched() early check in can_stop_idle_tick(). + * + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, + * triggering the below since wakep_softirqd() is ignored. + * + */ +static bool report_idle_softirq(void) +{ + static int ratelimit; + unsigned int pending = local_softirq_pending(); + + if (likely(!pending)) + return false; + + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ + if (!cpu_active(smp_processor_id())) { + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; + if (!pending) + return false; + } + + if (ratelimit < 10) + return false; + + /* On RT, softirqs handling may be waiting on some lock */ + if (!local_bh_blocked()) + return false; + + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + + return true; +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -1009,17 +1048,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (need_resched()) return false; - if (unlikely(local_softirq_pending())) { - static int ratelimit; - - if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } + if (unlikely(report_idle_softirq())) return false; - } if (tick_nohz_full_enabled()) { /*