diff mbox series

[14/19] softirq: Make softirq control and processing RT aware

Message ID 20201113141734.324061522@linutronix.de
State New
Headers show
Series softirq: Cleanups and RT awareness | expand

Commit Message

Thomas Gleixner Nov. 13, 2020, 2:02 p.m. UTC
Provide a local lock based serialization for soft interrupts on RT which
allows the local_bh_disabled() sections and servicing soft interrupts to be
preemptible.

Provide the necessary inline helpers which allow to reuse the bulk of the
softirq processing code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/bottom_half.h |    2 
 kernel/softirq.c            |  207 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 201 insertions(+), 8 deletions(-)

Comments

Frederic Weisbecker Nov. 20, 2020, 12:26 a.m. UTC | #1
On Fri, Nov 13, 2020 at 03:02:21PM +0100, Thomas Gleixner wrote:
> +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
> +{
> +	unsigned long flags;
> +	int newcnt;
> +
> +	WARN_ON_ONCE(in_hardirq());
> +
> +	/* First entry of a task into a BH disabled section? */
> +	if (!current->softirq_disable_cnt) {
> +		if (preemptible()) {
> +			local_lock(&softirq_ctrl.lock);
> +			rcu_read_lock();

Ah you lock RCU because local_bh_disable() implies it and
since it doesn't disable preemption anymore, you must do it
explicitly?

Perhaps local_lock() should itself imply rcu_read_lock() ?

> +		} else {
> +			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
> +		}
> +	}
> +
> +	preempt_disable();

Do you really need to disable preemption here? Migration is disabled by local_lock()
and I can't figure out a scenario where the below can conflict with a preempting task.

> +	/*
> +	 * Track the per CPU softirq disabled state. On RT this is per CPU
> +	 * state to allow preemption of bottom half disabled sections.
> +	 */
> +	newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt);

__this_cpu_add_return() ?

> +	/*
> +	 * Reflect the result in the task state to prevent recursion on the
> +	 * local lock and to make softirq_count() & al work.
> +	 */
> +	current->softirq_disable_cnt = newcnt;
> +
> +	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
> +		raw_local_irq_save(flags);
> +		lockdep_softirqs_off(ip);
> +		raw_local_irq_restore(flags);
> +	}
> +	preempt_enable();
> +}
> +EXPORT_SYMBOL(__local_bh_disable_ip);
> +
> +static void __local_bh_enable(unsigned int cnt, bool unlock)
> +{
> +	unsigned long flags;
> +	int newcnt;
> +
> +	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
> +			    this_cpu_read(softirq_ctrl.cnt));

__this_cpu_read() ? Although that's lockdep only so not too important.

> +
> +	preempt_disable();

Same question about preempt_disable().

> +	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
> +		raw_local_irq_save(flags);
> +		lockdep_softirqs_on(_RET_IP_);
> +		raw_local_irq_restore(flags);
> +	}
> +
> +	newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt);

__this_cpu_sub_return?

> +	current->softirq_disable_cnt = newcnt;
> +	preempt_enable();
> +
> +	if (!newcnt && unlock) {
> +		rcu_read_unlock();
> +		local_unlock(&softirq_ctrl.lock);
> +	}
> +}

Thanks.
Thomas Gleixner Nov. 20, 2020, 1:27 p.m. UTC | #2
On Fri, Nov 20 2020 at 01:26, Frederic Weisbecker wrote:
> On Fri, Nov 13, 2020 at 03:02:21PM +0100, Thomas Gleixner wrote:
>> +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
>> +{
>> +	unsigned long flags;
>> +	int newcnt;
>> +
>> +	WARN_ON_ONCE(in_hardirq());
>> +
>> +	/* First entry of a task into a BH disabled section? */
>> +	if (!current->softirq_disable_cnt) {
>> +		if (preemptible()) {
>> +			local_lock(&softirq_ctrl.lock);
>> +			rcu_read_lock();
>
> Ah you lock RCU because local_bh_disable() implies it and
> since it doesn't disable preemption anymore, you must do it
> explicitly?
>
> Perhaps local_lock() should itself imply rcu_read_lock() ?

It's really only required for local_bh_disable(). Lemme add a comment.

>> +		} else {
>> +			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
>> +		}
>> +	}
>> +
>> +	preempt_disable();
>
> Do you really need to disable preemption here? Migration is disabled by local_lock()
> and I can't figure out a scenario where the below can conflict with a
> preempting task.

Indeed it's pointless.

>> +	/*
>> +	 * Track the per CPU softirq disabled state. On RT this is per CPU
>> +	 * state to allow preemption of bottom half disabled sections.
>> +	 */
>> +	newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt);
>
> __this_cpu_add_return() ?

Yep.

Thanks,

        tglx
Frederic Weisbecker Nov. 23, 2020, 1:44 p.m. UTC | #3
On Fri, Nov 13, 2020 at 03:02:21PM +0100, Thomas Gleixner wrote:
> +void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
> +{
> +	bool preempt_on = preemptible();
> +	unsigned long flags;
> +	u32 pending;
> +	int curcnt;
> +
> +	WARN_ON_ONCE(in_irq());
> +	lockdep_assert_irqs_enabled();
> +
> +	local_irq_save(flags);
> +	curcnt = this_cpu_read(softirq_ctrl.cnt);
> +
> +	/*
> +	 * If this is not reenabling soft interrupts, no point in trying to
> +	 * run pending ones.
> +	 */
> +	if (curcnt != cnt)
> +		goto out;
> +
> +	pending = local_softirq_pending();
> +	if (!pending || ksoftirqd_running(pending))
> +		goto out;
> +
> +	/*
> +	 * If this was called from non preemptible context, wake up the
> +	 * softirq daemon.
> +	 */
> +	if (!preempt_on) {
> +		wakeup_softirqd();
> +		goto out;
> +	}
> +
> +	/*
> +	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
> +	 * in_serving_softirq() become true.
> +	 */
> +	cnt = SOFTIRQ_OFFSET;
> +	__local_bh_enable(cnt, false);

But then you enter __do_softirq() with softirq_count() == SOFTIRQ_OFFSET.
__do_softirq() calls softirq_handle_begin() which then sets it back to SOFTIRQ_DISABLE_OFFSET...

> +	__do_softirq();
> +
> +out:
> +	__local_bh_enable(cnt, preempt_on);

You escape from there with a correct preempt_count() but still the softirq executes
under SOFTIRQ_DISABLE_OFFSET and not SOFTIRQ_OFFSET, making in_serving_softirq() false.

> +	local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL(__local_bh_enable_ip);

Thanks.
diff mbox series

Patch

--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -4,7 +4,7 @@ 
 
 #include <linux/preempt.h>
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS)
 extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
 #else
 static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -13,6 +13,7 @@ 
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/local_lock.h>
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/percpu.h>
@@ -100,20 +101,208 @@  EXPORT_PER_CPU_SYMBOL_GPL(hardirq_contex
 #endif
 
 /*
- * preempt_count and SOFTIRQ_OFFSET usage:
- * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
- *   softirq processing.
- * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
+ * SOFTIRQ_OFFSET usage:
+ *
+ * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
+ * to a per CPU counter and to task::softirqs_disabled_cnt.
+ *
+ * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
+ *   processing.
+ *
+ * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
  *   on local_bh_disable or local_bh_enable.
+ *
  * This lets us distinguish between whether we are currently processing
  * softirq and whether we just have bh disabled.
  */
+#ifdef CONFIG_PREEMPT_RT
 
-#ifdef CONFIG_TRACE_IRQFLAGS
 /*
- * This is for softirq.c-internal use, where hardirqs are disabled
+ * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
+ * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
+ * softirq disabled section to be preempted.
+ *
+ * The per task counter is used for softirq_count(), in_softirq() and
+ * in_serving_softirqs() because these counts are only valid when the task
+ * holding softirq_ctrl::lock is running.
+ *
+ * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
+ * the task which is in a softirq disabled section is preempted or blocks.
+ */
+struct softirq_ctrl {
+	local_lock_t	lock;
+	int		cnt;
+};
+
+static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
+	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
+};
+
+void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
+{
+	unsigned long flags;
+	int newcnt;
+
+	WARN_ON_ONCE(in_hardirq());
+
+	/* First entry of a task into a BH disabled section? */
+	if (!current->softirq_disable_cnt) {
+		if (preemptible()) {
+			local_lock(&softirq_ctrl.lock);
+			rcu_read_lock();
+		} else {
+			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
+		}
+	}
+
+	preempt_disable();
+	/*
+	 * Track the per CPU softirq disabled state. On RT this is per CPU
+	 * state to allow preemption of bottom half disabled sections.
+	 */
+	newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt);
+	/*
+	 * Reflect the result in the task state to prevent recursion on the
+	 * local lock and to make softirq_count() & al work.
+	 */
+	current->softirq_disable_cnt = newcnt;
+
+	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
+		raw_local_irq_save(flags);
+		lockdep_softirqs_off(ip);
+		raw_local_irq_restore(flags);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL(__local_bh_disable_ip);
+
+static void __local_bh_enable(unsigned int cnt, bool unlock)
+{
+	unsigned long flags;
+	int newcnt;
+
+	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
+			    this_cpu_read(softirq_ctrl.cnt));
+
+	preempt_disable();
+	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
+		raw_local_irq_save(flags);
+		lockdep_softirqs_on(_RET_IP_);
+		raw_local_irq_restore(flags);
+	}
+
+	newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt);
+	current->softirq_disable_cnt = newcnt;
+	preempt_enable();
+
+	if (!newcnt && unlock) {
+		rcu_read_unlock();
+		local_unlock(&softirq_ctrl.lock);
+	}
+}
+
+void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
+{
+	bool preempt_on = preemptible();
+	unsigned long flags;
+	u32 pending;
+	int curcnt;
+
+	WARN_ON_ONCE(in_irq());
+	lockdep_assert_irqs_enabled();
+
+	local_irq_save(flags);
+	curcnt = this_cpu_read(softirq_ctrl.cnt);
+
+	/*
+	 * If this is not reenabling soft interrupts, no point in trying to
+	 * run pending ones.
+	 */
+	if (curcnt != cnt)
+		goto out;
+
+	pending = local_softirq_pending();
+	if (!pending || ksoftirqd_running(pending))
+		goto out;
+
+	/*
+	 * If this was called from non preemptible context, wake up the
+	 * softirq daemon.
+	 */
+	if (!preempt_on) {
+		wakeup_softirqd();
+		goto out;
+	}
+
+	/*
+	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
+	 * in_serving_softirq() become true.
+	 */
+	cnt = SOFTIRQ_OFFSET;
+	__local_bh_enable(cnt, false);
+	__do_softirq();
+
+out:
+	__local_bh_enable(cnt, preempt_on);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__local_bh_enable_ip);
+
+/*
+ * Invoked from irq_enter_rcu() to prevent that tick_irq_enter()
+ * pointlessly wakes the softirq daemon. That's handled in __irq_exit_rcu().
+ * None of the above logic in the regular bh_disable/enable functions is
+ * required here.
+ */
+static inline void local_bh_disable_irq_enter(void)
+{
+	this_cpu_add(softirq_ctrl.cnt, SOFTIRQ_DISABLE_OFFSET);
+}
+
+static inline void local_bh_enable_irq_enter(void)
+{
+	this_cpu_sub(softirq_ctrl.cnt, SOFTIRQ_DISABLE_OFFSET);
+}
+
+/*
+ * Invoked from ksoftirqd_run() outside of the interrupt disabled section
+ * to acquire the per CPU local lock for reentrancy protection.
+ */
+static inline void ksoftirqd_run_begin(void)
+{
+	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
+	local_irq_disable();
+}
+
+/* Counterpart to ksoftirqd_run_begin() */
+static inline void ksoftirqd_run_end(void)
+{
+	__local_bh_enable(SOFTIRQ_OFFSET, true);
+	WARN_ON_ONCE(in_interrupt());
+	local_irq_enable();
+}
+
+static inline void softirq_handle_begin(void) { }
+static inline void softirq_handle_end(void) { }
+
+static inline void invoke_softirq(void)
+{
+	if (!this_cpu_read(softirq_ctrl.cnt))
+		wakeup_softirqd();
+}
+
+static inline bool should_wake_ksoftirqd(void)
+{
+	return !this_cpu_read(softirq_ctrl.cnt);
+}
+
+#else /* CONFIG_PREEMPT_RT */
+
+/*
+ * This one is for softirq.c-internal use, where hardirqs are disabled
  * legitimately:
  */
+#ifdef CONFIG_TRACE_IRQFLAGS
 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 {
 	unsigned long flags;
@@ -284,6 +473,8 @@  asmlinkage __visible void do_softirq(voi
 	local_irq_restore(flags);
 }
 
+#endif /* !CONFIG_PREEMPT_RT */
+
 /*
  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
  * but break the loop if need_resched() is set or after 2 ms.
@@ -388,8 +579,10 @@  asmlinkage __visible void __softirq_entr
 		pending >>= softirq_bit;
 	}
 
-	if (__this_cpu_read(ksoftirqd) == current)
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
+	    __this_cpu_read(ksoftirqd) == current)
 		rcu_softirq_qs();
+
 	local_irq_disable();
 
 	pending = local_softirq_pending();