[v5,4/9] task_isolation: Add task isolation hooks to arch-independent code

Message ID ec4bacce635fed4e77ab46752d41432f270edf83.camel@marvell.com
State Superseded
Series "Task_isolation" mode

Commit Message

Alex Belits Nov. 23, 2020, 5:57 p.m. UTC
Kernel entry and exit hooks for task isolation are added to context
tracking and to the common entry points. The common handling of pending
work on exit to userspace now also processes isolation breaking, cleanup,
and isolation start.

Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
[abelits@marvell.com: adapted for kernel 5.10]
Signed-off-by: Alex Belits <abelits@marvell.com>
---
 include/linux/hardirq.h   |  2 ++
 include/linux/sched.h     |  2 ++
 kernel/context_tracking.c |  5 +++++
 kernel/entry/common.c     | 10 +++++++++-
 kernel/irq/irqdesc.c      |  5 +++++
 5 files changed, 23 insertions(+), 1 deletion(-)
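
The task_isolation_*() hooks used throughout this patch come from
<linux/isolation.h>, which is introduced earlier in the series and is not
shown on this page. A minimal sketch of the shape such a header
conventionally takes, assuming the usual kernel pattern of no-op stubs
when the feature is configured out (a sketch only, not the header's
actual contents):

  /* Sketch; the real <linux/isolation.h> is elsewhere in the series. */
  #ifdef CONFIG_TASK_ISOLATION
  extern void task_isolation_kernel_enter(void);
  extern void task_isolation_exit_to_user_mode(void);
  extern void task_isolation_before_pending_work_check(void);
  extern void task_isolation_start(void);
  #else
  /* Feature configured out: every hook compiles away to nothing. */
  static inline void task_isolation_kernel_enter(void) { }
  static inline void task_isolation_exit_to_user_mode(void) { }
  static inline void task_isolation_before_pending_work_check(void) { }
  static inline void task_isolation_start(void) { }
  #endif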

Comments

Thomas Gleixner Nov. 23, 2020, 10:31 p.m. UTC | #1
Alex,

On Mon, Nov 23 2020 at 17:57, Alex Belits wrote:
> Kernel entry and exit hooks for task isolation are added to context
> tracking and to the common entry points. The common handling of pending
> work on exit to userspace now also processes isolation breaking, cleanup,
> and isolation start.

Again: You fail to explain the rationale and just explain what the patch
is doing. I can see what the patch is doing from the patch itself.

> ---
>  include/linux/hardirq.h   |  2 ++
>  include/linux/sched.h     |  2 ++
>  kernel/context_tracking.c |  5 +++++
>  kernel/entry/common.c     | 10 +++++++++-
>  kernel/irq/irqdesc.c      |  5 +++++

At least 3 different subsystems, which means this again failed to be
split into separate patches.

>  extern void synchronize_irq(unsigned int irq);
> @@ -115,6 +116,7 @@ extern void rcu_nmi_exit(void);
>  	do {							\
>  		lockdep_off();					\
>  		arch_nmi_enter();				\
> +		task_isolation_kernel_enter();			\

Where is the explanation of why this is safe and correct vs. this fragile
code path?

> @@ -1762,6 +1763,7 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
>  #ifdef CONFIG_SMP
>  static __always_inline void scheduler_ipi(void)
>  {
> +	task_isolation_kernel_enter();

Why is the scheduler_ipi() special? Just because everything else cannot
happen at all? Oh well...

>  #define CREATE_TRACE_POINTS
>  #include <trace/events/context_tracking.h>
> @@ -100,6 +101,8 @@ void noinstr __context_tracking_enter(enum ctx_state state)
>  		__this_cpu_write(context_tracking.state, state);
>  	}
>  	context_tracking_recursion_exit();
> +
> +	task_isolation_exit_to_user_mode();

Why is this here at all and why is it outside of the recursion
protection

>  }
>  EXPORT_SYMBOL_GPL(__context_tracking_enter);
>  
> @@ -148,6 +151,8 @@ void noinstr __context_tracking_exit(enum ctx_state state)
>  	if (!context_tracking_recursion_enter())
>  		return;
>  
> +	task_isolation_kernel_enter();

while this is inside?

And why does the scheduler_ipi() on x86 call this twice? Just because?

>  	if (__this_cpu_read(context_tracking.state) == state) {
>  		if (__this_cpu_read(context_tracking.active)) {
>  			/*
> diff --git a/kernel/entry/common.c b/kernel/entry/common.c
>  static void exit_to_user_mode_prepare(struct pt_regs *regs)
>  {
> -	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
> +	unsigned long ti_work;
>  
>  	lockdep_assert_irqs_disabled();
>  
> +	task_isolation_before_pending_work_check();
> +
> +	ti_work = READ_ONCE(current_thread_info()->flags);
> +
>  	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
>  		ti_work = exit_to_user_mode_loop(regs, ti_work);
>  
> +	if (unlikely(ti_work & _TIF_TASK_ISOLATION))
> +		task_isolation_start();

Where is the explanation of this change?

Aside from that, how does any of this compile on x86 at all?

Answer: It does not ...

Stop this frenzy right now. It's going nowhere and all you achieve is to
make people more grumpy than they are already.

Thanks,

        tglx
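
The x86 build failure tglx alludes to is presumably _TIF_TASK_ISOLATION:
only the architectures this series converts define that flag, yet the
exit_to_user_mode_prepare() hunk references it from generic code. The
generic entry code already copes with optional TIF flags by defining the
missing ones to zero (see the _TIF_PATCH_PENDING fallback in
<linux/entry-common.h>); a fix along those lines, sketched here rather
than taken from the series, would be:

  /* Sketch: zero fallback so generic entry code still compiles on
   * architectures without the flag; the test then folds to dead code. */
  #ifndef _TIF_TASK_ISOLATION
  # define _TIF_TASK_ISOLATION	(0)
  #endif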

Patch

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 754f67ac4326..b9e604ae6a0d 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -7,6 +7,7 @@ 
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
 #include <linux/vtime.h>
+#include <linux/isolation.h>
 #include <asm/hardirq.h>
 
 extern void synchronize_irq(unsigned int irq);
@@ -115,6 +116,7 @@ extern void rcu_nmi_exit(void);
 	do {							\
 		lockdep_off();					\
 		arch_nmi_enter();				\
+		task_isolation_kernel_enter();			\
 		printk_nmi_enter();				\
 		BUG_ON(in_nmi() == NMI_MASK);			\
 		__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);	\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5d8b17aa544b..51c2d774250b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@ 
 #include <linux/rseq.h>
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
+#include <linux/isolation.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -1762,6 +1763,7 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
 #ifdef CONFIG_SMP
 static __always_inline void scheduler_ipi(void)
 {
+	task_isolation_kernel_enter();
 	/*
 	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
 	 * TIF_NEED_RESCHED remotely (for the first time) will also send
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 36a98c48aedc..379a48fd0e65 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -21,6 +21,7 @@ 
 #include <linux/hardirq.h>
 #include <linux/export.h>
 #include <linux/kprobes.h>
+#include <linux/isolation.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/context_tracking.h>
@@ -100,6 +101,8 @@ void noinstr __context_tracking_enter(enum ctx_state state)
 		__this_cpu_write(context_tracking.state, state);
 	}
 	context_tracking_recursion_exit();
+
+	task_isolation_exit_to_user_mode();
 }
 EXPORT_SYMBOL_GPL(__context_tracking_enter);
 
@@ -148,6 +151,8 @@ void noinstr __context_tracking_exit(enum ctx_state state)
 	if (!context_tracking_recursion_enter())
 		return;
 
+	task_isolation_kernel_enter();
+
 	if (__this_cpu_read(context_tracking.state) == state) {
 		if (__this_cpu_read(context_tracking.active)) {
 			/*
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index e9e2df3f3f9e..10a520894105 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -4,6 +4,7 @@ 
 #include <linux/entry-common.h>
 #include <linux/livepatch.h>
 #include <linux/audit.h>
+#include <linux/isolation.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -183,13 +184,20 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 
 static void exit_to_user_mode_prepare(struct pt_regs *regs)
 {
-	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
+	unsigned long ti_work;
 
 	lockdep_assert_irqs_disabled();
 
+	task_isolation_before_pending_work_check();
+
+	ti_work = READ_ONCE(current_thread_info()->flags);
+
 	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
 		ti_work = exit_to_user_mode_loop(regs, ti_work);
 
+	if (unlikely(ti_work & _TIF_TASK_ISOLATION))
+		task_isolation_start();
+
 	arch_exit_to_user_mode_prepare(regs, ti_work);
 
 	/* Ensure that the address limit is intact and no locks are held */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1a7723604399..b8f0a7574f55 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -16,6 +16,7 @@ 
 #include <linux/bitmap.h>
 #include <linux/irqdomain.h>
 #include <linux/sysfs.h>
+#include <linux/isolation.h>
 
 #include "internals.h"
 
@@ -669,6 +670,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 	unsigned int irq = hwirq;
 	int ret = 0;
 
+	task_isolation_kernel_enter();
+
 	irq_enter();
 
 #ifdef CONFIG_IRQ_DOMAIN
@@ -710,6 +713,8 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 	unsigned int irq;
 	int ret = 0;
 
+	task_isolation_kernel_enter();
+
 	/*
 	 * NMI context needs to be setup earlier in order to deal with tracing.
 	 */
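
For reference, exit_to_user_mode_prepare() as it would read with this
patch applied, reconstructed from the hunk above; the comments are an
editorial reading of the apparent intent, which the changelog does not
spell out:

  static void exit_to_user_mode_prepare(struct pt_regs *regs)
  {
  	unsigned long ti_work;

  	lockdep_assert_irqs_disabled();

  	/* Let task isolation flag its own pending work first ... */
  	task_isolation_before_pending_work_check();

  	/* ... so that this read observes anything it may have set. */
  	ti_work = READ_ONCE(current_thread_info()->flags);

  	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
  		ti_work = exit_to_user_mode_loop(regs, ti_work);

  	/* Start isolation last, after the exit-work loop has drained. */
  	if (unlikely(ti_work & _TIF_TASK_ISOLATION))
  		task_isolation_start();

  	arch_exit_to_user_mode_prepare(regs, ti_work);

  	/* Remainder of the function (address limit and lockdep
  	 * checks) is unchanged by this patch. */
  }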