[v5,6/9] task_isolation: arch/arm64: enable task isolation functionality

Message ID 91496c0cf8d24717a2641fc4d02063f3f10dc733.camel@marvell.com
State Superseded
Series "Task_isolation" mode | expand

Commit Message

Alex Belits Nov. 23, 2020, 5:58 p.m. UTC
In do_notify_resume(), call task_isolation_before_pending_work_check()
first, to report isolation breaking, then after handling all pending
work, call task_isolation_start() for TIF_TASK_ISOLATION tasks.

Add _TIF_TASK_ISOLATION to _TIF_WORK_MASK, and _TIF_SYSCALL_WORK,
define local NOTIFY_RESUME_LOOP_FLAGS to check in the loop, since we
don't clear _TIF_TASK_ISOLATION in the loop.
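
For reference, the resulting shape of do_notify_resume() (a condensed
sketch of the arch/arm64/kernel/signal.c hunk below; the elided body is
the existing pending-work handling) is roughly:

/*
 * Condensed sketch only, derived from the signal.c hunk later in this
 * patch; the "..." body is the existing pending-work handling.
 */
asmlinkage void do_notify_resume(struct pt_regs *regs,
				 unsigned long thread_flags)
{
	trace_hardirqs_off();

	/* Report isolation breaking before any pending work is handled. */
	task_isolation_before_pending_work_check();

	do {
		/* ... reschedule, signals, notify-resume, FP state, ... */

		local_daif_mask();
		thread_flags = READ_ONCE(current_thread_info()->flags);
		/* _TIF_TASK_ISOLATION is deliberately not in the loop mask. */
	} while (thread_flags & NOTIFY_RESUME_LOOP_FLAGS);

	/* Re-enter isolation only once all other pending work is done. */
	if (thread_flags & _TIF_TASK_ISOLATION)
		task_isolation_start();
}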

Early kernel entry code calls task_isolation_kernel_enter(). In
particular:

Vectors:
el1_sync -> el1_sync_handler() -> task_isolation_kernel_enter()
el1_irq -> asm_nmi_enter(), handle_arch_irq()
el1_error -> do_serror()
el0_sync -> el0_sync_handler()
el0_irq -> handle_arch_irq()
el0_error -> do_serror()
el0_sync_compat -> el0_sync_compat_handler()
el0_irq_compat -> handle_arch_irq()
el0_error_compat -> do_serror()

SDEI entry:
__sdei_asm_handler -> __sdei_handler() -> nmi_enter()

Functions called from there:
asm_nmi_enter() -> nmi_enter() -> task_isolation_kernel_enter()
asm_nmi_exit() -> nmi_exit() -> task_isolation_kernel_return()

Handlers:
do_serror() -> nmi_enter() -> task_isolation_kernel_enter()
  or task_isolation_kernel_enter()
el1_sync_handler() -> task_isolation_kernel_enter()
el0_sync_handler() -> task_isolation_kernel_enter()
el0_sync_compat_handler() -> task_isolation_kernel_enter()

handle_arch_irq() is irqchip-specific, most call handle_domain_irq()
There is a separate patch for irqchips that do not follow this rule.

handle_domain_irq() -> task_isolation_kernel_enter()
do_handle_IPI() -> task_isolation_kernel_enter() (may be redundant)
nmi_enter() -> task_isolation_kernel_enter()

Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
[abelits@marvell.com: simplified to match kernel 5.10]
Signed-off-by: Alex Belits <abelits@marvell.com>
---
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/barrier.h     |  1 +
 arch/arm64/include/asm/thread_info.h |  7 +++++--
 arch/arm64/kernel/entry-common.c     |  7 +++++++
 arch/arm64/kernel/ptrace.c           | 10 ++++++++++
 arch/arm64/kernel/signal.c           | 13 ++++++++++++-
 arch/arm64/kernel/smp.c              |  3 +++
 7 files changed, 39 insertions(+), 3 deletions(-)

Comments

Mark Rutland Dec. 2, 2020, 1:59 p.m. UTC | #1
Hi Alex,

On Mon, Nov 23, 2020 at 05:58:06PM +0000, Alex Belits wrote:
> In do_notify_resume(), call task_isolation_before_pending_work_check()
> first, to report isolation breaking, then after handling all pending
> work, call task_isolation_start() for TIF_TASK_ISOLATION tasks.
> 
> Add _TIF_TASK_ISOLATION to _TIF_WORK_MASK, and _TIF_SYSCALL_WORK,
> define local NOTIFY_RESUME_LOOP_FLAGS to check in the loop, since we
> don't clear _TIF_TASK_ISOLATION in the loop.
> 
> Early kernel entry code calls task_isolation_kernel_enter(). In
> particular:
> 
> Vectors:
> el1_sync -> el1_sync_handler() -> task_isolation_kernel_enter()
> el1_irq -> asm_nmi_enter(), handle_arch_irq()
> el1_error -> do_serror()
> el0_sync -> el0_sync_handler()
> el0_irq -> handle_arch_irq()
> el0_error -> do_serror()
> el0_sync_compat -> el0_sync_compat_handler()
> el0_irq_compat -> handle_arch_irq()
> el0_error_compat -> do_serror()
> 
> SDEI entry:
> __sdei_asm_handler -> __sdei_handler() -> nmi_enter()

As a heads-up, the arm64 entry code is changing, as we found that our
lockdep, RCU, and context-tracking management wasn't quite right. I have
a series of patches:

  https://lore.kernel.org/r/20201130115950.22492-1-mark.rutland@arm.com

... which are queued in the arm64 for-next/fixes branch. I intend to
have some further rework ready for the next cycle. I'd appreciate if you
could Cc me on any patches altering the arm64 entry code, as I have a
vested interest.

That was quite obviously broken if PROVE_LOCKING and NO_HZ_FULL were
chosen and context tracking was in use (e.g. with
CONTEXT_TRACKING_FORCE), so I'm assuming that this series has not been
tested in that configuration. What sort of testing has this seen?

It would be very helpful for the next posting if you could provide any
instructions on how to test this series (e.g. with pointers to any test
suite that you have), since it's very easy to introduce subtle breakage
in this area without realising it.

> 
> Functions called from there:
> asm_nmi_enter() -> nmi_enter() -> task_isolation_kernel_enter()
> asm_nmi_exit() -> nmi_exit() -> task_isolation_kernel_return()
> 
> Handlers:
> do_serror() -> nmi_enter() -> task_isolation_kernel_enter()
>   or task_isolation_kernel_enter()
> el1_sync_handler() -> task_isolation_kernel_enter()
> el0_sync_handler() -> task_isolation_kernel_enter()
> el0_sync_compat_handler() -> task_isolation_kernel_enter()
> 
> handle_arch_irq() is irqchip-specific, most call handle_domain_irq()
> There is a separate patch for irqchips that do not follow this rule.
> 
> handle_domain_irq() -> task_isolation_kernel_enter()
> do_handle_IPI() -> task_isolation_kernel_enter() (may be redundant)
> nmi_enter() -> task_isolation_kernel_enter()

The IRQ cases look very odd to me. With the rework I've just done for
arm64, we'll do the regular context tracking accounting before we ever
get into handle_domain_irq() or similar, so I suspect that's not
necessary at all?

> 
> Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
> [abelits@marvell.com: simplified to match kernel 5.10]
> Signed-off-by: Alex Belits <abelits@marvell.com>
> ---
>  arch/arm64/Kconfig                   |  1 +
>  arch/arm64/include/asm/barrier.h     |  1 +
>  arch/arm64/include/asm/thread_info.h |  7 +++++--
>  arch/arm64/kernel/entry-common.c     |  7 +++++++
>  arch/arm64/kernel/ptrace.c           | 10 ++++++++++
>  arch/arm64/kernel/signal.c           | 13 ++++++++++++-
>  arch/arm64/kernel/smp.c              |  3 +++
>  7 files changed, 39 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 1515f6f153a0..fc958d8d8945 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -141,6 +141,7 @@ config ARM64
>  	select HAVE_ARCH_PREL32_RELOCATIONS
>  	select HAVE_ARCH_SECCOMP_FILTER
>  	select HAVE_ARCH_STACKLEAK
> +	select HAVE_ARCH_TASK_ISOLATION
>  	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
>  	select HAVE_ARCH_TRACEHOOK
>  	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
> index c3009b0e5239..ad5a6dd380cf 100644
> --- a/arch/arm64/include/asm/barrier.h
> +++ b/arch/arm64/include/asm/barrier.h
> @@ -49,6 +49,7 @@
>  #define dma_rmb()	dmb(oshld)
>  #define dma_wmb()	dmb(oshst)
>  
> +#define instr_sync()	isb()

I think I've asked on prior versions of the patchset, but what is this
for? Where is it going to be used, and what is the expected semantics?
I'm wary of exposing this outside of arch code because there aren't
strong cross-architectural semantics, and at the least this requires
some documentation.

If it's unused, please delete it.

[...]

> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> index 43d4c329775f..8152760de683 100644
> --- a/arch/arm64/kernel/entry-common.c
> +++ b/arch/arm64/kernel/entry-common.c
> @@ -8,6 +8,7 @@
>  #include <linux/context_tracking.h>
>  #include <linux/ptrace.h>
>  #include <linux/thread_info.h>
> +#include <linux/isolation.h>
>  
>  #include <asm/cpufeature.h>
>  #include <asm/daifflags.h>
> @@ -77,6 +78,8 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
>  {
>  	unsigned long esr = read_sysreg(esr_el1);
>  
> +	task_isolation_kernel_enter();

For regular context tracking we only account the user<->kernel
transitions.

This is a kernel->kernel transition, so surely this is not necessary?

If nothing else, it doesn't feel well-balanced.

I have not looked at the rest of this patch (or series) in detail.

Thanks,
Mark.
Alex Belits Dec. 4, 2020, 12:37 a.m. UTC | #2
On Wed, 2020-12-02 at 13:59 +0000, Mark Rutland wrote:
> Hi Alex,
> 
> On Mon, Nov 23, 2020 at 05:58:06PM +0000, Alex Belits wrote:
> > In do_notify_resume(), call
> > task_isolation_before_pending_work_check()
> > first, to report isolation breaking, then after handling all
> > pending
> > work, call task_isolation_start() for TIF_TASK_ISOLATION tasks.
> > 
> > Add _TIF_TASK_ISOLATION to _TIF_WORK_MASK, and _TIF_SYSCALL_WORK,
> > define local NOTIFY_RESUME_LOOP_FLAGS to check in the loop, since
> > we
> > don't clear _TIF_TASK_ISOLATION in the loop.
> > 
> > Early kernel entry code calls task_isolation_kernel_enter(). In
> > particular:
> > 
> > Vectors:
> > el1_sync -> el1_sync_handler() -> task_isolation_kernel_enter()
> > el1_irq -> asm_nmi_enter(), handle_arch_irq()
> > el1_error -> do_serror()
> > el0_sync -> el0_sync_handler()
> > el0_irq -> handle_arch_irq()
> > el0_error -> do_serror()
> > el0_sync_compat -> el0_sync_compat_handler()
> > el0_irq_compat -> handle_arch_irq()
> > el0_error_compat -> do_serror()
> > 
> > SDEI entry:
> > __sdei_asm_handler -> __sdei_handler() -> nmi_enter()
> 
> As a heads-up, the arm64 entry code is changing, as we found that our
> lockdep, RCU, and context-tracking management wasn't quite right. I
> have
> a series of patches:
> 
> https://lore.kernel.org/r/20201130115950.22492-1-mark.rutland@arm.com
> 
> ... which are queued in the arm64 for-next/fixes branch. I intend to
> have some further rework ready for the next cycle.

Thanks!

>  I'd appreciate if you
> could Cc me on any patches altering the arm64 entry code, as I have a
> vested interest.

I will do that.

> 
> That was quite obviously broken if PROVE_LOCKING and NO_HZ_FULL were
> chosen and context tracking was in use (e.g. with
> CONTEXT_TRACKING_FORCE),

I am not yet sure about TRACE_IRQFLAGS, however NO_HZ_FULL and
CONTEXT_TRACKING have to be enabled for it to do anything.

I will check it with PROVE_LOCKING and your patches.

Entry code only adds an inline function that, if task isolation is
enabled, uses raw_local_irq_save() / raw_local_irq_restore(), low-level
operations and accesses per-CPU variables by offset, so at the very least
it should not add any problems. Even raw_local_irq_save() /
raw_local_irq_restore() probably should be removed, however I wanted to
have something that can be safely called if for whatever reason
interrupts were enabled before the kernel was fully entered.
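
For illustration, a minimal sketch of the shape being described; the
per-CPU variable (ll_isol_flags) and the enablement check are
placeholders, not the actual implementation from this series:

/*
 * Minimal sketch only: ll_isol_flags and the IS_ENABLED() guard are
 * placeholders; the real series keys this off its own per-CPU state.
 */
DECLARE_PER_CPU(unsigned long, ll_isol_flags);	/* hypothetical */

static inline void task_isolation_kernel_enter(void)
{
	unsigned long flags;

	if (!IS_ENABLED(CONFIG_TASK_ISOLATION))
		return;

	/* Safe even if interrupts were somehow enabled this early. */
	raw_local_irq_save(flags);

	/* Low-level per-CPU access by offset; no tracing, no RCU use. */
	raw_cpu_write(ll_isol_flags, 0);

	/* Resynchronize with anything this CPU may have missed. */
	instr_sync();

	raw_local_irq_restore(flags);
}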

>  so I'm assuming that this series has not been
> tested in that configuration. What sort of testing has this seen?
> 

On various available arm64 hardware, with the following enabled:

CONFIG_TASK_ISOLATION
CONFIG_NO_HZ_FULL
CONFIG_HIGH_RES_TIMERS

and disabled:

CONFIG_HZ_PERIODIC
CONFIG_NO_HZ_IDLE
CONFIG_NO_HZ

> It would be very helpful for the next posting if you could provide
> any
> instructions on how to test this series (e.g. with pointers to any
> test
> suite that you have), since it's very easy to introduce subtle
> breakage
> in this area without realising it.

I will. Currently libtmc ( https://github.com/abelits/libtmc ) contains
all the userspace code used for testing; however, I should document the
testing procedures.

> 
> > Functions called from there:
> > asm_nmi_enter() -> nmi_enter() -> task_isolation_kernel_enter()
> > asm_nmi_exit() -> nmi_exit() -> task_isolation_kernel_return()
> > 
> > Handlers:
> > do_serror() -> nmi_enter() -> task_isolation_kernel_enter()
> >   or task_isolation_kernel_enter()
> > el1_sync_handler() -> task_isolation_kernel_enter()
> > el0_sync_handler() -> task_isolation_kernel_enter()
> > el0_sync_compat_handler() -> task_isolation_kernel_enter()
> > 
> > handle_arch_irq() is irqchip-specific, most call
> > handle_domain_irq()
> > There is a separate patch for irqchips that do not follow this
> > rule.
> > 
> > handle_domain_irq() -> task_isolation_kernel_enter()
> > do_handle_IPI() -> task_isolation_kernel_enter() (may be redundant)
> > nmi_enter() -> task_isolation_kernel_enter()
> 
> The IRQ cases look very odd to me. With the rework I've just done for
> arm64, we'll do the regular context tracking accounting before we
> ever
> get into handle_domain_irq() or similar, so I suspect that's not
> necessary at all?

The goal is to call task_isolation_kernel_enter() before anything that
depends on CPU state, including the pipeline, that could remain
unsynchronized while the rest of the kernel was sending synchronization
IPIs. Similarly, task_isolation_kernel_return() should be called when it
is safe to turn off synchronization. If the rework allows this to be done
earlier, there is no need to touch more specific functions.

> --- a/arch/arm64/include/asm/barrier.h
> > +++ b/arch/arm64/include/asm/barrier.h
> > @@ -49,6 +49,7 @@
> >  #define dma_rmb()	dmb(oshld)
> >  #define dma_wmb()	dmb(oshst)
> >  
> > +#define instr_sync()	isb()
> 
> I think I've asked on prior versions of the patchset, but what is
> this
> for? Where is it going to be used, and what is the expected
> semantics?
> I'm wary of exposing this outside of arch code because there aren't
> strong cross-architectural semantics, and at the least this requires
> some documentation.

This is intended as an instruction pipeline flush for the situation
when arch-independent code has to synchronize with potential changes
that it missed. This is necessary after some other CPUs could modify
code (and send IPIs to notify the rest but not the isolated CPU) while this
one was still running an isolated task or, more likely, exiting from it,
so it might be unlucky enough to pick up the old instructions before that
point.

It's only used on kernel entry.
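
As an illustration of that usage on kernel entry (the guard function is
a placeholder, not an interface from this series):

/*
 * Conceptual sketch of the kernel-entry use described above;
 * this_cpu_was_isolated() is a placeholder, not a real interface.
 */
static void resync_if_needed(void)
{
	if (!this_cpu_was_isolated())
		return;

	/*
	 * Other CPUs may have patched kernel text and sent IPIs to
	 * everyone except this (isolated) CPU.  Flush the instruction
	 * pipeline so stale instructions are not executed; on arm64
	 * instr_sync() expands to isb().
	 */
	instr_sync();
}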

> 
> If it's unused, please delete it.
> 
> [...]
> 
> > diff --git a/arch/arm64/kernel/entry-common.c
> > b/arch/arm64/kernel/entry-common.c
> > index 43d4c329775f..8152760de683 100644
> > --- a/arch/arm64/kernel/entry-common.c
> > +++ b/arch/arm64/kernel/entry-common.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/context_tracking.h>
> >  #include <linux/ptrace.h>
> >  #include <linux/thread_info.h>
> > +#include <linux/isolation.h>
> >  
> >  #include <asm/cpufeature.h>
> >  #include <asm/daifflags.h>
> > @@ -77,6 +78,8 @@ asmlinkage void notrace el1_sync_handler(struct
> > pt_regs *regs)
> >  {
> >  	unsigned long esr = read_sysreg(esr_el1);
> >  
> > +	task_isolation_kernel_enter();
> 
> For regular context tracking we only account the user<->kernel
> transitions.
> 
> This is a kernel->kernel transition, so surely this is not necessary?

Right. If we entered the kernel from an isolated task, we have already
changed the flags.

> 
> If nothing else, it doesn't feel well-balanced.
> 
> I have not looked at the rest of this patch (or series) in detail.
> 
> Thanks,
> Mark.

My goal was to make sure that all transitions between kernel and
userspace are covered, so when in doubt I have added the corresponding
calls to those inline functions, and made them safe to be called from
those places. With the improved entry/exit code it should be easier to
be sure where this can be done in a cleaner way.
Mark Rutland Dec. 7, 2020, 11:57 a.m. UTC | #3
On Fri, Dec 04, 2020 at 12:37:32AM +0000, Alex Belits wrote:
> On Wed, 2020-12-02 at 13:59 +0000, Mark Rutland wrote:
> > On Mon, Nov 23, 2020 at 05:58:06PM +0000, Alex Belits wrote:

> > As a heads-up, the arm64 entry code is changing, as we found that
> > our lockdep, RCU, and context-tracking management wasn't quite
> > right. I have a series of patches:
> > 
> > https://lore.kernel.org/r/20201130115950.22492-1-mark.rutland@arm.com
> > 
> > ... which are queued in the arm64 for-next/fixes branch. I intend to
> > have some further rework ready for the next cycle.

> > That was quite obviously broken if PROVE_LOCKING and NO_HZ_FULL were
> > chosen and context tracking was in use (e.g. with
> > CONTEXT_TRACKING_FORCE),
> 
> I am not yet sure about TRACE_IRQFLAGS, however NO_HZ_FULL and
> CONTEXT_TRACKING have to be enabled for it to do anything.
> 
> I will check it with PROVE_LOCKING and your patches.
	
Thanks. In future, please do test this functionality with PROVE_LOCKING,
because otherwise bugs with RCU and IRQ state management will easily be
missed (as has been the case until very recently).

Testing with all those debug options enabled (and stating that you have
done so) will give reviewers much greater confidence that this works,
and if it does start spewing errors it saves everyone the time spent
identifying that.

> Entry code only adds an inline function that, if task isolation is
> enabled, uses raw_local_irq_save() / raw_local_irq_restore(), low-level
> operations and accesses per-CPU variables by offset, so at the very least
> it should not add any problems. Even raw_local_irq_save() /
> raw_local_irq_restore() probably should be removed, however I wanted to
> have something that can be safely called if for whatever reason
> interrupts were enabled before the kernel was fully entered.

Sure. In the new flows we have new enter_from_*() and exit_to_*()
functions where these calls should be able to live (and so we should be
able to ensure a more consistent environment).
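
For illustration, the kind of placement that would mean (the function
names follow the reworked entry code, but the bodies are simplified
sketches, not the actual arm64 implementation):

/*
 * Simplified sketch of where the isolation hooks could live in the
 * reworked entry paths; not the actual arm64 implementation.
 */
static void noinstr enter_from_user_mode(void)
{
	/* lockdep, RCU and context-tracking accounting happen here ... */

	/* ... so the isolation hook runs once per user->kernel entry. */
	task_isolation_kernel_enter();
}

static void noinstr exit_to_user_mode(void)
{
	/* Balanced hook on the way back out to userspace ... */
	task_isolation_kernel_return();

	/* ... followed by the usual context-tracking/RCU/lockdep exit. */
}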

The near-term plan for arm64 is to migrate more of the exception triage
assembly to C, then to rework the arm64 entry code and generic entry
code to be more similar, then to migrate as much as possible to the
generic entry code. So please bear in mind that anything that adds to
the differences between the two is going to be problematic.

> >  so I'm assuming that this series has not been
> > tested in that configuration. What sort of testing has this seen?
> 
> On various available arm64 hardware, with the following enabled:
> 
> CONFIG_TASK_ISOLATION
> CONFIG_NO_HZ_FULL
> CONFIG_HIGH_RES_TIMERS
> 
> and disabled:
> 
> CONFIG_HZ_PERIODIC
> CONFIG_NO_HZ_IDLE
> CONFIG_NO_HZ

Ok. I'd recommend looking at the various debug options under the "kernel
hacking" section in kconfig, and enabling some of those. At the very
least PROVE_LOCKING, ideally also using the lockup detectors and anything
else for debugging RCU, etc.

[...]

> > > Functions called from there:
> > > asm_nmi_enter() -> nmi_enter() -> task_isolation_kernel_enter()
> > > asm_nmi_exit() -> nmi_exit() -> task_isolation_kernel_return()
> > > 
> > > Handlers:
> > > do_serror() -> nmi_enter() -> task_isolation_kernel_enter()
> > >   or task_isolation_kernel_enter()
> > > el1_sync_handler() -> task_isolation_kernel_enter()
> > > el0_sync_handler() -> task_isolation_kernel_enter()
> > > el0_sync_compat_handler() -> task_isolation_kernel_enter()
> > > 
> > > handle_arch_irq() is irqchip-specific, most call
> > > handle_domain_irq()
> > > There is a separate patch for irqchips that do not follow this
> > > rule.
> > > 
> > > handle_domain_irq() -> task_isolation_kernel_enter()
> > > do_handle_IPI() -> task_isolation_kernel_enter() (may be redundant)
> > > nmi_enter() -> task_isolation_kernel_enter()
> > 
> > The IRQ cases look very odd to me. With the rework I've just done
> > for arm64, we'll do the regular context tracking accounting before
> > we ever get into handle_domain_irq() or similar, so I suspect that's
> > not necessary at all?
> 
> The goal is to call task_isolation_kernel_enter() before anything that
> depends on CPU state, including the pipeline, that could remain
> unsynchronized while the rest of the kernel was sending synchronization
> IPIs. Similarly, task_isolation_kernel_return() should be called when it
> is safe to turn off synchronization. If the rework allows this to be done
> earlier, there is no need to touch more specific functions.

Sure; I think that's sorted as a result of the changes I made recently.

> 
> > --- a/arch/arm64/include/asm/barrier.h
> > > +++ b/arch/arm64/include/asm/barrier.h
> > > @@ -49,6 +49,7 @@
> > >  #define dma_rmb()	dmb(oshld)
> > >  #define dma_wmb()	dmb(oshst)
> > >  
> > > +#define instr_sync()	isb()
> > 
> > I think I've asked on prior versions of the patchset, but what is
> > this for? Where is it going to be used, and what is the expected
> > semantics?  I'm wary of exposing this outside of arch code because
> > there aren't strong cross-architectural semantics, and at the least
> > this requires some documentation.
> 
> This is intended as an instruction pipeline flush for the situation
> when arch-independent code has to synchronize with potential changes
> that it missed. This is necessary after some other CPUs could modify
> code (and send IPIs to notify the rest but not the isolated CPU) while this
> one was still running an isolated task or, more likely, exiting from it,
> so it might be unlucky enough to pick up the old instructions before that
> point.
> 
> It's only used on kernel entry.

Sure. My point is that instr_sync() is a very generic sounding name
that doesn't get any of that across, and it's entirely undocumented.

I think something like arch_simulate_kick_cpu() would be better to get
the intended semantic across, and we should add thorough documentation
somewhere as to what this is meant to do.
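
For example, something along these lines (illustrative only; neither
the name nor the generic fallback exists at this point):

/*
 * Illustrative sketch of the suggestion above; neither this name nor
 * the generic fallback exists in the kernel as of this thread.
 */

/**
 * arch_simulate_kick_cpu() - act as if this CPU had received the
 * kernel-wide synchronization IPIs it ignored while running isolated.
 *
 * Must be called on kernel entry from an isolated task, before
 * executing any code that other CPUs may have patched in the meantime.
 * On arm64 this would map to isb().
 */
#ifndef arch_simulate_kick_cpu
#define arch_simulate_kick_cpu()	barrier()
#endif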

Thanks,
Mark.

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..fc958d8d8945 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -141,6 +141,7 @@  config ARM64
 	select HAVE_ARCH_PREL32_RELOCATIONS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_STACKLEAK
+	select HAVE_ARCH_TASK_ISOLATION
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index c3009b0e5239..ad5a6dd380cf 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -49,6 +49,7 @@ 
 #define dma_rmb()	dmb(oshld)
 #define dma_wmb()	dmb(oshst)
 
+#define instr_sync()	isb()
 /*
  * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz
  * and 0 otherwise.
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 1fbab854a51b..3321c69c46fe 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -68,6 +68,7 @@  void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
 #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
+#define TIF_TASK_ISOLATION	7	/* task isolation enabled for task */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
@@ -87,6 +88,7 @@  void arch_release_task_struct(struct task_struct *tsk);
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_TASK_ISOLATION	(1 << TIF_TASK_ISOLATION)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
@@ -101,11 +103,12 @@  void arch_release_task_struct(struct task_struct *tsk);
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT)
+				 _TIF_UPROBE | _TIF_FSCHECK | \
+				 _TIF_MTE_ASYNC_FAULT | _TIF_TASK_ISOLATION)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
-				 _TIF_SYSCALL_EMU)
+				 _TIF_SYSCALL_EMU | _TIF_TASK_ISOLATION)
 
 #ifdef CONFIG_SHADOW_CALL_STACK
 #define INIT_SCS							\
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 43d4c329775f..8152760de683 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -8,6 +8,7 @@ 
 #include <linux/context_tracking.h>
 #include <linux/ptrace.h>
 #include <linux/thread_info.h>
+#include <linux/isolation.h>
 
 #include <asm/cpufeature.h>
 #include <asm/daifflags.h>
@@ -77,6 +78,8 @@  asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
+	task_isolation_kernel_enter();
+
 	switch (ESR_ELx_EC(esr)) {
 	case ESR_ELx_EC_DABT_CUR:
 	case ESR_ELx_EC_IABT_CUR:
@@ -249,6 +252,8 @@  asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
+	task_isolation_kernel_enter();
+
 	switch (ESR_ELx_EC(esr)) {
 	case ESR_ELx_EC_SVC64:
 		el0_svc(regs);
@@ -321,6 +326,8 @@  asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
+	task_isolation_kernel_enter();
+
 	switch (ESR_ELx_EC(esr)) {
 	case ESR_ELx_EC_SVC32:
 		el0_svc_compat(regs);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index f49b349e16a3..2941f2b16796 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -29,6 +29,7 @@ 
 #include <linux/regset.h>
 #include <linux/tracehook.h>
 #include <linux/elf.h>
+#include <linux/isolation.h>
 
 #include <asm/compat.h>
 #include <asm/cpufeature.h>
@@ -1803,6 +1804,15 @@  int syscall_trace_enter(struct pt_regs *regs)
 			return NO_SYSCALL;
 	}
 
+	/*
+	 * In task isolation mode, we may prevent the syscall from
+	 * running, and if so we also deliver a signal to the process.
+	 */
+	if (test_thread_flag(TIF_TASK_ISOLATION)) {
+		if (task_isolation_syscall(regs->syscallno) == -1)
+			return NO_SYSCALL;
+	}
+
 	/* Do the secure computing after ptrace; failures should be fast. */
 	if (secure_computing() == -1)
 		return NO_SYSCALL;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8184cad8890..e3a82b75e39d 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -20,6 +20,7 @@ 
 #include <linux/tracehook.h>
 #include <linux/ratelimit.h>
 #include <linux/syscalls.h>
+#include <linux/isolation.h>
 
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
@@ -911,6 +912,11 @@  static void do_signal(struct pt_regs *regs)
 	restore_saved_sigmask();
 }
 
+#define NOTIFY_RESUME_LOOP_FLAGS \
+	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
+	 _TIF_FOREIGN_FPSTATE | _TIF_UPROBE | _TIF_FSCHECK | \
+	 _TIF_MTE_ASYNC_FAULT)
+
 asmlinkage void do_notify_resume(struct pt_regs *regs,
 				 unsigned long thread_flags)
 {
@@ -921,6 +927,8 @@  asmlinkage void do_notify_resume(struct pt_regs *regs,
 	 */
 	trace_hardirqs_off();
 
+	task_isolation_before_pending_work_check();
+
 	do {
 		/* Check valid user FS if needed */
 		addr_limit_user_check();
@@ -956,7 +964,10 @@  asmlinkage void do_notify_resume(struct pt_regs *regs,
 
 		local_daif_mask();
 		thread_flags = READ_ONCE(current_thread_info()->flags);
-	} while (thread_flags & _TIF_WORK_MASK);
+	} while (thread_flags & NOTIFY_RESUME_LOOP_FLAGS);
+
+	if (thread_flags & _TIF_TASK_ISOLATION)
+		task_isolation_start();
 }
 
 unsigned long __ro_after_init signal_minsigstksz;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18e9727d3f64..4401eac4710c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -33,6 +33,7 @@ 
 #include <linux/kernel_stat.h>
 #include <linux/kexec.h>
 #include <linux/kvm_host.h>
+#include <linux/isolation.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -890,6 +891,8 @@  static void do_handle_IPI(int ipinr)
 {
 	unsigned int cpu = smp_processor_id();
 
+	task_isolation_kernel_enter();
+
 	if ((unsigned)ipinr < NR_IPI)
 		trace_ipi_entry_rcuidle(ipi_types[ipinr]);