diff mbox series

[v7] um: Enable preemption in UML

Message ID 20230928091042.1422353-1-anton.ivanov@cambridgegreys.com
State Superseded
Headers show
Series [v7] um: Enable preemption in UML | expand

Commit Message

Anton Ivanov Sept. 28, 2023, 9:10 a.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

1. Preemption requires saving/restoring FPU state. This patch
adds support for it using GCC intrinsics as well as appropriate
storage space in the thread structure. We reuse the space
which is already allocated for the userspace threads in the
thread_info structure.

2. irq critical sections need preempt_disable()/preempt_enable().

3. TLB critical sections need preempt_disable()/preempt_enable().

4. UML TLB flush is also invoked during a fork. This happens
with interrupts and preempt disabled, which disagrees with the
standard mm locking via rwsem. The mm lock for this code path
had to be replaced with RCU.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/Kconfig                   |  2 +-
 arch/um/include/asm/fpu/api.h     |  9 ++--
 arch/um/include/asm/thread_info.h |  2 +-
 arch/um/kernel/Makefile           |  4 ++
 arch/um/kernel/fpu.c              | 75 +++++++++++++++++++++++++++++++
 arch/um/kernel/irq.c              |  2 +
 arch/um/kernel/tlb.c              | 12 +++++
 7 files changed, 101 insertions(+), 5 deletions(-)
 create mode 100644 arch/um/kernel/fpu.c

Comments

Anton Ivanov Sept. 28, 2023, 9:12 a.m. UTC | #1
On 28/09/2023 10:10, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> 1. Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics as well as appropriate
> storage space in the thread structure. We reuse the space
> which is already allocated for the userspace threads in the
> thread_info structure.
> 
> 2. irq critical sections need preempt_disable()/preempt_enable().
> 
> 3. TLB critical sections need preempt_disable()/preempt_enable().
> 
> 4. UML TLB flush is also invoked during a fork. This happens
> with interrupts and preempt disabled which disagrees with the
> standard mm locking via rwsem. The mm lock for this code path
> had to be replaced with an rcu.

Cleanup, reuse of existing allocated space (it was unused in kernel
and kernel threads).

This is intended to go on top of the VM cleanup series.

Tested including avx benchmarks with simultaneous kernel FPU use (the
RAID6 benchmarks).

> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   arch/um/Kconfig                   |  2 +-
>   arch/um/include/asm/fpu/api.h     |  9 ++--
>   arch/um/include/asm/thread_info.h |  2 +-
>   arch/um/kernel/Makefile           |  4 ++
>   arch/um/kernel/fpu.c              | 75 +++++++++++++++++++++++++++++++
>   arch/um/kernel/irq.c              |  2 +
>   arch/um/kernel/tlb.c              | 12 +++++
>   7 files changed, 101 insertions(+), 5 deletions(-)
>   create mode 100644 arch/um/kernel/fpu.c
> 
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..19176fde82f3 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,7 @@ config UML
>   	select ARCH_HAS_KCOV
>   	select ARCH_HAS_STRNCPY_FROM_USER
>   	select ARCH_HAS_STRNLEN_USER
> -	select ARCH_NO_PREEMPT
> +	select ARCH_NO_PREEMPT_DYNAMIC
>   	select HAVE_ARCH_AUDITSYSCALL
>   	select HAVE_ARCH_KASAN if X86_64
>   	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..9e7680bf48f0 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -4,12 +4,15 @@
>   
>   /* Copyright (c) 2020 Cambridge Greys Ltd
>    * Copyright (c) 2020 Red Hat Inc.
> - * A set of "dummy" defines to allow the direct inclusion
> - * of x86 optimized copy, xor, etc routines into the
> - * UML code tree. */
> + */
>   
> +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
> +extern void kernel_fpu_begin(void);
> +extern void kernel_fpu_end(void);
> +#else
>   #define kernel_fpu_begin() (void)0
>   #define kernel_fpu_end() (void)0
> +#endif
>   
>   static inline bool irq_fpu_usable(void)
>   {
> diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
> index c7b4b49826a2..02935c37d58f 100644
> --- a/arch/um/include/asm/thread_info.h
> +++ b/arch/um/include/asm/thread_info.h
> @@ -23,7 +23,7 @@ struct thread_info {
>   	int			preempt_count;  /* 0 => preemptable,
>   						   <0 => BUG */
>   	struct thread_info	*real_thread;    /* Points to non-IRQ stack */
> -	unsigned long aux_fp_regs[FP_SIZE];	/* auxiliary fp_regs to save/restore
> +	unsigned long aux_fp_regs[FP_SIZE] __aligned(64);	/* auxiliary fp_regs to save/restore
>   						   them out-of-band */
>   };
>   
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..c616e884a488 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -26,9 +26,13 @@ obj-$(CONFIG_OF) += dtb.o
>   obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
>   obj-$(CONFIG_STACKTRACE) += stacktrace.o
>   obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
> +obj-$(CONFIG_PREEMPT) += fpu.o
> +obj-$(CONFIG_PREEMPT_VOLUNTARY) += fpu.o
>   
>   USER_OBJS := config.o
>   
> +CFLAGS_fpu.o += -mxsave -mxsaveopt
> +
>   include $(srctree)/arch/um/scripts/Makefile.rules
>   
>   targets := config.c config.tmp capflags.c
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..fb9eb6678e01
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <asm/fpu/api.h>
> +#include <asm/cpufeature.h>
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + */
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +/* UML and driver code it pulls out of the x86 tree knows about 387 features
> + * up to and including AVX512. TILE, etc are not yet supported.
> + */
> +
> +#define KNOWN_387_FEATURES 0xFF
> +
> +void kernel_fpu_begin(void)
> +{
> +	preempt_disable();
> +
> +	WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> +	this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
> +	else {
> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +			__builtin_ia32_xsave64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave64(&current_thread_info()->aux_fp_regs);
> +	}
> +#else
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt(&current->aux_fp_regs, KNOWN_387_FEATURES);
> +	else {
> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +			__builtin_ia32_xsave(&current->aux_fp_regs, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave(&current->aux_fp_regs);
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> +	WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +		__builtin_ia32_xrstor64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
> +	else
> +		__builtin_ia32_fxrstor64(&current_thread_info()->aux_fp_regs);
> +#else
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +		__builtin_ia32_xrstor(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
> +	else
> +		__builtin_ia32_fxrstor(&current_thread_info()->aux_fp_regs);
> +#endif
> +	this_cpu_write(in_kernel_fpu, false);
> +
> +	preempt_enable();
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +
> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
> index 635d44606bfe..c02525da45df 100644
> --- a/arch/um/kernel/irq.c
> +++ b/arch/um/kernel/irq.c
> @@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
>   
>   void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>   {
> +	preempt_disable();
>   	_sigio_handler(regs, irqs_suspended);
> +	preempt_enable();
>   }
>   
>   static struct irq_entry *get_irq_entry_by_fd(int fd)
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 247396b732e5..862174a2a3f5 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -322,6 +322,8 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
>   	unsigned long addr = start_addr, next;
>   	int ret = 0, userspace = 1;
>   
> +	preempt_disable();
> +
>   	hvc = INIT_HVC(mm, force, userspace);
>   	pgd = pgd_offset(mm, addr);
>   	do {
> @@ -346,6 +348,7 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
>   		       "process: %d\n", task_tgid_vnr(current));
>   		mm_idp->kill = 1;
>   	}
> +	preempt_enable();
>   }
>   
>   static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
> @@ -362,6 +365,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
>   
>   	mm = &init_mm;
>   	hvc = INIT_HVC(mm, force, userspace);
> +
> +	preempt_disable();
> +
>   	for (addr = start; addr < end;) {
>   		pgd = pgd_offset(mm, addr);
>   		if (!pgd_present(*pgd)) {
> @@ -449,6 +455,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
>   
>   	if (err < 0)
>   		panic("flush_tlb_kernel failed, errno = %d\n", err);
> +
> +	preempt_enable();
> +
>   	return updated;
>   }
>   
> @@ -466,6 +475,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
>   
>   	address &= PAGE_MASK;
>   
> +	preempt_disable();
> +
>   	pgd = pgd_offset(mm, address);
>   	if (!pgd_present(*pgd))
>   		goto kill;
> @@ -520,6 +531,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
>   
>   	*pte = pte_mkuptodate(*pte);
>   
> +	preempt_enable();
>   	return;
>   
>   kill:
Johannes Berg March 28, 2024, 9:27 a.m. UTC | #2
> @@ -23,7 +23,7 @@ struct thread_info {
>  	int			preempt_count;  /* 0 => preemptable,
>  						   <0 => BUG */
>  	struct thread_info	*real_thread;    /* Points to non-IRQ stack */
> -	unsigned long aux_fp_regs[FP_SIZE];	/* auxiliary fp_regs to save/restore
> +	unsigned long aux_fp_regs[FP_SIZE] __aligned(64);	/* auxiliary fp_regs to save/restore
>  						   them out-of-band */

nit: that comment looks strange now, maybe pull it up before the member?

 /* auxiliary ... out-of-band */
 unsigned long aux_fp_regs[...] __aligned(64);


> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
> +	else {
> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +			__builtin_ia32_xsave64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave64(&current_thread_info()->aux_fp_regs);
> +	}

Why not write this as a chain?

	if (likely(cpu_has(...))
		__builtin_ia32_xsaveopt64(...);
	else if (likely(cpu_has(...)))
		__builtin_ia32_xave64(...);
	else
		__builtin_ia32_fxsave64(...);


and IMHO pulling the "&current_thread_info()->aux_fp_regs" that appears
on all three of them into a local variable would make that more readable
too.

> +#else
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt(&current->aux_fp_regs, KNOWN_387_FEATURES);
> +	else {
> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +			__builtin_ia32_xsave(&current->aux_fp_regs, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave(&current->aux_fp_regs);
> +	}
> +#endif

And all of the above also applies for 32-bit,

> +void kernel_fpu_end(void)

and this as well.

johannes
Anton Ivanov March 28, 2024, 2:40 p.m. UTC | #3
On 28/03/2024 09:27, Johannes Berg wrote:
> 
>> @@ -23,7 +23,7 @@ struct thread_info {
>>   	int			preempt_count;  /* 0 => preemptable,
>>   						   <0 => BUG */
>>   	struct thread_info	*real_thread;    /* Points to non-IRQ stack */
>> -	unsigned long aux_fp_regs[FP_SIZE];	/* auxiliary fp_regs to save/restore
>> +	unsigned long aux_fp_regs[FP_SIZE] __aligned(64);	/* auxiliary fp_regs to save/restore
>>   						   them out-of-band */
> 
> nit: that comment looks strange now, maybe pull it up before the member?
> 
>   /* auxiliary ... out-of-band */
>   unsigned long aux_fp_regs[...] __aligned(64);
> 

Ack.

> 
>> +#ifdef CONFIG_64BIT
>> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> +		__builtin_ia32_xsaveopt64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
>> +	else {
>> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
>> +			__builtin_ia32_xsave64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
>> +		else
>> +			__builtin_ia32_fxsave64(&current_thread_info()->aux_fp_regs);
>> +	}
> 
> Why not write this as a chain?
> 
> 	if (likely(cpu_has(...))
> 		__builtin_ia32_xsaveopt64(...);
> 	else if (likely(cpu_has(...)))
> 		__builtin_ia32_xave64(...);
> 	else
> 		__builtin_ia32_fxsave64(...);
> 
> 
> and IMHO pulling the "&current_thread_info()->aux_fp_regs" that appears
> on all three of them into a local variable would make that more readable
> too.

Ack.

> 
>> +#else
>> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> +		__builtin_ia32_xsaveopt(&current->aux_fp_regs, KNOWN_387_FEATURES);
>> +	else {
>> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
>> +			__builtin_ia32_xsave(&current->aux_fp_regs, KNOWN_387_FEATURES);
>> +		else
>> +			__builtin_ia32_fxsave(&current->aux_fp_regs);
>> +	}
>> +#endif
> 
> And all of the above also applies for 32-bit,

Ack.

> 
>> +void kernel_fpu_end(void)
> 
> and this as well.
> 
> johannes
> 
> 

Will fix, rebase, retest and resubmit.
diff mbox series

Patch

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index b5e179360534..19176fde82f3 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,7 @@  config UML
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_STRNCPY_FROM_USER
 	select ARCH_HAS_STRNLEN_USER
-	select ARCH_NO_PREEMPT
+	select ARCH_NO_PREEMPT_DYNAMIC
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_KASAN if X86_64
 	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
index 71bfd9ef3938..9e7680bf48f0 100644
--- a/arch/um/include/asm/fpu/api.h
+++ b/arch/um/include/asm/fpu/api.h
@@ -4,12 +4,15 @@ 
 
 /* Copyright (c) 2020 Cambridge Greys Ltd
  * Copyright (c) 2020 Red Hat Inc.
- * A set of "dummy" defines to allow the direct inclusion
- * of x86 optimized copy, xor, etc routines into the
- * UML code tree. */
+ */
 
+#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+#else
 #define kernel_fpu_begin() (void)0
 #define kernel_fpu_end() (void)0
+#endif
 
 static inline bool irq_fpu_usable(void)
 {
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index c7b4b49826a2..02935c37d58f 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -23,7 +23,7 @@  struct thread_info {
 	int			preempt_count;  /* 0 => preemptable,
 						   <0 => BUG */
 	struct thread_info	*real_thread;    /* Points to non-IRQ stack */
-	unsigned long aux_fp_regs[FP_SIZE];	/* auxiliary fp_regs to save/restore
+	unsigned long aux_fp_regs[FP_SIZE] __aligned(64);	/* auxiliary fp_regs to save/restore
 						   them out-of-band */
 };
 
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 811188be954c..c616e884a488 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -26,9 +26,13 @@  obj-$(CONFIG_OF) += dtb.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
+obj-$(CONFIG_PREEMPT) += fpu.o
+obj-$(CONFIG_PREEMPT_VOLUNTARY) += fpu.o
 
 USER_OBJS := config.o
 
+CFLAGS_fpu.o += -mxsave -mxsaveopt
+
 include $(srctree)/arch/um/scripts/Makefile.rules
 
 targets := config.c config.tmp capflags.c
diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
new file mode 100644
index 000000000000..fb9eb6678e01
--- /dev/null
+++ b/arch/um/kernel/fpu.c
@@ -0,0 +1,75 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Cambridge Greys Ltd
+ * Copyright (C) 2023 Red Hat Inc
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <asm/fpu/api.h>
+#include <asm/cpufeature.h>
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ */
+
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+/* UML and driver code it pulls out of the x86 tree knows about 387 features
+ * up to and including AVX512. TILE, etc are not yet supported.
+ */
+
+#define KNOWN_387_FEATURES 0xFF
+
+void kernel_fpu_begin(void)
+{
+	preempt_disable();
+
+	WARN_ON(this_cpu_read(in_kernel_fpu));
+
+	this_cpu_write(in_kernel_fpu, true);
+
+#ifdef CONFIG_64BIT
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+		__builtin_ia32_xsaveopt64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
+	else {
+		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+			__builtin_ia32_xsave64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
+		else
+			__builtin_ia32_fxsave64(&current_thread_info()->aux_fp_regs);
+	}
+#else
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+		__builtin_ia32_xsaveopt(&current->aux_fp_regs, KNOWN_387_FEATURES);
+	else {
+		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+			__builtin_ia32_xsave(&current->aux_fp_regs, KNOWN_387_FEATURES);
+		else
+			__builtin_ia32_fxsave(&current->aux_fp_regs);
+	}
+#endif
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+	WARN_ON(!this_cpu_read(in_kernel_fpu));
+
+#ifdef CONFIG_64BIT
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+		__builtin_ia32_xrstor64(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
+	else
+		__builtin_ia32_fxrstor64(&current_thread_info()->aux_fp_regs);
+#else
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+		__builtin_ia32_xrstor(&current_thread_info()->aux_fp_regs, KNOWN_387_FEATURES);
+	else
+		__builtin_ia32_fxrstor(&current_thread_info()->aux_fp_regs);
+#endif
+	this_cpu_write(in_kernel_fpu, false);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 635d44606bfe..c02525da45df 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -195,7 +195,9 @@  static void _sigio_handler(struct uml_pt_regs *regs,
 
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
+	preempt_disable();
 	_sigio_handler(regs, irqs_suspended);
+	preempt_enable();
 }
 
 static struct irq_entry *get_irq_entry_by_fd(int fd)
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 247396b732e5..862174a2a3f5 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -322,6 +322,8 @@  static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 	unsigned long addr = start_addr, next;
 	int ret = 0, userspace = 1;
 
+	preempt_disable();
+
 	hvc = INIT_HVC(mm, force, userspace);
 	pgd = pgd_offset(mm, addr);
 	do {
@@ -346,6 +348,7 @@  static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 		       "process: %d\n", task_tgid_vnr(current));
 		mm_idp->kill = 1;
 	}
+	preempt_enable();
 }
 
 static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
@@ -362,6 +365,9 @@  static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 
 	mm = &init_mm;
 	hvc = INIT_HVC(mm, force, userspace);
+
+	preempt_disable();
+
 	for (addr = start; addr < end;) {
 		pgd = pgd_offset(mm, addr);
 		if (!pgd_present(*pgd)) {
@@ -449,6 +455,9 @@  static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 
 	if (err < 0)
 		panic("flush_tlb_kernel failed, errno = %d\n", err);
+
+	preempt_enable();
+
 	return updated;
 }
 
@@ -466,6 +475,8 @@  void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 
 	address &= PAGE_MASK;
 
+	preempt_disable();
+
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
 		goto kill;
@@ -520,6 +531,7 @@  void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 
 	*pte = pte_mkuptodate(*pte);
 
+	preempt_enable();
 	return;
 
 kill: