diff mbox series

[v2] um: Enable preemption in UML

Message ID 20230921092836.190291-1-anton.ivanov@cambridgegreys.com
State Superseded
Headers show
Series [v2] um: Enable preemption in UML | expand

Commit Message

Anton Ivanov Sept. 21, 2023, 9:28 a.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Preemption requires saving/restoring FPU state. This patch
adds support for it using GCC intrinsics.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/Kconfig                         |  1 -
 arch/um/Makefile                        |  3 +-
 arch/um/include/asm/fpu/api.h           |  4 +-
 arch/um/include/asm/processor-generic.h |  3 +
 arch/um/kernel/Makefile                 |  2 +-
 arch/um/kernel/fpu.c                    | 83 +++++++++++++++++++++++++
 arch/um/kernel/irq.c                    |  2 +
 7 files changed, 93 insertions(+), 5 deletions(-)
 create mode 100644 arch/um/kernel/fpu.c

Comments

Anton Ivanov Sept. 21, 2023, 9:34 a.m. UTC | #1
On 21/09/2023 10:28, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics.

Marginally slower than before

3m 03s for find /usr -type f -exec cat {} > /dev/null \;

Still, better than the non-PREEMPT 20 minutes for the same job

Tested including the use of stress: stress --cpu 16 --io 16 --hdd 16 --timeout 60

This test was making non-PREEMPT unusable. You had to wait until it completes to get it to
respond to anything. PREEMPT remains usable and responsive throughout.

All in all - we have to go back and finish the CPU flags translation work as we really
need working alternatives and/or borrowing the CPU stuff from the x86 tree instead of
reimplementing it.

> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   arch/um/Kconfig                         |  1 -
>   arch/um/Makefile                        |  3 +-
>   arch/um/include/asm/fpu/api.h           |  4 +-
>   arch/um/include/asm/processor-generic.h |  3 +
>   arch/um/kernel/Makefile                 |  2 +-
>   arch/um/kernel/fpu.c                    | 83 +++++++++++++++++++++++++
>   arch/um/kernel/irq.c                    |  2 +
>   7 files changed, 93 insertions(+), 5 deletions(-)
>   create mode 100644 arch/um/kernel/fpu.c
> 
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..603f5fd82293 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,6 @@ config UML
>   	select ARCH_HAS_KCOV
>   	select ARCH_HAS_STRNCPY_FROM_USER
>   	select ARCH_HAS_STRNLEN_USER
> -	select ARCH_NO_PREEMPT
>   	select HAVE_ARCH_AUDITSYSCALL
>   	select HAVE_ARCH_KASAN if X86_64
>   	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/Makefile b/arch/um/Makefile
> index 82f05f250634..6454f735cc9a 100644
> --- a/arch/um/Makefile
> +++ b/arch/um/Makefile
> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
>   	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap	\
>   	-Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
>   	-Din6addr_loopback=kernel_in6addr_loopback \
> -	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
> +	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
> +	-mxsave -mxsaveopt
>   
>   KBUILD_RUSTFLAGS += -Crelocation-model=pie
>   
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..0094624ae9b4 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -8,8 +8,8 @@
>    * of x86 optimized copy, xor, etc routines into the
>    * UML code tree. */
>   
> -#define kernel_fpu_begin() (void)0
> -#define kernel_fpu_end() (void)0
> +void kernel_fpu_begin(void);
> +void kernel_fpu_end(void);
>   
>   static inline bool irq_fpu_usable(void)
>   {
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 7414154b8e9a..d5924d0e16a7 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -44,6 +44,9 @@ struct thread_struct {
>   			} cb;
>   		} u;
>   	} request;
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTARY)
> +    u8 fpu[2048] __aligned(64); /* Intel docs require the xsave/xrstor area to be 64-byte aligned */
> +#endif
>   };
>   
>   #define INIT_THREAD \
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..5d9fbaa544be 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
>   
>   obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
>   	physmem.o process.o ptrace.o reboot.o sigio.o \
> -	signal.o sysrq.o time.o tlb.o trap.o \
> +	signal.o sysrq.o time.o tlb.o trap.o fpu.o \
>   	um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
>   obj-y += load_file.o
>   
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..fb90da5a0227
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,83 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <asm/fpu/api.h>
> +#include <asm/cpufeature.h>
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + */
> +
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTARY)
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +#endif
> +
> +/* UML knows about 387 features up to and including AVX512, tile, etc are not yet
> + * supported.
> + */
> +
> +#define KNOWN_387_FEATURES 0xFF
> +
> +
> +void kernel_fpu_begin(void)
> +{
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTARY)
> +	preempt_disable();
> +
> +	WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> +	this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else {
> +		if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
> +			__builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave64(&current->thread.fpu);
> +	}
> +#else
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else {
> +		if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
> +			__builtin_ia32_xsave(&current->thread.fpu, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave(&current->thread.fpu);
> +	}
> +#endif
> +#endif
> +}
> +
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTARY)
> +	WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +		__builtin_ia32_xrstor64(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else
> +		__builtin_ia32_fxrstor64(&current->thread.fpu);
> +#else
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +		__builtin_ia32_xrstor(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else
> +		__builtin_ia32_fxrstor(&current->thread.fpu);
> +#endif
> +	this_cpu_write(in_kernel_fpu, false);
> +
> +	preempt_enable();
> +#endif
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +
> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
> index 635d44606bfe..c02525da45df 100644
> --- a/arch/um/kernel/irq.c
> +++ b/arch/um/kernel/irq.c
> @@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
>   
>   void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>   {
> +	preempt_disable();
>   	_sigio_handler(regs, irqs_suspended);
> +	preempt_enable();
>   }
>   
>   static struct irq_entry *get_irq_entry_by_fd(int fd)
Johannes Berg Sept. 21, 2023, 9:35 a.m. UTC | #2
Hi,

Haven't chimed in here before, but thanks for looking at this! I
actually really wanted at least the things you get for debug from this
in the past, so much appreciated.

I think I actually tried a simpler approach and it kind of even worked,
but my userspace likely has very little FPU usage.


> +/* UML knows about 387 features up to and including AVX512, tile, etc are not yet
> + * supported.
> + */

Do you ensure that somehow? Warn if it's there at least? I may have
missed that.

Or is it just for the saving? But what if you don't save enough and the
host CPU has more?


> +void kernel_fpu_begin(void)
> +{
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)

maybe leave it as an inline in case it'd be empty, i.e. neither of these
are defined?

> +	preempt_disable();
> +
> +	WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> +	this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else {
> +		if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
> +			__builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave64(&current->thread.fpu);
> +	}

nit: you could make all of these else if chains, and avoid the braces:

if (likely(...))
	...
else if (cpu_has(...))
	...
else
	...


> +void kernel_fpu_end(void)

same comments as above apply here.


>  void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>  {
> +	preempt_disable();
>  	_sigio_handler(regs, irqs_suspended);
> +	preempt_enable();
> 

I may have to look at those time-travel bits here :)

Thanks,
johannes
Anton Ivanov Sept. 21, 2023, 9:42 a.m. UTC | #3
On 21/09/2023 10:35, Johannes Berg wrote:
> Hi,
> 
> Haven't chimed in here before, but thanks for looking at this! I
> actually really wanted at least the things you get for debug from this
> in the past, so much appreciated.
> 
> I think I actually tried a simpler approach and it kind of even worked,
> but my userspace likely has very little FPU usage.
> 
> 
>> +/* UML knows about 387 features up to and including AVX512, tile, etc are not yet
>> + * supported.
>> + */
> 
> Do you ensure that somehow? Warn if it's there at least? I may have
> missed that.

We can if we want. We can just cut-off feature reflection into the kernel in the
setup. Presently it reflects all features. That can be filtered if we want to.
For example - x86 has boot flags to disable xrestore and xrestoreopt

> 
> Or is it just for the saving? But what if you don't save enough and the
> host CPU has more?

This is just for the saving. AFAIK no linux module touches _TILE and the other recent
fancy stuff. The host kernel source looks that way.

> 
> 
>> +void kernel_fpu_begin(void)
>> +{
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
> 
> maybe leave it as an inline in case it'd be empty, i.e. neither of these
> are defined?
> 
>> +	preempt_disable();
>> +
>> +	WARN_ON(this_cpu_read(in_kernel_fpu));
>> +
>> +	this_cpu_write(in_kernel_fpu, true);
>> +
>> +#ifdef CONFIG_64BIT
>> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> +		__builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
>> +	else {
>> +		if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
>> +			__builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
>> +		else
>> +			__builtin_ia32_fxsave64(&current->thread.fpu);
>> +	}
> 
> nit: you could make all of these else if chains, and avoid the braces:
> 
> if (likely(...))
> 	...
> else if (cpu_has(...))
> 	...
> else
> 	...
> 
> 
>> +void kernel_fpu_end(void)
> 
> same comments as above apply here.
> 
> 
>>   void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>>   {
>> +	preempt_disable();
>>   	_sigio_handler(regs, irqs_suspended);
>> +	preempt_enable();
>>
> 
> I may have to look at those time-travel bits here :)

Yes. I was going to ask you to double check if they all work as intended.

Basic use without time-travel seems OK.

> 
> Thanks,
> johannes
> 
> _______________________________________________
> linux-um mailing list
> linux-um@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-um
>
Anton Ivanov Sept. 21, 2023, 10:32 a.m. UTC | #4
On 21/09/2023 10:42, Anton Ivanov wrote:
> 
> 
> On 21/09/2023 10:35, Johannes Berg wrote:
>> Hi,
>>
>> Haven't chimed in here before, but thanks for looking at this! I
>> actually really wanted at least the things you get for debug from this
>> in the past, so much appreciated.
>>
>> I think I actually tried a simpler approach and it kind of even worked,
>> but my userspace likely has very little FPU usage.
>>
>>
>>> +/* UML knows about 387 features up to and including AVX512, tile, etc are not yet
>>> + * supported.
>>> + */
>>
>> Do you ensure that somehow? Warn if it's there at least? I may have
>> missed that.
> 
> We can if we want. We can just cut-off feature reflection into the kernel in the
> setup. Presently it reflects all features. That can be filtered if we want to.
> For example - x86 has boot flags to disable xrestore and xrestoreopt
> 
>>
>> Or is it just for the saving? But what if you don't save enough and the
>> host CPU has more?
> 
> This is just for the saving. AFAIK no linux module touches _TILE and the other recent
> fancy stuff. The host kernel source looks that way.

This brings back the question of

1. Alternatives (this has become even more nightmarish since I last looked at it)
2. Static branch by key - this actually looks doable and is used by nearly all
crypto, I may look at it when I have some free time.

> 
>>
>>
>>> +void kernel_fpu_begin(void)
>>> +{
>>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>>
>> maybe leave it as an inline in case it'd be empty, i.e. neither of these
>> are defined?
>>
>>> +    preempt_disable();
>>> +
>>> +    WARN_ON(this_cpu_read(in_kernel_fpu));
>>> +
>>> +    this_cpu_write(in_kernel_fpu, true);
>>> +
>>> +#ifdef CONFIG_64BIT
>>> +    if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>>> +        __builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
>>> +    else {
>>> +        if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
>>> +            __builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
>>> +        else
>>> +            __builtin_ia32_fxsave64(&current->thread.fpu);
>>> +    }
>>
>> nit: you could make all of these else if chains, and avoid the braces:
>>
>> if (likely(...))
>>     ...
>> else if (cpu_has(...))
>>     ...
>> else
>>     ...
>>
>>
>>> +void kernel_fpu_end(void)
>>
>> same comments as above apply here.
>>
>>
>>>   void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>>>   {
>>> +    preempt_disable();
>>>       _sigio_handler(regs, irqs_suspended);
>>> +    preempt_enable();
>>>
>>
>> I may have to look at those time-travel bits here :)
> 
> Yes. I was going to ask you to double check if they all work as intended.
> 
> Basic use seems without tt seems OK.
> 
>>
>> Thanks,
>> johannes
>>
>> _______________________________________________
>> linux-um mailing list
>> linux-um@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-um
>>
>
Peter Lafreniere Sept. 21, 2023, 1:46 p.m. UTC | #5
On Thu, Sep 21, 2023 at 05:28, anton.ivanov@cambridgegreys.com wrote:
> 
> From: Anton Ivanov anton.ivanov@cambridgegreys.com
> 
> 
> Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics.
> 
> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
> 
> ---
> arch/um/Kconfig | 1 -
> arch/um/Makefile | 3 +-
> arch/um/include/asm/fpu/api.h | 4 +-
> arch/um/include/asm/processor-generic.h | 3 +
> arch/um/kernel/Makefile | 2 +-
> arch/um/kernel/fpu.c | 83 +++++++++++++++++++++++++
> arch/um/kernel/irq.c | 2 +
> 7 files changed, 93 insertions(+), 5 deletions(-)
> create mode 100644 arch/um/kernel/fpu.c
> 
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..603f5fd82293 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,6 @@ config UML
> select ARCH_HAS_KCOV
> select ARCH_HAS_STRNCPY_FROM_USER
> select ARCH_HAS_STRNLEN_USER
> - select ARCH_NO_PREEMPT
> select HAVE_ARCH_AUDITSYSCALL
> select HAVE_ARCH_KASAN if X86_64
> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/Makefile b/arch/um/Makefile
> index 82f05f250634..6454f735cc9a 100644
> --- a/arch/um/Makefile
> +++ b/arch/um/Makefile
> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
> -Din6addr_loopback=kernel_in6addr_loopback \
> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
> + -mxsave -mxsaveopt
> 
> KBUILD_RUSTFLAGS += -Crelocation-model=pie
> 
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..0094624ae9b4 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -8,8 +8,8 @@
> * of x86 optimized copy, xor, etc routines into the
> * UML code tree. /
> 
> -#define kernel_fpu_begin() (void)0
> -#define kernel_fpu_end() (void)0
> +void kernel_fpu_begin(void);
> +void kernel_fpu_end(void);
> 
> static inline bool irq_fpu_usable(void)
> {
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 7414154b8e9a..d5924d0e16a7 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -44,6 +44,9 @@ struct thread_struct {
> } cb;
> } u;
> } request;
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
> + u8 fpu[2048] __aligned(64); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
> +#endif
> };
> 
> #define INIT_THREAD \
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..5d9fbaa544be 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
> 
> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
> physmem.o process.o ptrace.o reboot.o sigio.o \
> - signal.o sysrq.o time.o tlb.o trap.o \
> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
> obj-y += load_file.o
> 
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..fb90da5a0227
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,83 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
> 
> +#include <linux/init.h>
> 
> +#include <asm/fpu/api.h>
> 
> +#include <asm/cpufeature.h>
> 
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + /
> +
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +#endif
> +
> +/ UML knows about 387 features up to and including AVX512, tile, etc are not yet
> + * supported.
> + */

Features that are not supported probably shouldn't be reflected, including
if the appropriate xsave instruction isn't available.

> +
> +#define KNOWN_387_FEATURES 0xFF
> +
> +
> +void kernel_fpu_begin(void)
> +{
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
> + preempt_disable();
> +
> + WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> + this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> + __builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
> 
> + else {
> + if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
> + __builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
> 
> + else
> + __builtin_ia32_fxsave64(&current->thread.fpu);
> 
> + }
> +#else
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> + __builtin_ia32_xsaveopt(&current->thread.fpu, KNOWN_387_FEATURES);
> 
> + else {
> + if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
> + __builtin_ia32_xsave(&current->thread.fpu, KNOWN_387_FEATURES);
> 
> + else
> + __builtin_ia32_fxsave(&current->thread.fpu);
> 
> + }
> +#endif
> +#endif
> +}

I would keep kernel_fpu_begin() as an empty macro if !defined(PREEMPT) &&
!defined(PREEMPT_VOLUNTARY) to avoid calling empty functions, then only
compile fpu.c when you need PREEMPT behavior. This has the added bonus of
removing a level of #if in this function.

I agree with Johannes about the if-else chains.

The if-else chains would be a good use for alternatives, but we know how
that works with UML.

And what's the reasoning behind making xsaveopt support likely, but not
xsave in the fallback path?

> +
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
> + WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xrstor64(&current->thread.fpu, KNOWN_387_FEATURES);
> 
> + else
> + __builtin_ia32_fxrstor64(&current->thread.fpu);
> 
> +#else
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xrstor(&current->thread.fpu, KNOWN_387_FEATURES);
> 
> + else
> + __builtin_ia32_fxrstor(&current->thread.fpu);
> 
> +#endif
> + this_cpu_write(in_kernel_fpu, false);
> +
> + preempt_enable();
> +#endif
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +

See above, minus the likely() part.

> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
> index 635d44606bfe..c02525da45df 100644
> --- a/arch/um/kernel/irq.c
> +++ b/arch/um/kernel/irq.c
> @@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
> 
> void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
> {
> + preempt_disable();
> _sigio_handler(regs, irqs_suspended);
> + preempt_enable();
> }
> 
> static struct irq_entry *get_irq_entry_by_fd(int fd)
> --
> 2.30.2

As a reminder, compilations with debug.config fail with the following error:

---
In file included from arch/um/kernel/irq.c:19:
./arch/um/include/shared/kern_util.h:53:12: error: conflicting types for ‘__cant_sleep’; have ‘int(void)’
   53 | extern int __cant_sleep(void);
      |            ^~~~~~~~~~~~
In file included from ./include/linux/cpumask.h:10,
                 from arch/um/kernel/irq.c:10:
./include/linux/kernel.h:129:13: note: previous declaration of ‘__cant_sleep’ with type ‘void(const char *, int,  int)’
  129 | extern void __cant_sleep(const char *file, int line, int preempt_offset);
      |             ^~~~~~~~~~~~
---

Cheers,
Peter
Anton Ivanov Sept. 21, 2023, 1:57 p.m. UTC | #6
On 21/09/2023 14:46, Peter Lafreniere wrote:
> On Thu, Sep 21, 2023 at 05:28, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>>
>> Preemption requires saving/restoring FPU state. This patch
>> adds support for it using GCC intrinsics.
>>
>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>> ---
>> arch/um/Kconfig | 1 -
>> arch/um/Makefile | 3 +-
>> arch/um/include/asm/fpu/api.h | 4 +-
>> arch/um/include/asm/processor-generic.h | 3 +
>> arch/um/kernel/Makefile | 2 +-
>> arch/um/kernel/fpu.c | 83 +++++++++++++++++++++++++
>> arch/um/kernel/irq.c | 2 +
>> 7 files changed, 93 insertions(+), 5 deletions(-)
>> create mode 100644 arch/um/kernel/fpu.c
>>
>> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
>> index b5e179360534..603f5fd82293 100644
>> --- a/arch/um/Kconfig
>> +++ b/arch/um/Kconfig
>> @@ -11,7 +11,6 @@ config UML
>> select ARCH_HAS_KCOV
>> select ARCH_HAS_STRNCPY_FROM_USER
>> select ARCH_HAS_STRNLEN_USER
>> - select ARCH_NO_PREEMPT
>> select HAVE_ARCH_AUDITSYSCALL
>> select HAVE_ARCH_KASAN if X86_64
>> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
>> diff --git a/arch/um/Makefile b/arch/um/Makefile
>> index 82f05f250634..6454f735cc9a 100644
>> --- a/arch/um/Makefile
>> +++ b/arch/um/Makefile
>> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
>> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
>> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
>> -Din6addr_loopback=kernel_in6addr_loopback \
>> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
>> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
>> + -mxsave -mxsaveopt
>>
>> KBUILD_RUSTFLAGS += -Crelocation-model=pie
>>
>> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
>> index 71bfd9ef3938..0094624ae9b4 100644
>> --- a/arch/um/include/asm/fpu/api.h
>> +++ b/arch/um/include/asm/fpu/api.h
>> @@ -8,8 +8,8 @@
>> * of x86 optimized copy, xor, etc routines into the
>> * UML code tree. /
>>
>> -#define kernel_fpu_begin() (void)0
>> -#define kernel_fpu_end() (void)0
>> +void kernel_fpu_begin(void);
>> +void kernel_fpu_end(void);
>>
>> static inline bool irq_fpu_usable(void)
>> {
>> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
>> index 7414154b8e9a..d5924d0e16a7 100644
>> --- a/arch/um/include/asm/processor-generic.h
>> +++ b/arch/um/include/asm/processor-generic.h
>> @@ -44,6 +44,9 @@ struct thread_struct {
>> } cb;
>> } u;
>> } request;
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> + u8 fpu[2048] __aligned(64); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
>> +#endif
>> };
>>
>> #define INIT_THREAD \
>> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
>> index 811188be954c..5d9fbaa544be 100644
>> --- a/arch/um/kernel/Makefile
>> +++ b/arch/um/kernel/Makefile
>> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
>>
>> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
>> physmem.o process.o ptrace.o reboot.o sigio.o \
>> - signal.o sysrq.o time.o tlb.o trap.o \
>> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
>> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
>> obj-y += load_file.o
>>
>> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
>> new file mode 100644
>> index 000000000000..fb90da5a0227
>> --- /dev/null
>> +++ b/arch/um/kernel/fpu.c
>> @@ -0,0 +1,83 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/
>> + * Copyright (C) 2023 Cambridge Greys Ltd
>> + * Copyright (C) 2023 Red Hat Inc
>> + */
>> +
>> +#include <linux/cpu.h>
>>
>> +#include <linux/init.h>
>>
>> +#include <asm/fpu/api.h>
>>
>> +#include <asm/cpufeature.h>
>>
>> +
>> +/*
>> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
>> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
>> + /
>> +
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
>> +#endif
>> +
>> +/ UML knows about 387 features up to and including AVX512, tile, etc are not yet
>> + * supported.
>> + */
> Features that are not supported probably shouldn't be reflected, including
> if the appropiate xsave instruction isn't available.

They are OK to use by userspace and the same source is used for /proc/cpuinfo which can be used by userspace to see all available features.

Userspace task switching in UML uses a different mechanism for save/restore which will preserve them (ptrace REGs).

>
>> +
>> +#define KNOWN_387_FEATURES 0xFF
>> +
>> +
>> +void kernel_fpu_begin(void)
>> +{
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> + preempt_disable();
>> +
>> + WARN_ON(this_cpu_read(in_kernel_fpu));
>> +
>> + this_cpu_write(in_kernel_fpu, true);
>> +
>> +#ifdef CONFIG_64BIT
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> + __builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else {
>> + if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
>> + __builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxsave64(&current->thread.fpu);
>>
>> + }
>> +#else
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> + __builtin_ia32_xsaveopt(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else {
>> + if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
>> + __builtin_ia32_xsave(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxsave(&current->thread.fpu);
>>
>> + }
>> +#endif
>> +#endif
>> +}
> I would keep kernel_fpu_begin() as an empty macro if !defined(PREEMPT) &&
> !defined(PREEMPT_VOLUNTARY) to avoid calling empty functions, then only
> compile fpu.c when you need PREEMPT behavior. This has the added bonus of
> removing a level of #if in this function.

Sure. We can. IMHO the compiler will inline it and optimize it out.

>
> I agree with Johannes about the if-else chains.
>
> The if-else chains would be a good use for alternatives, but we know how
> that works with UML.
>
> And what's the reasoning behind making xsaveopt support likely, but not
> xsave in the fallback path?

xsaveopt has been around for what? 10+ years now? That is likely :) Very likely.

Sure we can put a likely on the following xsave. If you are hitting that, you are on a pre-2011 CPU.

>
>> +
>> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
>> +
>> +void kernel_fpu_end(void)
>> +{
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> + WARN_ON(!this_cpu_read(in_kernel_fpu));
>> +
>> +#ifdef CONFIG_64BIT
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
>> + __builtin_ia32_xrstor64(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxrstor64(&current->thread.fpu);
>>
>> +#else
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
>> + __builtin_ia32_xrstor(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxrstor(&current->thread.fpu);
>>
>> +#endif
>> + this_cpu_write(in_kernel_fpu, false);
>> +
>> + preempt_enable();
>> +#endif
>> +}
>> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
>> +
> See above, minus the likely() part.
>
>> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
>> index 635d44606bfe..c02525da45df 100644
>> --- a/arch/um/kernel/irq.c
>> +++ b/arch/um/kernel/irq.c
>> @@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
>>
>> void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>> {
>> + preempt_disable();
>> _sigio_handler(regs, irqs_suspended);
>> + preempt_enable();
>> }
>>
>> static struct irq_entry *get_irq_entry_by_fd(int fd)
>> --
>> 2.30.2
> As a reminder, compilations with debug.config fail with the following error:
>
> ---
> In file included from arch/um/kernel/irq.c:19:
> ./arch/um/include/shared/kern_util.h:53:12: error: conflicting types for ‘__cant_sleep’; have ‘int(void)’
>     53 | extern int __cant_sleep(void);
>        |            ^~~~~~~~~~~~
> In file included from ./include/linux/cpumask.h:10,
>                   from arch/um/kernel/irq.c:10:
> ./include/linux/kernel.h:129:13: note: previous declaration of ‘__cant_sleep’ with type ‘void(const char *, int,  int)’
>    129 | extern void __cant_sleep(const char *file, int line, int preempt_offset);

OK. Will look into this one. This looks like a bug in the first place.

>        |             ^~~~~~~~~~~~
> ---
>
> Cheers,
> Peter
>
>
Anton Ivanov Sept. 21, 2023, 2 p.m. UTC | #7
On 21/09/2023 14:46, Peter Lafreniere wrote:
> On Thu, Sep 21, 2023 at 05:28, anton.ivanov@cambridgegreys.com wrote:
>>
>> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>>
>> Preemption requires saving/restoring FPU state. This patch
>> adds support for it using GCC intrinsics.
>>
>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>> ---
>> arch/um/Kconfig | 1 -
>> arch/um/Makefile | 3 +-
>> arch/um/include/asm/fpu/api.h | 4 +-
>> arch/um/include/asm/processor-generic.h | 3 +
>> arch/um/kernel/Makefile | 2 +-
>> arch/um/kernel/fpu.c | 83 +++++++++++++++++++++++++
>> arch/um/kernel/irq.c | 2 +
>> 7 files changed, 93 insertions(+), 5 deletions(-)
>> create mode 100644 arch/um/kernel/fpu.c
>>
>> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
>> index b5e179360534..603f5fd82293 100644
>> --- a/arch/um/Kconfig
>> +++ b/arch/um/Kconfig
>> @@ -11,7 +11,6 @@ config UML
>> select ARCH_HAS_KCOV
>> select ARCH_HAS_STRNCPY_FROM_USER
>> select ARCH_HAS_STRNLEN_USER
>> - select ARCH_NO_PREEMPT
>> select HAVE_ARCH_AUDITSYSCALL
>> select HAVE_ARCH_KASAN if X86_64
>> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
>> diff --git a/arch/um/Makefile b/arch/um/Makefile
>> index 82f05f250634..6454f735cc9a 100644
>> --- a/arch/um/Makefile
>> +++ b/arch/um/Makefile
>> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
>> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
>> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
>> -Din6addr_loopback=kernel_in6addr_loopback \
>> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
>> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
>> + -mxsave -mxsaveopt
>>
>> KBUILD_RUSTFLAGS += -Crelocation-model=pie
>>
>> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
>> index 71bfd9ef3938..0094624ae9b4 100644
>> --- a/arch/um/include/asm/fpu/api.h
>> +++ b/arch/um/include/asm/fpu/api.h
>> @@ -8,8 +8,8 @@
>> * of x86 optimized copy, xor, etc routines into the
>> * UML code tree. /
>>
>> -#define kernel_fpu_begin() (void)0
>> -#define kernel_fpu_end() (void)0
>> +void kernel_fpu_begin(void);
>> +void kernel_fpu_end(void);
>>
>> static inline bool irq_fpu_usable(void)
>> {
>> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
>> index 7414154b8e9a..d5924d0e16a7 100644
>> --- a/arch/um/include/asm/processor-generic.h
>> +++ b/arch/um/include/asm/processor-generic.h
>> @@ -44,6 +44,9 @@ struct thread_struct {
>> } cb;
>> } u;
>> } request;
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> + u8 fpu[2048] __aligned(64); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
>> +#endif
>> };
>>
>> #define INIT_THREAD \
>> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
>> index 811188be954c..5d9fbaa544be 100644
>> --- a/arch/um/kernel/Makefile
>> +++ b/arch/um/kernel/Makefile
>> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
>>
>> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
>> physmem.o process.o ptrace.o reboot.o sigio.o \
>> - signal.o sysrq.o time.o tlb.o trap.o \
>> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
>> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
>> obj-y += load_file.o
>>
>> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
>> new file mode 100644
>> index 000000000000..fb90da5a0227
>> --- /dev/null
>> +++ b/arch/um/kernel/fpu.c
>> @@ -0,0 +1,83 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/
>> + * Copyright (C) 2023 Cambridge Greys Ltd
>> + * Copyright (C) 2023 Red Hat Inc
>> + */
>> +
>> +#include <linux/cpu.h>
>>
>> +#include <linux/init.h>
>>
>> +#include <asm/fpu/api.h>
>>
>> +#include <asm/cpufeature.h>
>>
>> +
>> +/*
>> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
>> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
>> + /
>> +
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
>> +#endif
>> +
>> +/ UML knows about 387 features up to and including AVX512, tile, etc are not yet
>> + * supported.
>> + */
> 
> Features that are not supported probably shouldn't be reflected, including
> if the appropriate xsave instruction isn't available.
> 
>> +
>> +#define KNOWN_387_FEATURES 0xFF
>> +
>> +
>> +void kernel_fpu_begin(void)
>> +{
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> + preempt_disable();
>> +
>> + WARN_ON(this_cpu_read(in_kernel_fpu));
>> +
>> + this_cpu_write(in_kernel_fpu, true);
>> +
>> +#ifdef CONFIG_64BIT
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> + __builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else {
>> + if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
>> + __builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxsave64(&current->thread.fpu);
>>
>> + }
>> +#else
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> + __builtin_ia32_xsaveopt(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else {
>> + if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
>> + __builtin_ia32_xsave(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxsave(&current->thread.fpu);
>>
>> + }
>> +#endif
>> +#endif
>> +}
> 
> I would keep kernel_fpu_begin() as an empty macro if !defined(PREEMPT) &&
> !defined(PREEMPT_VOLUNTARY) to avoid calling empty functions, then only
> compile fpu.c when you need PREEMPT behavior. This has the added bonus of
> removing a level of #if in this function.
> 
> I agree with Johannes about the if-else chains.
> 
> The if-else chains would be a good use for alternatives, but we know how
> that works with UML.
> 
> And what's the reasoning behind making xsaveopt support likely, but not
> xsave in the fallback path?
> 
>> +
>> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
>> +
>> +void kernel_fpu_end(void)
>> +{
>> +#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
>> + WARN_ON(!this_cpu_read(in_kernel_fpu));
>> +
>> +#ifdef CONFIG_64BIT
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
>> + __builtin_ia32_xrstor64(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxrstor64(&current->thread.fpu);
>>
>> +#else
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
>> + __builtin_ia32_xrstor(&current->thread.fpu, KNOWN_387_FEATURES);
>>
>> + else
>> + __builtin_ia32_fxrstor(&current->thread.fpu);
>>
>> +#endif
>> + this_cpu_write(in_kernel_fpu, false);
>> +
>> + preempt_enable();
>> +#endif
>> +}
>> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
>> +
> 
> See above, minus the likely() part.
> 
>> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
>> index 635d44606bfe..c02525da45df 100644
>> --- a/arch/um/kernel/irq.c
>> +++ b/arch/um/kernel/irq.c
>> @@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
>>
>> void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>> {
>> + preempt_disable();
>> _sigio_handler(regs, irqs_suspended);
>> + preempt_enable();
>> }
>>
>> static struct irq_entry *get_irq_entry_by_fd(int fd)
>> --
>> 2.30.2
> 
> As a reminder, compilations with debug.config fail with the following error:
> 
> ---
> In file included from arch/um/kernel/irq.c:19:
> ./arch/um/include/shared/kern_util.h:53:12: error: conflicting types for ‘__cant_sleep’; have ‘int(void)’
>     53 | extern int __cant_sleep(void);
>        |            ^~~~~~~~~~~~
> In file included from ./include/linux/cpumask.h:10,
>                   from arch/um/kernel/irq.c:10:
> ./include/linux/kernel.h:129:13: note: previous declaration of ‘__cant_sleep’ with type ‘void(const char *, int,  int)’
>    129 | extern void __cant_sleep(const char *file, int line, int preempt_offset);
>        |             ^~~~~~~~~~~~
> ---

This is a bug - we ended up with a name clash. This will need to be fixed separately. Patch shortly.

> 
> Cheers,
> Peter
> 
> 
> _______________________________________________
> linux-um mailing list
> linux-um@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-um
diff mbox series

Patch

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index b5e179360534..603f5fd82293 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,6 @@  config UML
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_STRNCPY_FROM_USER
 	select ARCH_HAS_STRNLEN_USER
-	select ARCH_NO_PREEMPT
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_KASAN if X86_64
 	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 82f05f250634..6454f735cc9a 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -61,7 +61,8 @@  KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
 	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap	\
 	-Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
 	-Din6addr_loopback=kernel_in6addr_loopback \
-	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
+	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
+	-mxsave -mxsaveopt
 
 KBUILD_RUSTFLAGS += -Crelocation-model=pie
 
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
index 71bfd9ef3938..0094624ae9b4 100644
--- a/arch/um/include/asm/fpu/api.h
+++ b/arch/um/include/asm/fpu/api.h
@@ -8,8 +8,8 @@ 
  * of x86 optimized copy, xor, etc routines into the
  * UML code tree. */
 
-#define kernel_fpu_begin() (void)0
-#define kernel_fpu_end() (void)0
+void kernel_fpu_begin(void);
+void kernel_fpu_end(void);
 
 static inline bool irq_fpu_usable(void)
 {
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 7414154b8e9a..d5924d0e16a7 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -44,6 +44,9 @@  struct thread_struct {
 			} cb;
 		} u;
 	} request;
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+    u8 fpu[2048] __aligned(64); /* Intel docs require xsave/xrestore area to be aligned to 16 bytes */
+#endif
 };
 
 #define INIT_THREAD \
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 811188be954c..5d9fbaa544be 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -16,7 +16,7 @@  extra-y := vmlinux.lds
 
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
-	signal.o sysrq.o time.o tlb.o trap.o \
+	signal.o sysrq.o time.o tlb.o trap.o fpu.o\
 	um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
 obj-y += load_file.o
 
diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
new file mode 100644
index 000000000000..fb90da5a0227
--- /dev/null
+++ b/arch/um/kernel/fpu.c
@@ -0,0 +1,83 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Cambridge Greys Ltd
+ * Copyright (C) 2023 Red Hat Inc
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <asm/fpu/api.h>
+#include <asm/cpufeature.h>
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ */
+
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+#endif
+
+/* UML knows about 387 features up to and including AVX512, tile, etc are not yet
+ * supported.
+ */
+
+#define KNOWN_387_FEATURES 0xFF
+
+
+void kernel_fpu_begin(void)
+{
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+	preempt_disable();
+
+	WARN_ON(this_cpu_read(in_kernel_fpu));
+
+	this_cpu_write(in_kernel_fpu, true);
+
+#ifdef CONFIG_64BIT
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+		__builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
+	else {
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
+			__builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
+		else
+			__builtin_ia32_fxsave64(&current->thread.fpu);
+	}
+#else
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+		__builtin_ia32_xsaveopt(&current->thread.fpu, KNOWN_387_FEATURES);
+	else {
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE))
+			__builtin_ia32_xsave(&current->thread.fpu, KNOWN_387_FEATURES);
+		else
+			__builtin_ia32_fxsave(&current->thread.fpu);
+	}
+#endif
+#endif
+}
+
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+#if defined(PREEMPT) || defined(PREEMPT_VOLUNTRARY)
+	WARN_ON(!this_cpu_read(in_kernel_fpu));
+
+#ifdef CONFIG_64BIT
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+		__builtin_ia32_xrstor64(&current->thread.fpu, KNOWN_387_FEATURES);
+	else
+		__builtin_ia32_fxrstor64(&current->thread.fpu);
+#else
+	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+		__builtin_ia32_xrstor(&current->thread.fpu, KNOWN_387_FEATURES);
+	else
+		__builtin_ia32_fxrstor(&current->thread.fpu);
+#endif
+	this_cpu_write(in_kernel_fpu, false);
+
+	preempt_enable();
+#endif
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 635d44606bfe..c02525da45df 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -195,7 +195,9 @@  static void _sigio_handler(struct uml_pt_regs *regs,
 
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
+	preempt_disable();
 	_sigio_handler(regs, irqs_suspended);
+	preempt_enable();
 }
 
 static struct irq_entry *get_irq_entry_by_fd(int fd)