diff mbox series

um: Enable preemption in UML

Message ID 20230920123115.4106721-1-anton.ivanov@cambridgegreys.com
State Superseded
Headers show
Series um: Enable preemption in UML | expand

Commit Message

Anton Ivanov Sept. 20, 2023, 12:31 p.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Preemption requires saving/restoring FPU state. This patch
adds support for it using GCC intrinsics.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/Kconfig                         |  1 -
 arch/um/Makefile                        |  3 +-
 arch/um/include/asm/fpu/api.h           |  4 +-
 arch/um/include/asm/processor-generic.h |  1 +
 arch/um/kernel/Makefile                 |  2 +-
 arch/um/kernel/fpu.c                    | 49 +++++++++++++++++++++++++
 6 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 arch/um/kernel/fpu.c

Comments

Peter Lafreniere Sept. 20, 2023, 2:49 p.m. UTC | #1
On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote:
> 
> From: Anton Ivanov anton.ivanov@cambridgegreys.com
> 
> 
> Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics.
> 
> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
> 
> ---
> arch/um/Kconfig | 1 -
> arch/um/Makefile | 3 +-
> arch/um/include/asm/fpu/api.h | 4 +-
> arch/um/include/asm/processor-generic.h | 1 +
> arch/um/kernel/Makefile | 2 +-
> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++
> 6 files changed, 55 insertions(+), 5 deletions(-)
> create mode 100644 arch/um/kernel/fpu.c
> 
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..603f5fd82293 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,6 @@ config UML
> select ARCH_HAS_KCOV
> select ARCH_HAS_STRNCPY_FROM_USER
> select ARCH_HAS_STRNLEN_USER
> - select ARCH_NO_PREEMPT
> select HAVE_ARCH_AUDITSYSCALL
> select HAVE_ARCH_KASAN if X86_64
> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/Makefile b/arch/um/Makefile
> index 82f05f250634..35a059baadeb 100644
> --- a/arch/um/Makefile
> +++ b/arch/um/Makefile
> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
> -Din6addr_loopback=kernel_in6addr_loopback \
> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
> + -mxsave

Since we're now using a new ISA extension, I would add a check in
linux_main() with a warning message if xsave is not available.

It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled. 

> 
> KBUILD_RUSTFLAGS += -Crelocation-model=pie
> 
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..0094624ae9b4 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -8,8 +8,8 @@
> * of x86 optimized copy, xor, etc routines into the
> * UML code tree. /
> 
> -#define kernel_fpu_begin() (void)0
> -#define kernel_fpu_end() (void)0
> +void kernel_fpu_begin(void);
> +void kernel_fpu_end(void);
> 
> static inline bool irq_fpu_usable(void)
> {
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 7414154b8e9a..1c3287f1a444 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -44,6 +44,7 @@ struct thread_struct {
> } cb;
> } u;
> } request;
> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
> };
> 
> #define INIT_THREAD \
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..5d9fbaa544be 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
> 
> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
> physmem.o process.o ptrace.o reboot.o sigio.o \
> - signal.o sysrq.o time.o tlb.o trap.o \
> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
> obj-y += load_file.o
> 
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..4142a08c474d
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
> 
> +#include <linux/init.h>
> 
> +#include <asm/fpu/api.h>
> 
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + */
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +void kernel_fpu_begin(void)
> +{
> + preempt_disable();
> +
> + WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> + this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> + __builtin_ia32_fxsave64(&current->thread.fpu);
> 
> +#else
> + __builtin_ia32_fxsave(&current->thread.fpu);
> 
> +#endif
> +}
> +
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> + WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> + __builtin_ia32_fxrstor64(&current->thread.fpu);
> 
> +#else
> + __builtin_ia32_fxrstor(&current->thread.fpu);
> 
> +#endif
> + this_cpu_write(in_kernel_fpu, false);
> +
> + preempt_enable();
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +

According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor
instructions do not save ymm, zmm, or mask registers. Please make that explicit
in your comment, or replace intrinsics with inline assembler to support those
registers.

Several x86 crypto routines use these larger registers which could overwrite
userspace registers. These modules aren't built by default, and may not be
available on UML. Even so, this issue is worth a comment, at the least.

On my system, lscpu within uml shows all the feature flags that are available
on the host system, which includes AVX extensions.

Cheers,
Peter Lafreniere
Anton Ivanov Sept. 20, 2023, 3:03 p.m. UTC | #2
On 20/09/2023 15:49, Peter Lafreniere wrote:
> On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote:
>>
>> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>>
>> Preemption requires saving/restoring FPU state. This patch
>> adds support for it using GCC intrinsics.
>>
>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>> ---
>> arch/um/Kconfig | 1 -
>> arch/um/Makefile | 3 +-
>> arch/um/include/asm/fpu/api.h | 4 +-
>> arch/um/include/asm/processor-generic.h | 1 +
>> arch/um/kernel/Makefile | 2 +-
>> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++
>> 6 files changed, 55 insertions(+), 5 deletions(-)
>> create mode 100644 arch/um/kernel/fpu.c
>>
>> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
>> index b5e179360534..603f5fd82293 100644
>> --- a/arch/um/Kconfig
>> +++ b/arch/um/Kconfig
>> @@ -11,7 +11,6 @@ config UML
>> select ARCH_HAS_KCOV
>> select ARCH_HAS_STRNCPY_FROM_USER
>> select ARCH_HAS_STRNLEN_USER
>> - select ARCH_NO_PREEMPT
>> select HAVE_ARCH_AUDITSYSCALL
>> select HAVE_ARCH_KASAN if X86_64
>> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
>> diff --git a/arch/um/Makefile b/arch/um/Makefile
>> index 82f05f250634..35a059baadeb 100644
>> --- a/arch/um/Makefile
>> +++ b/arch/um/Makefile
>> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
>> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
>> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
>> -Din6addr_loopback=kernel_in6addr_loopback \
>> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
>> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
>> + -mxsave
> 
> Since we're now using a new isa extention, I would add a check in
> linux_main() with a warning message if xsave is not available.
> 
> It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled.

Ack. Will do.

The patch will have a few versions.

I would like to try to sync up with the existing PTRACE fpregs code used for running the userspace if possible. We should (at least) use the same regs store/restore space.

> 
>>
>> KBUILD_RUSTFLAGS += -Crelocation-model=pie
>>
>> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
>> index 71bfd9ef3938..0094624ae9b4 100644
>> --- a/arch/um/include/asm/fpu/api.h
>> +++ b/arch/um/include/asm/fpu/api.h
>> @@ -8,8 +8,8 @@
>> * of x86 optimized copy, xor, etc routines into the
>> * UML code tree. /
>>
>> -#define kernel_fpu_begin() (void)0
>> -#define kernel_fpu_end() (void)0
>> +void kernel_fpu_begin(void);
>> +void kernel_fpu_end(void);
>>
>> static inline bool irq_fpu_usable(void)
>> {
>> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
>> index 7414154b8e9a..1c3287f1a444 100644
>> --- a/arch/um/include/asm/processor-generic.h
>> +++ b/arch/um/include/asm/processor-generic.h
>> @@ -44,6 +44,7 @@ struct thread_struct {
>> } cb;
>> } u;
>> } request;
>> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
>> };
>>
>> #define INIT_THREAD \
>> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
>> index 811188be954c..5d9fbaa544be 100644
>> --- a/arch/um/kernel/Makefile
>> +++ b/arch/um/kernel/Makefile
>> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
>>
>> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
>> physmem.o process.o ptrace.o reboot.o sigio.o \
>> - signal.o sysrq.o time.o tlb.o trap.o \
>> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
>> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
>> obj-y += load_file.o
>>
>> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
>> new file mode 100644
>> index 000000000000..4142a08c474d
>> --- /dev/null
>> +++ b/arch/um/kernel/fpu.c
>> @@ -0,0 +1,49 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/
>> + * Copyright (C) 2023 Cambridge Greys Ltd
>> + * Copyright (C) 2023 Red Hat Inc
>> + */
>> +
>> +#include <linux/cpu.h>
>>
>> +#include <linux/init.h>
>>
>> +#include <asm/fpu/api.h>
>>
>> +
>> +/*
>> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
>> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
>> + */
>> +
>> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
>> +
>> +void kernel_fpu_begin(void)
>> +{
>> + preempt_disable();
>> +
>> + WARN_ON(this_cpu_read(in_kernel_fpu));
>> +
>> + this_cpu_write(in_kernel_fpu, true);
>> +
>> +#ifdef CONFIG_64BIT
>> + __builtin_ia32_fxsave64(&current->thread.fpu);
>>
>> +#else
>> + __builtin_ia32_fxsave(&current->thread.fpu);
>>
>> +#endif
>> +}
>> +
>> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
>> +
>> +void kernel_fpu_end(void)
>> +{
>> + WARN_ON(!this_cpu_read(in_kernel_fpu));
>> +
>> +#ifdef CONFIG_64BIT
>> + __builtin_ia32_fxrstor64(&current->thread.fpu);
>>
>> +#else
>> + __builtin_ia32_fxrstor(&current->thread.fpu);
>>
>> +#endif
>> + this_cpu_write(in_kernel_fpu, false);
>> +
>> + preempt_enable();
>> +}
>> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
>> +
> 
> According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor
> instructions do not save ymm, zmm, or mask registers. Please make that explicit
> in your comment, or replace intrinsics with inline assembler to support those
> registers.
> 
> Several x86 crypto routines use these larger registers which could overwrite
> userspace registers. These modules aren't built by default, and may not be
> available on UML. Even so, this issue is worth a comment, at the least.
> 
> On my system, lscpu within uml shows all the feature flags that are available
> on the host system, which includes AVX extentions.
> 
> Cheers,
> Peter Lafreniere
> 
>
Peter Lafreniere Sept. 20, 2023, 3:04 p.m. UTC | #3
On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote:
> 
> From: Anton Ivanov anton.ivanov@cambridgegreys.com
> 
> 
> Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics.
> 
> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
> 
> ---
> arch/um/Kconfig | 1 -
> arch/um/Makefile | 3 +-
> arch/um/include/asm/fpu/api.h | 4 +-
> arch/um/include/asm/processor-generic.h | 1 +
> arch/um/kernel/Makefile | 2 +-
> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++
> 6 files changed, 55 insertions(+), 5 deletions(-)
> create mode 100644 arch/um/kernel/fpu.c
> 
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..603f5fd82293 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,6 @@ config UML
> select ARCH_HAS_KCOV
> select ARCH_HAS_STRNCPY_FROM_USER
> select ARCH_HAS_STRNLEN_USER
> - select ARCH_NO_PREEMPT
> select HAVE_ARCH_AUDITSYSCALL
> select HAVE_ARCH_KASAN if X86_64
> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/Makefile b/arch/um/Makefile
> index 82f05f250634..35a059baadeb 100644
> --- a/arch/um/Makefile
> +++ b/arch/um/Makefile
> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
> -Din6addr_loopback=kernel_in6addr_loopback \
> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
> + -mxsave

Since we're now using a new ISA extension, I would add a check in
linux_main() with a warning message if xsave is not available.

It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled. 

> 
> KBUILD_RUSTFLAGS += -Crelocation-model=pie
> 
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..0094624ae9b4 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -8,8 +8,8 @@
> * of x86 optimized copy, xor, etc routines into the
> * UML code tree. /
> 
> -#define kernel_fpu_begin() (void)0
> -#define kernel_fpu_end() (void)0
> +void kernel_fpu_begin(void);
> +void kernel_fpu_end(void);
> 
> static inline bool irq_fpu_usable(void)
> {
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 7414154b8e9a..1c3287f1a444 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -44,6 +44,7 @@ struct thread_struct {
> } cb;
> } u;
> } request;
> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
> };
> 
> #define INIT_THREAD \
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..5d9fbaa544be 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
> 
> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
> physmem.o process.o ptrace.o reboot.o sigio.o \
> - signal.o sysrq.o time.o tlb.o trap.o \
> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
> obj-y += load_file.o
> 
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..4142a08c474d
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
> 
> +#include <linux/init.h>
> 
> +#include <asm/fpu/api.h>
> 
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + */
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +void kernel_fpu_begin(void)
> +{
> + preempt_disable();
> +
> + WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> + this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> + __builtin_ia32_fxsave64(&current->thread.fpu);
> 
> +#else
> + __builtin_ia32_fxsave(&current->thread.fpu);
> 
> +#endif
> +}
> +
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> + WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> + __builtin_ia32_fxrstor64(&current->thread.fpu);
> 
> +#else
> + __builtin_ia32_fxrstor(&current->thread.fpu);
> 
> +#endif
> + this_cpu_write(in_kernel_fpu, false);
> +
> + preempt_enable();
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +

According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor
instructions do not save ymm, zmm, or mask registers. Please make that explicit
in your comment, or replace intrinsics with inline assembler to support those
registers.

Several x86 crypto routines use these larger registers which could overwrite
userspace registers. These modules aren't built by default, and may not be
available on UML. Even so, this issue is worth a comment, at the least.

On my system, lscpu within uml shows all the feature flags that are available
on the host system, which includes AVX extensions.

One more thing:
With this patch applied, uml builds with defconfig + debug.config fails,
but the same build failure does not happen without this patch. A standard
defconfig builds fine in both cases.

Cheers,
Peter Lafreniere
Anton Ivanov Sept. 20, 2023, 3:38 p.m. UTC | #4
On 20/09/2023 16:04, Peter Lafreniere wrote:
> On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote:
>>
>> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>>
>> Preemption requires saving/restoring FPU state. This patch
>> adds support for it using GCC intrinsics.
>>
>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>> ---
>> arch/um/Kconfig | 1 -
>> arch/um/Makefile | 3 +-
>> arch/um/include/asm/fpu/api.h | 4 +-
>> arch/um/include/asm/processor-generic.h | 1 +
>> arch/um/kernel/Makefile | 2 +-
>> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++
>> 6 files changed, 55 insertions(+), 5 deletions(-)
>> create mode 100644 arch/um/kernel/fpu.c
>>
>> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
>> index b5e179360534..603f5fd82293 100644
>> --- a/arch/um/Kconfig
>> +++ b/arch/um/Kconfig
>> @@ -11,7 +11,6 @@ config UML
>> select ARCH_HAS_KCOV
>> select ARCH_HAS_STRNCPY_FROM_USER
>> select ARCH_HAS_STRNLEN_USER
>> - select ARCH_NO_PREEMPT
>> select HAVE_ARCH_AUDITSYSCALL
>> select HAVE_ARCH_KASAN if X86_64
>> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
>> diff --git a/arch/um/Makefile b/arch/um/Makefile
>> index 82f05f250634..35a059baadeb 100644
>> --- a/arch/um/Makefile
>> +++ b/arch/um/Makefile
>> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
>> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
>> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
>> -Din6addr_loopback=kernel_in6addr_loopback \
>> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
>> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
>> + -mxsave
> 
> Since we're now using a new isa extention, I would add a check in
> linux_main() with a warning message if xsave is not available.
> 
> It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled.
> 
>>
>> KBUILD_RUSTFLAGS += -Crelocation-model=pie
>>
>> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
>> index 71bfd9ef3938..0094624ae9b4 100644
>> --- a/arch/um/include/asm/fpu/api.h
>> +++ b/arch/um/include/asm/fpu/api.h
>> @@ -8,8 +8,8 @@
>> * of x86 optimized copy, xor, etc routines into the
>> * UML code tree. /
>>
>> -#define kernel_fpu_begin() (void)0
>> -#define kernel_fpu_end() (void)0
>> +void kernel_fpu_begin(void);
>> +void kernel_fpu_end(void);
>>
>> static inline bool irq_fpu_usable(void)
>> {
>> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
>> index 7414154b8e9a..1c3287f1a444 100644
>> --- a/arch/um/include/asm/processor-generic.h
>> +++ b/arch/um/include/asm/processor-generic.h
>> @@ -44,6 +44,7 @@ struct thread_struct {
>> } cb;
>> } u;
>> } request;
>> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
>> };
>>
>> #define INIT_THREAD \
>> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
>> index 811188be954c..5d9fbaa544be 100644
>> --- a/arch/um/kernel/Makefile
>> +++ b/arch/um/kernel/Makefile
>> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
>>
>> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
>> physmem.o process.o ptrace.o reboot.o sigio.o \
>> - signal.o sysrq.o time.o tlb.o trap.o \
>> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
>> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
>> obj-y += load_file.o
>>
>> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
>> new file mode 100644
>> index 000000000000..4142a08c474d
>> --- /dev/null
>> +++ b/arch/um/kernel/fpu.c
>> @@ -0,0 +1,49 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/
>> + * Copyright (C) 2023 Cambridge Greys Ltd
>> + * Copyright (C) 2023 Red Hat Inc
>> + */
>> +
>> +#include <linux/cpu.h>
>>
>> +#include <linux/init.h>
>>
>> +#include <asm/fpu/api.h>
>>
>> +
>> +/*
>> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
>> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
>> + */
>> +
>> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
>> +
>> +void kernel_fpu_begin(void)
>> +{
>> + preempt_disable();
>> +
>> + WARN_ON(this_cpu_read(in_kernel_fpu));
>> +
>> + this_cpu_write(in_kernel_fpu, true);
>> +
>> +#ifdef CONFIG_64BIT
>> + __builtin_ia32_fxsave64(&current->thread.fpu);
>>
>> +#else
>> + __builtin_ia32_fxsave(&current->thread.fpu);
>>
>> +#endif
>> +}
>> +
>> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
>> +
>> +void kernel_fpu_end(void)
>> +{
>> + WARN_ON(!this_cpu_read(in_kernel_fpu));
>> +
>> +#ifdef CONFIG_64BIT
>> + __builtin_ia32_fxrstor64(&current->thread.fpu);
>>
>> +#else
>> + __builtin_ia32_fxrstor(&current->thread.fpu);
>>
>> +#endif
>> + this_cpu_write(in_kernel_fpu, false);
>> +
>> + preempt_enable();
>> +}
>> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
>> +
> 
> According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor
> instructions do not save ymm, zmm, or mask registers. Please make that explicit
> in your comment, or replace intrinsics with inline assembler to support those
> registers.

It may end up with that :(

> 
> Several x86 crypto routines use these larger registers which could overwrite
> userspace registers. These modules aren't built by default, and may not be
> available on UML. Even so, this issue is worth a comment, at the least.

We did some work to enable some of them. So we may need to save them too.

> 
> On my system, lscpu within uml shows all the feature flags that are available
> on the host system, which includes AVX extentions.

Yes. The code to reflect features into the guest was added at about the same time
as enabling some of the AVX extensions. We could not enable them everywhere, because
most of this code in Linux relies on alternatives support and that is not working
in UML - it does not have the means to "patch" running code and its "alternatives"
are effectively a NOOP.

They are definitely enabled for RAID6 IIRC, as well as anything else which does
a jump table based on CPU features.

> 
> One more thing:
> With this patch applied, uml builds with defconfig + debug.config fails,
> but the same build failure does not happen without this patch. A standard
> defconfig builds fine in both cases.
>

Ack. Will look at it.


> Cheers,
> Peter Lafreniere
> 
>
Anton Ivanov Sept. 21, 2023, 7:12 a.m. UTC | #5
On 20/09/2023 15:49, Peter Lafreniere wrote:
> On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>>
>> Preemption requires saving/restoring FPU state. This patch
>> adds support for it using GCC intrinsics.
>>
>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
>>
>> ---
>> arch/um/Kconfig | 1 -
>> arch/um/Makefile | 3 +-
>> arch/um/include/asm/fpu/api.h | 4 +-
>> arch/um/include/asm/processor-generic.h | 1 +
>> arch/um/kernel/Makefile | 2 +-
>> arch/um/kernel/fpu.c | 49 +++++++++++++++++++++++++
>> 6 files changed, 55 insertions(+), 5 deletions(-)
>> create mode 100644 arch/um/kernel/fpu.c
>>
>> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
>> index b5e179360534..603f5fd82293 100644
>> --- a/arch/um/Kconfig
>> +++ b/arch/um/Kconfig
>> @@ -11,7 +11,6 @@ config UML
>> select ARCH_HAS_KCOV
>> select ARCH_HAS_STRNCPY_FROM_USER
>> select ARCH_HAS_STRNLEN_USER
>> - select ARCH_NO_PREEMPT
>> select HAVE_ARCH_AUDITSYSCALL
>> select HAVE_ARCH_KASAN if X86_64
>> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
>> diff --git a/arch/um/Makefile b/arch/um/Makefile
>> index 82f05f250634..35a059baadeb 100644
>> --- a/arch/um/Makefile
>> +++ b/arch/um/Makefile
>> @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
>> $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
>> -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
>> -Din6addr_loopback=kernel_in6addr_loopback \
>> - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
>> + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
>> + -mxsave
> Since we're now using a new isa extention, I would add a check in
> linux_main() with a warning message if xsave is not available.
>
> It also wouldn't hurt to only require xsave if CONFIG_PREEMPT is enabled.

Indeed. The full area ends up being quite large - half a page or thereabouts.

>
>> KBUILD_RUSTFLAGS += -Crelocation-model=pie
>>
>> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
>> index 71bfd9ef3938..0094624ae9b4 100644
>> --- a/arch/um/include/asm/fpu/api.h
>> +++ b/arch/um/include/asm/fpu/api.h
>> @@ -8,8 +8,8 @@
>> * of x86 optimized copy, xor, etc routines into the
>> * UML code tree. /
>>
>> -#define kernel_fpu_begin() (void)0
>> -#define kernel_fpu_end() (void)0
>> +void kernel_fpu_begin(void);
>> +void kernel_fpu_end(void);
>>
>> static inline bool irq_fpu_usable(void)
>> {
>> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
>> index 7414154b8e9a..1c3287f1a444 100644
>> --- a/arch/um/include/asm/processor-generic.h
>> +++ b/arch/um/include/asm/processor-generic.h
>> @@ -44,6 +44,7 @@ struct thread_struct {
>> } cb;
>> } u;
>> } request;
>> + u8 fpu[512] __aligned(16); / Intel docs require xsave/xrestore area to be aligned to 16 bytes /
>> };
>>
>> #define INIT_THREAD \
>> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
>> index 811188be954c..5d9fbaa544be 100644
>> --- a/arch/um/kernel/Makefile
>> +++ b/arch/um/kernel/Makefile
>> @@ -16,7 +16,7 @@ extra-y := vmlinux.lds
>>
>> obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
>> physmem.o process.o ptrace.o reboot.o sigio.o \
>> - signal.o sysrq.o time.o tlb.o trap.o \
>> + signal.o sysrq.o time.o tlb.o trap.o fpu.o\
>> um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
>> obj-y += load_file.o
>>
>> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
>> new file mode 100644
>> index 000000000000..4142a08c474d
>> --- /dev/null
>> +++ b/arch/um/kernel/fpu.c
>> @@ -0,0 +1,49 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/
>> + * Copyright (C) 2023 Cambridge Greys Ltd
>> + * Copyright (C) 2023 Red Hat Inc
>> + */
>> +
>> +#include <linux/cpu.h>
>>
>> +#include <linux/init.h>
>>
>> +#include <asm/fpu/api.h>
>>
>> +
>> +/*
>> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
>> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
>> + */
>> +
>> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
>> +
>> +void kernel_fpu_begin(void)
>> +{
>> + preempt_disable();
>> +
>> + WARN_ON(this_cpu_read(in_kernel_fpu));
>> +
>> + this_cpu_write(in_kernel_fpu, true);
>> +
>> +#ifdef CONFIG_64BIT
>> + __builtin_ia32_fxsave64(&current->thread.fpu);
>>
>> +#else
>> + __builtin_ia32_fxsave(&current->thread.fpu);
>>
>> +#endif
>> +}
>> +
>> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
>> +
>> +void kernel_fpu_end(void)
>> +{
>> + WARN_ON(!this_cpu_read(in_kernel_fpu));
>> +
>> +#ifdef CONFIG_64BIT
>> + __builtin_ia32_fxrstor64(&current->thread.fpu);
>>
>> +#else
>> + __builtin_ia32_fxrstor(&current->thread.fpu);
>>
>> +#endif
>> + this_cpu_write(in_kernel_fpu, false);
>> +
>> + preempt_enable();
>> +}
>> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
>> +
> According to https://www.felixcloutier.com/x86/fxsave, the fxsave and fxrstor
> instructions do not save ymm, zmm, or mask registers. Please make that explicit
> in your comment, or replace intrinsics with inline assembler to support those
> registers.
>
> Several x86 crypto routines use these larger registers which could overwrite
> userspace registers. These modules aren't built by default, and may not be
> available on UML. Even so, this issue is worth a comment, at the least.
>
> On my system, lscpu within uml shows all the feature flags that are available
> on the host system, which includes AVX extentions.

That should be xsave, not fxsave, upgrading to xsaveopt if available.

>
> Cheers,
> Peter Lafreniere
>
>
Richard Weinberger Sept. 21, 2023, 7:30 a.m. UTC | #6
----- Ursprüngliche Mail -----
> Von: "anton ivanov" <anton.ivanov@cambridgegreys.com>
> An: "Peter Lafreniere" <peter@n8pjl.ca>
> CC: "Johannes Berg" <johannes@sipsolutions.net>, "linux-um" <linux-um@lists.infradead.org>, "richard" <richard@nod.at>
> Gesendet: Donnerstag, 21. September 2023 09:12:33
> Betreff: Re: [PATCH] um: Enable preemption in UML

> On 20/09/2023 15:49, Peter Lafreniere wrote:
>> On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote:
>>> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>>>
>>>
>>> Preemption requires saving/restoring FPU state. This patch
>>> adds support for it using GCC intrinsics.
>>>
>>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com

I'm not so sure whether we should really directly start with CONFIG_PREEMPT.
How about implementing CONFIG_PREEMPT_COUNT first?

Thanks,
//richard
Anton Ivanov Sept. 21, 2023, 8 a.m. UTC | #7
On 21/09/2023 08:30, Richard Weinberger wrote:
> ----- Ursprüngliche Mail -----
>> Von: "anton ivanov" <anton.ivanov@cambridgegreys.com>
>> An: "Peter Lafreniere" <peter@n8pjl.ca>
>> CC: "Johannes Berg" <johannes@sipsolutions.net>, "linux-um" <linux-um@lists.infradead.org>, "richard" <richard@nod.at>
>> Gesendet: Donnerstag, 21. September 2023 09:12:33
>> Betreff: Re: [PATCH] um: Enable preemption in UML
>> On 20/09/2023 15:49, Peter Lafreniere wrote:
>>> On Wed, Sep 20, 2023 at 08:31, anton.ivanov@cambridgegreys.com wrote:
>>>> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>>>>
>>>>
>>>> Preemption requires saving/restoring FPU state. This patch
>>>> adds support for it using GCC intrinsics.
>>>>
>>>> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
> I'm not so sure whether we should really directly start with CONFIG_PREEMPT.

I have managed to make it work except the IRQ return bit suggested by Peter. We are quite paranoid regarding locking and we are UP so nearly all of it worked out of the box.

> How about implementing CONFIG_PREEMPT_COUNT first?

Sure. Will try that today.

>
> Thanks,
> //richard
>
diff mbox series

Patch

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index b5e179360534..603f5fd82293 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,6 @@  config UML
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_STRNCPY_FROM_USER
 	select ARCH_HAS_STRNLEN_USER
-	select ARCH_NO_PREEMPT
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_KASAN if X86_64
 	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 82f05f250634..35a059baadeb 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -61,7 +61,8 @@  KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
 	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap	\
 	-Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
 	-Din6addr_loopback=kernel_in6addr_loopback \
-	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
+	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
+	-mxsave
 
 KBUILD_RUSTFLAGS += -Crelocation-model=pie
 
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
index 71bfd9ef3938..0094624ae9b4 100644
--- a/arch/um/include/asm/fpu/api.h
+++ b/arch/um/include/asm/fpu/api.h
@@ -8,8 +8,8 @@ 
  * of x86 optimized copy, xor, etc routines into the
  * UML code tree. */
 
-#define kernel_fpu_begin() (void)0
-#define kernel_fpu_end() (void)0
+void kernel_fpu_begin(void);
+void kernel_fpu_end(void);
 
 static inline bool irq_fpu_usable(void)
 {
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 7414154b8e9a..1c3287f1a444 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -44,6 +44,7 @@  struct thread_struct {
 			} cb;
 		} u;
 	} request;
+    u8 fpu[512] __aligned(16); /* Intel docs require xsave/xrestore area to be aligned to 16 bytes */
 };
 
 #define INIT_THREAD \
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 811188be954c..5d9fbaa544be 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -16,7 +16,7 @@  extra-y := vmlinux.lds
 
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
-	signal.o sysrq.o time.o tlb.o trap.o \
+	signal.o sysrq.o time.o tlb.o trap.o fpu.o\
 	um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
 obj-y += load_file.o
 
diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
new file mode 100644
index 000000000000..4142a08c474d
--- /dev/null
+++ b/arch/um/kernel/fpu.c
@@ -0,0 +1,49 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Cambridge Greys Ltd
+ * Copyright (C) 2023 Red Hat Inc
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <asm/fpu/api.h>
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ */
+
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+void kernel_fpu_begin(void)
+{
+	preempt_disable();
+
+	WARN_ON(this_cpu_read(in_kernel_fpu));
+
+	this_cpu_write(in_kernel_fpu, true);
+
+#ifdef CONFIG_64BIT
+	__builtin_ia32_fxsave64(&current->thread.fpu);
+#else
+	__builtin_ia32_fxsave(&current->thread.fpu);
+#endif
+}
+
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+	WARN_ON(!this_cpu_read(in_kernel_fpu));
+
+#ifdef CONFIG_64BIT
+	__builtin_ia32_fxrstor64(&current->thread.fpu);
+#else
+	__builtin_ia32_fxrstor(&current->thread.fpu);
+#endif
+	this_cpu_write(in_kernel_fpu, false);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
+