diff mbox series

[PULL,04/29] arm/kvm: add support for MTE

Message ID 20230518125107.146421-5-peter.maydell@linaro.org
State New
Headers show
Series [PULL,01/29] sbsa-ref: switch default cpu core to Neoverse-N1 | expand

Commit Message

Peter Maydell May 18, 2023, 12:50 p.m. UTC
From: Cornelia Huck <cohuck@redhat.com>

Extend the 'mte' property for the virt machine to cover KVM as
well. For KVM, we don't allocate tag memory, but instead enable the
capability.

If MTE has been enabled, we need to disable migration, as we do not
yet have a way to migrate the tags as well. Therefore, MTE will stay
off with KVM unless requested explicitly.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230428095533.21747-2-cohuck@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/cpu.h     |  4 +++
 target/arm/kvm_arm.h | 19 ++++++++++++
 hw/arm/virt.c        | 73 +++++++++++++++++++++++++-------------------
 target/arm/cpu.c     |  9 +++---
 target/arm/kvm.c     | 35 +++++++++++++++++++++
 target/arm/kvm64.c   |  5 +++
 6 files changed, 109 insertions(+), 36 deletions(-)

Comments

Alex Bennée May 19, 2023, 12:51 p.m. UTC | #1
Peter Maydell <peter.maydell@linaro.org> writes:

> From: Cornelia Huck <cohuck@redhat.com>
>
> Extend the 'mte' property for the virt machine to cover KVM as
> well. For KVM, we don't allocate tag memory, but instead enable the
> capability.
>
> If MTE has been enabled, we need to disable migration, as we do not
> yet have a way to migrate the tags as well. Therefore, MTE will stay
> off with KVM unless requested explicitly.
>
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Message-id: 20230428095533.21747-2-cohuck@redhat.com
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

I bisected to this commit which causes a segfault on one of my test
kernels (6.3.2 arm64):

  ➜  ag MTE .config
  486:CONFIG_ARM64_AS_HAS_MTE=y
  487:CONFIG_ARM64_MTE=y
  2949:CONFIG_WLAN_VENDOR_ADMTEK=y
  3573:# CONFIG_I2C_SIMTEC is not set
  5278:# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
  9749:CONFIG_ARCH_USE_MEMTEST=y
  9750:CONFIG_MEMTEST=y

And crashed with a nullptr deref:

  ➜  gdb --args ./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev user,id=unet,hostfwd=
  tcp::2222-:22,hostfwd=tcp::1234-:1234 -device virtio-net-pci,netdev=unet -kernel ~/lsrc/linux.git/builds/arm64.initramfs/arch/arm64/boot/Image -append "console=ttyAMA0 init=\"busybox poweroff\"" -m 4096 -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on -numa node,memdev=mem -smp 4
  GNU gdb (Debian 13.1-2) 13.1
  Copyright (C) 2023 Free Software Foundation, Inc.
  License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
  This is free software: you are free to change and redistribute it.
  There is NO WARRANTY, to the extent permitted by law.
  Type "show copying" and "show warranty" for details.
  This GDB was configured as "x86_64-linux-gnu".
  Type "show configuration" for configuration details.
  For bug reporting instructions, please see:
  <https://www.gnu.org/software/gdb/bugs/>.
  Find the GDB manual and other documentation resources online at:
      <http://www.gnu.org/software/gdb/documentation/>.

  For help, type "help".
  Type "apropos word" to search for commands related to "word"...
  Executed .gdbinit
  Reading symbols from ./qemu-system-aarch64...
  (gdb) r
  Starting program: /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64 -cpu max,pauth-impdef=on -machine type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22,hostfwd=tcp::1234-:1234 -device virtio-net-pci,netdev=unet -kernel /home/alex/lsrc/linux.git/builds/arm64.initramfs/arch/arm64/boot/Image -append console=ttyAMA0\ init=\"busybox\ poweroff\" -m 4096 -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on -numa node,memdev=mem -smp 4
  [Thread debugging using libthread_db enabled]
  Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
  [New Thread 0x7fffe71e26c0 (LWP 1701008)]
  [New Thread 0x7fffe666d6c0 (LWP 1701009)]
  [New Thread 0x7fffe5e6c6c0 (LWP 1701010)]
  [New Thread 0x7fffe566b6c0 (LWP 1701011)]
  [New Thread 0x7fffe4e6a6c0 (LWP 1701012)]

  Thread 3 "qemu-system-aar" received signal SIGSEGV, Segmentation fault.
  [Switching to Thread 0x7fffe666d6c0 (LWP 1701009)]
  0x0000555556005725 in address_space_to_flatview (as=0x67726f223d656d61) at /home/alex/lsrc/qemu.git/include/exec/memory.h:1105
  1105        return qatomic_rcu_read(&as->current_map);
  (gdb) bt
  #0  0x0000555556005725 in address_space_to_flatview (as=0x67726f223d656d61) at /home/alex/lsrc/qemu.git/include/exec/memory.h:1105
  #1  0x000055555600578b in address_space_translate (as=0x67726f223d656d61, addr=35765504, xlat=0x7fffe6667c10, len=0x0, is_write=false, attrs=...)
      at /home/alex/lsrc/qemu.git/include/exec/memory.h:2792
  #2  0x000055555600600e in allocation_tag_mem
      (env=0x555557512000, ptr_mmu_idx=3, ptr=18446462598803595264, ptr_access=MMU_DATA_STORE, ptr_size=256, tag_access=MMU_DATA_LOAD, tag_size=8, ra=140734806726420)
      at ../../target/arm/tcg/mte_helper.c:176
  #3  0x0000555556006aee in helper_stgm (env=0x555557512000, ptr=18446462598803595264, val=0) at ../../target/arm/tcg/mte_helper.c:461
  #4  0x00007fff60299714 in code_gen_buffer ()
  #5  0x000055555617d0e4 in cpu_tb_exec (cpu=0x55555750f800, itb=0x7fffa0299600, tb_exit=0x7fffe6668270) at ../../accel/tcg/cpu-exec.c:460
  #6  0x000055555617dd79 in cpu_loop_exec_tb (cpu=0x55555750f800, tb=0x7fffa0299600, pc=18446603336361784892, last_tb=0x7fffe6668280, tb_exit=0x7fffe6668270)
      at ../../accel/tcg/cpu-exec.c:893
  #7  0x000055555617e0e3 in cpu_exec_loop (cpu=0x55555750f800, sc=0x7fffe6668300) at ../../accel/tcg/cpu-exec.c:1013
  #8  0x000055555617e1e3 in cpu_exec_setjmp (cpu=0x55555750f800, sc=0x7fffe6668300) at ../../accel/tcg/cpu-exec.c:1043
  #9  0x000055555617e26a in cpu_exec (cpu=0x55555750f800) at ../../accel/tcg/cpu-exec.c:1069
  #10 0x00005555561a43ac in tcg_cpus_exec (cpu=0x55555750f800) at ../../accel/tcg/tcg-accel-ops.c:81
  #11 0x00005555561a4a64 in mttcg_cpu_thread_fn (arg=0x55555750f800) at ../../accel/tcg/tcg-accel-ops-mttcg.c:95
  #12 0x00005555563ba633 in qemu_thread_start (args=0x5555575afae0) at ../../util/qemu-thread-posix.c:541
  #13 0x00007ffff4b17fd4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
  #14 0x00007ffff4b985bc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
  (gdb) thread 0
  Invalid thread ID: 0
  (gdb) frame 0
  #0  0x0000555556005725 in address_space_to_flatview (as=0x67726f223d656d61) at /home/alex/lsrc/qemu.git/include/exec/memory.h:1105
  1105        return qatomic_rcu_read(&as->current_map);
  (gdb) p as
  $1 = (AddressSpace *) 0x67726f223d656d61
  (gdb) p *$
  Cannot access memory at address 0x67726f223d656d61
  (gdb) f 2
  #2  0x000055555600600e in allocation_tag_mem (env=0x555557512000, ptr_mmu_idx=3, ptr=18446462598803595264, ptr_access=MMU_DATA_STORE, ptr_size=256, 
      tag_access=MMU_DATA_LOAD, tag_size=8, ra=140734806726420) at ../../target/arm/tcg/mte_helper.c:176
  176         mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
  (gdb) l
  171         tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
  172
  173         /* Look up the address in tag space. */
  174         tag_asi = attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
  175         tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
  176         mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
  177                                      tag_access == MMU_DATA_STORE, attrs);
  178
  179         /*
  180          * Note that @mr will never be NULL.  If there is nothing in the address
  (gdb) p tag_as
  $2 = (AddressSpace *) 0x67726f223d656d61
  (gdb) p *$
  Cannot access memory at address 0x67726f223d656d61

> ---
>  target/arm/cpu.h     |  4 +++
>  target/arm/kvm_arm.h | 19 ++++++++++++
>  hw/arm/virt.c        | 73 +++++++++++++++++++++++++-------------------
>  target/arm/cpu.c     |  9 +++---
>  target/arm/kvm.c     | 35 +++++++++++++++++++++
>  target/arm/kvm64.c   |  5 +++
>  6 files changed, 109 insertions(+), 36 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index d469a2637b3..c3463e39bcd 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -935,6 +935,9 @@ struct ArchCPU {
>       */
>      uint32_t psci_conduit;
>  
> +    /* CPU has Memory Tag Extension */
> +    bool has_mte;
> +
>      /* For v8M, initial value of the Secure VTOR */
>      uint32_t init_svtor;
>      /* For v8M, initial value of the Non-secure VTOR */
> @@ -1053,6 +1056,7 @@ struct ArchCPU {
>      bool prop_pauth;
>      bool prop_pauth_impdef;
>      bool prop_lpa2;
> +    OnOffAuto prop_mte;
>  
>      /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
>      uint32_t dcz_blocksize;
> diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
> index 330fbe5c722..2083547bf60 100644
> --- a/target/arm/kvm_arm.h
> +++ b/target/arm/kvm_arm.h
> @@ -313,6 +313,13 @@ bool kvm_arm_pmu_supported(void);
>   */
>  bool kvm_arm_sve_supported(void);
>  
> +/**
> + * kvm_arm_mte_supported:
> + *
> + * Returns: true if KVM can enable MTE, and false otherwise.
> + */
> +bool kvm_arm_mte_supported(void);
> +
>  /**
>   * kvm_arm_get_max_vm_ipa_size:
>   * @ms: Machine state handle
> @@ -377,6 +384,8 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa);
>  
>  int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
>  
> +void kvm_arm_enable_mte(Object *cpuobj, Error **errp);
> +
>  #else
>  
>  /*
> @@ -403,6 +412,11 @@ static inline bool kvm_arm_steal_time_supported(void)
>      return false;
>  }
>  
> +static inline bool kvm_arm_mte_supported(void)
> +{
> +    return false;
> +}
> +
>  /*
>   * These functions should never actually be called without KVM support.
>   */
> @@ -451,6 +465,11 @@ static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs)
>      g_assert_not_reached();
>  }
>  
> +static inline void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
> +{
> +    g_assert_not_reached();
> +}
> +
>  #endif
>  
>  static inline const char *gic_class_name(void)
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index b99ae185016..06b514b25c3 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2146,7 +2146,7 @@ static void machvirt_init(MachineState *machine)
>          exit(1);
>      }
>  
> -    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
> +    if (vms->mte && hvf_enabled()) {
>          error_report("mach-virt: %s does not support providing "
>                       "MTE to the guest CPU",
>                       current_accel_name());
> @@ -2216,39 +2216,48 @@ static void machvirt_init(MachineState *machine)
>          }
>  
>          if (vms->mte) {
> -            /* Create the memory region only once, but link to all cpus. */
> -            if (!tag_sysmem) {
> -                /*
> -                 * The property exists only if MemTag is supported.
> -                 * If it is, we must allocate the ram to back that up.
> -                 */
> -                if (!object_property_find(cpuobj, "tag-memory")) {
> -                    error_report("MTE requested, but not supported "
> -                                 "by the guest CPU");
> +            if (tcg_enabled()) {
> +                /* Create the memory region only once, but link to all cpus. */
> +                if (!tag_sysmem) {
> +                    /*
> +                     * The property exists only if MemTag is supported.
> +                     * If it is, we must allocate the ram to back that up.
> +                     */
> +                    if (!object_property_find(cpuobj, "tag-memory")) {
> +                        error_report("MTE requested, but not supported "
> +                                     "by the guest CPU");
> +                        exit(1);
> +                    }
> +
> +                    tag_sysmem = g_new(MemoryRegion, 1);
> +                    memory_region_init(tag_sysmem, OBJECT(machine),
> +                                       "tag-memory", UINT64_MAX / 32);
> +
> +                    if (vms->secure) {
> +                        secure_tag_sysmem = g_new(MemoryRegion, 1);
> +                        memory_region_init(secure_tag_sysmem, OBJECT(machine),
> +                                           "secure-tag-memory",
> +                                           UINT64_MAX / 32);
> +
> +                        /* As with ram, secure-tag takes precedence over tag. */
> +                        memory_region_add_subregion_overlap(secure_tag_sysmem,
> +                                                            0, tag_sysmem, -1);
> +                    }
> +                }
> +
> +                object_property_set_link(cpuobj, "tag-memory",
> +                                         OBJECT(tag_sysmem), &error_abort);
> +                if (vms->secure) {
> +                    object_property_set_link(cpuobj, "secure-tag-memory",
> +                                             OBJECT(secure_tag_sysmem),
> +                                             &error_abort);
> +                }
> +            } else if (kvm_enabled()) {
> +                if (!kvm_arm_mte_supported()) {
> +                    error_report("MTE requested, but not supported by KVM");
>                      exit(1);
>                  }
> -
> -                tag_sysmem = g_new(MemoryRegion, 1);
> -                memory_region_init(tag_sysmem, OBJECT(machine),
> -                                   "tag-memory", UINT64_MAX / 32);
> -
> -                if (vms->secure) {
> -                    secure_tag_sysmem = g_new(MemoryRegion, 1);
> -                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
> -                                       "secure-tag-memory", UINT64_MAX / 32);
> -
> -                    /* As with ram, secure-tag takes precedence over tag.  */
> -                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
> -                                                        tag_sysmem, -1);
> -                }
> -            }
> -
> -            object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),
> -                                     &error_abort);
> -            if (vms->secure) {
> -                object_property_set_link(cpuobj, "secure-tag-memory",
> -                                         OBJECT(secure_tag_sysmem),
> -                                         &error_abort);
> +                kvm_arm_enable_mte(cpuobj, &error_abort);
>              }
>          }
>  
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index 5182ed0c911..f6a88e52ac2 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -1480,6 +1480,7 @@ void arm_cpu_post_init(Object *obj)
>                                       qdev_prop_allow_set_link_before_realize,
>                                       OBJ_PROP_LINK_STRONG);
>          }
> +        cpu->has_mte = true;
>      }
>  #endif
>  }
> @@ -1616,7 +1617,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>          }
>          if (cpu->tag_memory) {
>              error_setg(errp,
> -                       "Cannot enable %s when guest CPUs has MTE enabled",
> +                       "Cannot enable %s when guest CPUs has tag memory enabled",
>                         current_accel_name());
>              return;
>          }
> @@ -1996,10 +1997,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>      }
>  
>  #ifndef CONFIG_USER_ONLY
> -    if (cpu->tag_memory == NULL && cpu_isar_feature(aa64_mte, cpu)) {
> +    if (!cpu->has_mte && cpu_isar_feature(aa64_mte, cpu)) {
>          /*
> -         * Disable the MTE feature bits if we do not have tag-memory
> -         * provided by the machine.
> +         * Disable the MTE feature bits if we do not have the feature
> +         * setup by the machine.
>           */
>          cpu->isar.id_aa64pfr1 =
>              FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 84da49332c4..9553488ecd1 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -31,6 +31,7 @@
>  #include "hw/boards.h"
>  #include "hw/irq.h"
>  #include "qemu/log.h"
> +#include "migration/blocker.h"
>  
>  const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>      KVM_CAP_LAST_INFO
> @@ -1064,3 +1065,37 @@ bool kvm_arch_cpu_check_are_resettable(void)
>  void kvm_arch_accel_class_init(ObjectClass *oc)
>  {
>  }
> +
> +void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
> +{
> +    static bool tried_to_enable;
> +    static bool succeeded_to_enable;
> +    Error *mte_migration_blocker = NULL;
> +    int ret;
> +
> +    if (!tried_to_enable) {
> +        /*
> +         * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make
> +         * sense), and we only want a single migration blocker as well.
> +         */
> +        tried_to_enable = true;
> +
> +        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
> +        if (ret) {
> +            error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");
> +            return;
> +        }
> +
> +        /* TODO: add proper migration support with MTE enabled */
> +        error_setg(&mte_migration_blocker,
> +                   "Live migration disabled due to MTE enabled");
> +        if (migrate_add_blocker(mte_migration_blocker, errp)) {
> +            error_free(mte_migration_blocker);
> +            return;
> +        }
> +        succeeded_to_enable = true;
> +    }
> +    if (succeeded_to_enable) {
> +        object_property_set_bool(cpuobj, "has_mte", true, NULL);
> +    }
> +}
> diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
> index 810db33ccbd..1893f387936 100644
> --- a/target/arm/kvm64.c
> +++ b/target/arm/kvm64.c
> @@ -756,6 +756,11 @@ bool kvm_arm_steal_time_supported(void)
>      return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
>  }
>  
> +bool kvm_arm_mte_supported(void)
> +{
> +    return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
> +}
> +
>  QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
>  
>  uint32_t kvm_arm_sve_get_vls(CPUState *cs)
Peter Maydell May 19, 2023, 1:31 p.m. UTC | #2
On Fri, 19 May 2023 at 13:55, Alex Bennée <alex.bennee@linaro.org> wrote:
>
>
> Peter Maydell <peter.maydell@linaro.org> writes:
>
> > From: Cornelia Huck <cohuck@redhat.com>
> >
> > Extend the 'mte' property for the virt machine to cover KVM as
> > well. For KVM, we don't allocate tag memory, but instead enable the
> > capability.
> >
> > If MTE has been enabled, we need to disable migration, as we do not
> > yet have a way to migrate the tags as well. Therefore, MTE will stay
> > off with KVM unless requested explicitly.
> >
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> > Message-id: 20230428095533.21747-2-cohuck@redhat.com
> > Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
>
> I bisected to this commit which causes a segfault on one of my test
> kernels (6.3.2 arm64):
>
>   ➜  ag MTE .config
>   486:CONFIG_ARM64_AS_HAS_MTE=y
>   487:CONFIG_ARM64_MTE=y
>   2949:CONFIG_WLAN_VENDOR_ADMTEK=y
>   3573:# CONFIG_I2C_SIMTEC is not set
>   5278:# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
>   9749:CONFIG_ARCH_USE_MEMTEST=y
>   9750:CONFIG_MEMTEST=y

Try this entirely untested patch?

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index f6a88e52ac2..f350661a928 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1593,6 +1593,15 @@ static void arm_cpu_realizefn(DeviceState *dev,
Error **errp)
         }
     }

+    /*
+     * For TCG, we can only present MTE to the guest if the board gave us
+     * tag RAM. Set has_mte appropriately so code below doesn't need to
+     * care whether we're TCG or KVM when deciding if MTE is present.
+     */
+    if (tcg_enabled() || qtest_enabled()) {
+        cpu->has_mte = cpu->tag_memory != NULL;
+    }
+
     if (!tcg_enabled() && !qtest_enabled()) {
         /*
          * We assume that no accelerator except TCG (and the "not really an

(Signed-off-by: Peter Maydell <peter.maydell@linaro.org> if it
works and you want to turn it into a proper patch...)

-- PMM
Peter Maydell May 19, 2023, 2:52 p.m. UTC | #3
On Fri, 19 May 2023 at 14:31, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Fri, 19 May 2023 at 13:55, Alex Bennée <alex.bennee@linaro.org> wrote:
> >
> >
> > Peter Maydell <peter.maydell@linaro.org> writes:
> >
> > > From: Cornelia Huck <cohuck@redhat.com>
> > >
> > > Extend the 'mte' property for the virt machine to cover KVM as
> > > well. For KVM, we don't allocate tag memory, but instead enable the
> > > capability.
> > >
> > > If MTE has been enabled, we need to disable migration, as we do not
> > > yet have a way to migrate the tags as well. Therefore, MTE will stay
> > > off with KVM unless requested explicitly.
> > >
> > > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > > Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> > > Message-id: 20230428095533.21747-2-cohuck@redhat.com
> > > Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> >
> > I bisected to this commit which causes a segfault on one of my test
> > kernels (6.3.2 arm64):
> >
> >   ➜  ag MTE .config
> >   486:CONFIG_ARM64_AS_HAS_MTE=y
> >   487:CONFIG_ARM64_MTE=y
> >   2949:CONFIG_WLAN_VENDOR_ADMTEK=y
> >   3573:# CONFIG_I2C_SIMTEC is not set
> >   5278:# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
> >   9749:CONFIG_ARCH_USE_MEMTEST=y
> >   9750:CONFIG_MEMTEST=y
>
> Try this entirely untested patch?
>
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index f6a88e52ac2..f350661a928 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -1593,6 +1593,15 @@ static void arm_cpu_realizefn(DeviceState *dev,
> Error **errp)
>          }
>      }
>
> +    /*
> +     * For TCG, we can only present MTE to the guest if the board gave us
> +     * tag RAM. Set has_mte appropriately so code below doesn't need to
> +     * care whether we're TCG or KVM when deciding if MTE is present.
> +     */
> +    if (tcg_enabled() || qtest_enabled()) {
> +        cpu->has_mte = cpu->tag_memory != NULL;
> +    }
> +
>      if (!tcg_enabled() && !qtest_enabled()) {
>          /*
>           * We assume that no accelerator except TCG (and the "not really an

Hmm, actually I don't think that's the only fix needed. It's OK for
TCG, but for KVM I can't see anywhere in the code that ever sets
has_mte to false. We default it to on in the cpu.c code, but
then the board code only sets it to true if MTE is enabled
(via kvm_arm_enable_mte()).

Let's just revert the patch while we figure out the right logic.
I'll send a revert patch in a moment.

thanks
-- PMM
Cornelia Huck May 22, 2023, 9:48 a.m. UTC | #4
On Fri, May 19 2023, Peter Maydell <peter.maydell@linaro.org> wrote:

> On Fri, 19 May 2023 at 14:31, Peter Maydell <peter.maydell@linaro.org> wrote:
>>
>> On Fri, 19 May 2023 at 13:55, Alex Bennée <alex.bennee@linaro.org> wrote:
>> >
>> >
>> > Peter Maydell <peter.maydell@linaro.org> writes:
>> >
>> > > From: Cornelia Huck <cohuck@redhat.com>
>> > >
>> > > Extend the 'mte' property for the virt machine to cover KVM as
>> > > well. For KVM, we don't allocate tag memory, but instead enable the
>> > > capability.
>> > >
>> > > If MTE has been enabled, we need to disable migration, as we do not
>> > > yet have a way to migrate the tags as well. Therefore, MTE will stay
>> > > off with KVM unless requested explicitly.
>> > >
>> > > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>> > > Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
>> > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> > > Message-id: 20230428095533.21747-2-cohuck@redhat.com
>> > > Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
>> >
>> > I bisected to this commit which causes a segfault on one of my test
>> > kernels (6.3.2 arm64):
>> >
>> >   ➜  ag MTE .config
>> >   486:CONFIG_ARM64_AS_HAS_MTE=y
>> >   487:CONFIG_ARM64_MTE=y
>> >   2949:CONFIG_WLAN_VENDOR_ADMTEK=y
>> >   3573:# CONFIG_I2C_SIMTEC is not set
>> >   5278:# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
>> >   9749:CONFIG_ARCH_USE_MEMTEST=y
>> >   9750:CONFIG_MEMTEST=y

Sigh, this patch seems to be cursed :( Apologies for the fallout.

(I'm wondering what makes this pop up, maybe the CONFIG_MEMTEST?)

>>
>> Try this entirely untested patch?
>>
>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>> index f6a88e52ac2..f350661a928 100644
>> --- a/target/arm/cpu.c
>> +++ b/target/arm/cpu.c
>> @@ -1593,6 +1593,15 @@ static void arm_cpu_realizefn(DeviceState *dev,
>> Error **errp)
>>          }
>>      }
>>
>> +    /*
>> +     * For TCG, we can only present MTE to the guest if the board gave us
>> +     * tag RAM. Set has_mte appropriately so code below doesn't need to
>> +     * care whether we're TCG or KVM when deciding if MTE is present.
>> +     */
>> +    if (tcg_enabled() || qtest_enabled()) {
>> +        cpu->has_mte = cpu->tag_memory != NULL;
>> +    }
>> +
>>      if (!tcg_enabled() && !qtest_enabled()) {
>>          /*
>>           * We assume that no accelerator except TCG (and the "not really an
>
> Hmm, actually I don't think that's the only fix needed. It's OK for
> TCG, but for KVM I can't see anywhere in the code that ever sets
> has_mte to false. We default it to on in the cpu.c code, but
> then the board code only sets it to true if MTE is enabled
> (via kvm_arm_enable_mte()).

Hrm, do we need explicit init of this field? Probably needless to say
that I didn't hit this problem in any of my tests... I suspect that only
specific kernels hit this?
Alex Bennée May 22, 2023, 10:12 a.m. UTC | #5
Cornelia Huck <cohuck@redhat.com> writes:

> On Fri, May 19 2023, Peter Maydell <peter.maydell@linaro.org> wrote:
>
>> On Fri, 19 May 2023 at 14:31, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>
>>> On Fri, 19 May 2023 at 13:55, Alex Bennée <alex.bennee@linaro.org> wrote:
>>> >
>>> >
>>> > Peter Maydell <peter.maydell@linaro.org> writes:
>>> >
>>> > > From: Cornelia Huck <cohuck@redhat.com>
>>> > >
>>> > > Extend the 'mte' property for the virt machine to cover KVM as
>>> > > well. For KVM, we don't allocate tag memory, but instead enable the
>>> > > capability.
>>> > >
>>> > > If MTE has been enabled, we need to disable migration, as we do not
>>> > > yet have a way to migrate the tags as well. Therefore, MTE will stay
>>> > > off with KVM unless requested explicitly.
>>> > >
>>> > > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>> > > Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
>>> > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>> > > Message-id: 20230428095533.21747-2-cohuck@redhat.com
>>> > > Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
>>> >
>>> > I bisected to this commit which causes a segfault on one of my test
>>> > kernels (6.3.2 arm64):
>>> >
>>> >   ➜  ag MTE .config
>>> >   486:CONFIG_ARM64_AS_HAS_MTE=y
>>> >   487:CONFIG_ARM64_MTE=y
>>> >   2949:CONFIG_WLAN_VENDOR_ADMTEK=y
>>> >   3573:# CONFIG_I2C_SIMTEC is not set
>>> >   5278:# CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
>>> >   9749:CONFIG_ARCH_USE_MEMTEST=y
>>> >   9750:CONFIG_MEMTEST=y
>
> Sigh, this patch seems to be cursed :( Apologies for the fallout.
>
> (I'm wondering what makes this pop up, maybe the CONFIG_MEMTEST?)
>
>>>
>>> Try this entirely untested patch?
>>>
>>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>>> index f6a88e52ac2..f350661a928 100644
>>> --- a/target/arm/cpu.c
>>> +++ b/target/arm/cpu.c
>>> @@ -1593,6 +1593,15 @@ static void arm_cpu_realizefn(DeviceState *dev,
>>> Error **errp)
>>>          }
>>>      }
>>>
>>> +    /*
>>> +     * For TCG, we can only present MTE to the guest if the board gave us
>>> +     * tag RAM. Set has_mte appropriately so code below doesn't need to
>>> +     * care whether we're TCG or KVM when deciding if MTE is present.
>>> +     */
>>> +    if (tcg_enabled() || qtest_enabled()) {
>>> +        cpu->has_mte = cpu->tag_memory != NULL;
>>> +    }
>>> +
>>>      if (!tcg_enabled() && !qtest_enabled()) {
>>>          /*
>>>           * We assume that no accelerator except TCG (and the "not really an
>>
>> Hmm, actually I don't think that's the only fix needed. It's OK for
>> TCG, but for KVM I can't see anywhere in the code that ever sets
>> has_mte to false. We default it to on in the cpu.c code, but
>> then the board code only sets it to true if MTE is enabled
>> (via kvm_arm_enable_mte()).
>
> Hrm, do we need explicit init of this field? Probably needless to say
> that I didn't hit this problem in any of my tests... I suspect that only
> specific kernels hit this?

Yes - my local test kernel usually has a rotating case of all the bells
and whistles enabled so catches things like this. I didn't attempt to
bisect the config but here is the current state if its useful:
I think it would be worthwhile adding a specific MTE enabled kernel to
the avocado tests if none of the existing ones can be booted up with MTE
enabled.
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d469a2637b3..c3463e39bcd 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -935,6 +935,9 @@  struct ArchCPU {
      */
     uint32_t psci_conduit;
 
+    /* CPU has Memory Tag Extension */
+    bool has_mte;
+
     /* For v8M, initial value of the Secure VTOR */
     uint32_t init_svtor;
     /* For v8M, initial value of the Non-secure VTOR */
@@ -1053,6 +1056,7 @@  struct ArchCPU {
     bool prop_pauth;
     bool prop_pauth_impdef;
     bool prop_lpa2;
+    OnOffAuto prop_mte;
 
     /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
     uint32_t dcz_blocksize;
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 330fbe5c722..2083547bf60 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -313,6 +313,13 @@  bool kvm_arm_pmu_supported(void);
  */
 bool kvm_arm_sve_supported(void);
 
+/**
+ * kvm_arm_mte_supported:
+ *
+ * Returns: true if KVM can enable MTE, and false otherwise.
+ */
+bool kvm_arm_mte_supported(void);
+
 /**
  * kvm_arm_get_max_vm_ipa_size:
  * @ms: Machine state handle
@@ -377,6 +384,8 @@  void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa);
 
 int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
 
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp);
+
 #else
 
 /*
@@ -403,6 +412,11 @@  static inline bool kvm_arm_steal_time_supported(void)
     return false;
 }
 
+static inline bool kvm_arm_mte_supported(void)
+{
+    return false;
+}
+
 /*
  * These functions should never actually be called without KVM support.
  */
@@ -451,6 +465,11 @@  static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs)
     g_assert_not_reached();
 }
 
+static inline void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+    g_assert_not_reached();
+}
+
 #endif
 
 static inline const char *gic_class_name(void)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index b99ae185016..06b514b25c3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2146,7 +2146,7 @@  static void machvirt_init(MachineState *machine)
         exit(1);
     }
 
-    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
+    if (vms->mte && hvf_enabled()) {
         error_report("mach-virt: %s does not support providing "
                      "MTE to the guest CPU",
                      current_accel_name());
@@ -2216,39 +2216,48 @@  static void machvirt_init(MachineState *machine)
         }
 
         if (vms->mte) {
-            /* Create the memory region only once, but link to all cpus. */
-            if (!tag_sysmem) {
-                /*
-                 * The property exists only if MemTag is supported.
-                 * If it is, we must allocate the ram to back that up.
-                 */
-                if (!object_property_find(cpuobj, "tag-memory")) {
-                    error_report("MTE requested, but not supported "
-                                 "by the guest CPU");
+            if (tcg_enabled()) {
+                /* Create the memory region only once, but link to all cpus. */
+                if (!tag_sysmem) {
+                    /*
+                     * The property exists only if MemTag is supported.
+                     * If it is, we must allocate the ram to back that up.
+                     */
+                    if (!object_property_find(cpuobj, "tag-memory")) {
+                        error_report("MTE requested, but not supported "
+                                     "by the guest CPU");
+                        exit(1);
+                    }
+
+                    tag_sysmem = g_new(MemoryRegion, 1);
+                    memory_region_init(tag_sysmem, OBJECT(machine),
+                                       "tag-memory", UINT64_MAX / 32);
+
+                    if (vms->secure) {
+                        secure_tag_sysmem = g_new(MemoryRegion, 1);
+                        memory_region_init(secure_tag_sysmem, OBJECT(machine),
+                                           "secure-tag-memory",
+                                           UINT64_MAX / 32);
+
+                        /* As with ram, secure-tag takes precedence over tag. */
+                        memory_region_add_subregion_overlap(secure_tag_sysmem,
+                                                            0, tag_sysmem, -1);
+                    }
+                }
+
+                object_property_set_link(cpuobj, "tag-memory",
+                                         OBJECT(tag_sysmem), &error_abort);
+                if (vms->secure) {
+                    object_property_set_link(cpuobj, "secure-tag-memory",
+                                             OBJECT(secure_tag_sysmem),
+                                             &error_abort);
+                }
+            } else if (kvm_enabled()) {
+                if (!kvm_arm_mte_supported()) {
+                    error_report("MTE requested, but not supported by KVM");
                     exit(1);
                 }
-
-                tag_sysmem = g_new(MemoryRegion, 1);
-                memory_region_init(tag_sysmem, OBJECT(machine),
-                                   "tag-memory", UINT64_MAX / 32);
-
-                if (vms->secure) {
-                    secure_tag_sysmem = g_new(MemoryRegion, 1);
-                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
-                                       "secure-tag-memory", UINT64_MAX / 32);
-
-                    /* As with ram, secure-tag takes precedence over tag.  */
-                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
-                                                        tag_sysmem, -1);
-                }
-            }
-
-            object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),
-                                     &error_abort);
-            if (vms->secure) {
-                object_property_set_link(cpuobj, "secure-tag-memory",
-                                         OBJECT(secure_tag_sysmem),
-                                         &error_abort);
+                kvm_arm_enable_mte(cpuobj, &error_abort);
             }
         }
 
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 5182ed0c911..f6a88e52ac2 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1480,6 +1480,7 @@  void arm_cpu_post_init(Object *obj)
                                      qdev_prop_allow_set_link_before_realize,
                                      OBJ_PROP_LINK_STRONG);
         }
+        cpu->has_mte = true;
     }
 #endif
 }
@@ -1616,7 +1617,7 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         }
         if (cpu->tag_memory) {
             error_setg(errp,
-                       "Cannot enable %s when guest CPUs has MTE enabled",
+                       "Cannot enable %s when guest CPUs has tag memory enabled",
                        current_accel_name());
             return;
         }
@@ -1996,10 +1997,10 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }
 
 #ifndef CONFIG_USER_ONLY
-    if (cpu->tag_memory == NULL && cpu_isar_feature(aa64_mte, cpu)) {
+    if (!cpu->has_mte && cpu_isar_feature(aa64_mte, cpu)) {
         /*
-         * Disable the MTE feature bits if we do not have tag-memory
-         * provided by the machine.
+         * Disable the MTE feature bits if we do not have the feature
+         * setup by the machine.
          */
         cpu->isar.id_aa64pfr1 =
             FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 84da49332c4..9553488ecd1 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -31,6 +31,7 @@ 
 #include "hw/boards.h"
 #include "hw/irq.h"
 #include "qemu/log.h"
+#include "migration/blocker.h"
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
@@ -1064,3 +1065,37 @@  bool kvm_arch_cpu_check_are_resettable(void)
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
 }
+
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+    static bool tried_to_enable;
+    static bool succeeded_to_enable;
+    Error *mte_migration_blocker = NULL;
+    int ret;
+
+    if (!tried_to_enable) {
+        /*
+         * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make
+         * sense), and we only want a single migration blocker as well.
+         */
+        tried_to_enable = true;
+
+        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
+        if (ret) {
+            error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");
+            return;
+        }
+
+        /* TODO: add proper migration support with MTE enabled */
+        error_setg(&mte_migration_blocker,
+                   "Live migration disabled due to MTE enabled");
+        if (migrate_add_blocker(mte_migration_blocker, errp)) {
+            error_free(mte_migration_blocker);
+            return;
+        }
+        succeeded_to_enable = true;
+    }
+    if (succeeded_to_enable) {
+        object_property_set_bool(cpuobj, "has_mte", true, NULL);
+    }
+}
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 810db33ccbd..1893f387936 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -756,6 +756,11 @@  bool kvm_arm_steal_time_supported(void)
     return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
 }
 
+bool kvm_arm_mte_supported(void)
+{
+    return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
+}
+
 QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 
 uint32_t kvm_arm_sve_get_vls(CPUState *cs)