diff mbox series

[v5,5/5] i386/pc: restrict AMD only enforcing of valid IOVAs to new machine type

Message ID 20220520104532.9816-6-joao.m.martins@oracle.com
State New
Headers show
Series [v5,1/5] hw/i386: add 4g boundary start to X86MachineState | expand

Commit Message

Joao Martins May 20, 2022, 10:45 a.m. UTC
The added enforcing is only relevant in the case of AMD where the
range right before the 1TB is restricted and cannot be DMA mapped
by the kernel consequently leading to IOMMU INVALID_DEVICE_REQUEST
or possibly other kinds of IOMMU events in the AMD IOMMU.

Although, there's a case where it may make sense to disable the
IOVA relocation/validation when migrating from a
non-valid-IOVA-aware qemu to one that supports it.

Relocating RAM regions to after the 1Tb hole has consequences for
guest ABI because we are changing the memory mapping, so make
sure that only new machine enforce but not older ones.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 hw/i386/pc.c         | 7 +++++--
 hw/i386/pc_piix.c    | 2 ++
 hw/i386/pc_q35.c     | 2 ++
 include/hw/i386/pc.h | 1 +
 4 files changed, 10 insertions(+), 2 deletions(-)

Comments

Igor Mammedov June 16, 2022, 2:27 p.m. UTC | #1
On Fri, 20 May 2022 11:45:32 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:

> The added enforcing is only relevant in the case of AMD where the
> range right before the 1TB is restricted and cannot be DMA mapped
> by the kernel consequently leading to IOMMU INVALID_DEVICE_REQUEST
> or possibly other kinds of IOMMU events in the AMD IOMMU.
> 
> Although, there's a case where it may make sense to disable the
> IOVA relocation/validation when migrating from a
> non-valid-IOVA-aware qemu to one that supports it.
> 
> Relocating RAM regions to after the 1Tb hole has consequences for
> guest ABI because we are changing the memory mapping, so make
> sure that only new machine enforce but not older ons.

is old machine with so much ram going to work and not explode
even without iommu?

> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  hw/i386/pc.c         | 7 +++++--
>  hw/i386/pc_piix.c    | 2 ++
>  hw/i386/pc_q35.c     | 2 ++
>  include/hw/i386/pc.h | 1 +
>  4 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 652ae8ff9ccf..62f9af91f19f 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -862,6 +862,7 @@ static hwaddr x86_max_phys_addr(PCMachineState *pcms,
>  static void x86_update_above_4g_mem_start(PCMachineState *pcms,
>                                            uint64_t pci_hole64_size)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>      X86MachineState *x86ms = X86_MACHINE(pcms);
>      CPUX86State *env = &X86_CPU(first_cpu)->env;
>      hwaddr start = x86ms->above_4g_mem_start;
> @@ -870,9 +871,10 @@ static void x86_update_above_4g_mem_start(PCMachineState *pcms,
>      /*
>       * The HyperTransport range close to the 1T boundary is unique to AMD
>       * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
> -     * to above 1T to AMD vCPUs only.
> +     * to above 1T to AMD vCPUs only. @enforce_valid_iova is only false in
> +     * older machine types (<= 7.0) for compatibility purposes.
>       */
> -    if (!IS_AMD_CPU(env)) {
> +    if (!IS_AMD_CPU(env) || !pcmc->enforce_valid_iova) {
>          return;
>      }
>  
> @@ -1881,6 +1883,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
>      pcmc->has_reserved_memory = true;
>      pcmc->kvmclock_enabled = true;
>      pcmc->enforce_aligned_dimm = true;
> +    pcmc->enforce_valid_iova = true;
>      /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
>       * to be used at the moment, 32K should be enough for a while.  */
>      pcmc->acpi_data_size = 0x20000 + 0x8000;
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 57bb5b8f2aea..74176a210d56 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -437,9 +437,11 @@ DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1", NULL,
>  
>  static void pc_i440fx_7_0_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      pc_i440fx_7_1_machine_options(m);
>      m->alias = NULL;
>      m->is_default = false;
> +    pcmc->enforce_valid_iova = false;
>      compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
>      compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
>  }
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index 4d5c2fbd976b..bc38a6ba4c67 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -381,8 +381,10 @@ DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
>  
>  static void pc_q35_7_0_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      pc_q35_7_1_machine_options(m);
>      m->alias = NULL;
> +    pcmc->enforce_valid_iova = false;
>      compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
>      compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
>  }
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 9c847faea2f8..22119131eca7 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -117,6 +117,7 @@ struct PCMachineClass {
>      bool has_reserved_memory;
>      bool enforce_aligned_dimm;
>      bool broken_reserved_end;
> +    bool enforce_valid_iova;
>  
>      /* generate legacy CPU hotplug AML */
>      bool legacy_cpu_hotplug;
Joao Martins June 17, 2022, 1:36 p.m. UTC | #2
On 6/16/22 15:27, Igor Mammedov wrote:
> On Fri, 20 May 2022 11:45:32 +0100
> Joao Martins <joao.m.martins@oracle.com> wrote:
> 
>> The added enforcing is only relevant in the case of AMD where the
>> range right before the 1TB is restricted and cannot be DMA mapped
>> by the kernel consequently leading to IOMMU INVALID_DEVICE_REQUEST
>> or possibly other kinds of IOMMU events in the AMD IOMMU.
>>
>> Although, there's a case where it may make sense to disable the
>> IOVA relocation/validation when migrating from a
>> non-valid-IOVA-aware qemu to one that supports it.
>>
>> Relocating RAM regions to after the 1Tb hole has consequences for
>> guest ABI because we are changing the memory mapping, so make
>> sure that only new machine enforce but not older ons.
> 
> is old machine with so much ram going to work and not explode
> even without iommu?
> 
Depends on your definition of work.

And that's the purpose of this patch, to still allow graceful
failures on hosts with different hypervisor kernel versions that
would use versioned machine (like pc-q35-7.0 or older)

e.g. if you boot a guest with pc-q35-7.0 on a 4.19 kernel it will boot
whereas on a v5.14 kernel with same pc-q35-7.0, the memory map would
stay the same, but it would fail as a >= 5.4 kernel will validate
whether IOVA.

It will 'work' as before for old machine, meaning you are dependent on the
kernel to validate IOVAs and prevent dma maps or not. Without IOMMU enabled
you don't need this, but you also can't do VFIO (or the like vDPA)

>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>  hw/i386/pc.c         | 7 +++++--
>>  hw/i386/pc_piix.c    | 2 ++
>>  hw/i386/pc_q35.c     | 2 ++
>>  include/hw/i386/pc.h | 1 +
>>  4 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 652ae8ff9ccf..62f9af91f19f 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -862,6 +862,7 @@ static hwaddr x86_max_phys_addr(PCMachineState *pcms,
>>  static void x86_update_above_4g_mem_start(PCMachineState *pcms,
>>                                            uint64_t pci_hole64_size)
>>  {
>> +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>>      X86MachineState *x86ms = X86_MACHINE(pcms);
>>      CPUX86State *env = &X86_CPU(first_cpu)->env;
>>      hwaddr start = x86ms->above_4g_mem_start;
>> @@ -870,9 +871,10 @@ static void x86_update_above_4g_mem_start(PCMachineState *pcms,
>>      /*
>>       * The HyperTransport range close to the 1T boundary is unique to AMD
>>       * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
>> -     * to above 1T to AMD vCPUs only.
>> +     * to above 1T to AMD vCPUs only. @enforce_valid_iova is only false in
>> +     * older machine types (<= 7.0) for compatibility purposes.
>>       */
>> -    if (!IS_AMD_CPU(env)) {
>> +    if (!IS_AMD_CPU(env) || !pcmc->enforce_valid_iova) {
>>          return;
>>      }
>>  
>> @@ -1881,6 +1883,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
>>      pcmc->has_reserved_memory = true;
>>      pcmc->kvmclock_enabled = true;
>>      pcmc->enforce_aligned_dimm = true;
>> +    pcmc->enforce_valid_iova = true;
>>      /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
>>       * to be used at the moment, 32K should be enough for a while.  */
>>      pcmc->acpi_data_size = 0x20000 + 0x8000;
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 57bb5b8f2aea..74176a210d56 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -437,9 +437,11 @@ DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1", NULL,
>>  
>>  static void pc_i440fx_7_0_machine_options(MachineClass *m)
>>  {
>> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>>      pc_i440fx_7_1_machine_options(m);
>>      m->alias = NULL;
>>      m->is_default = false;
>> +    pcmc->enforce_valid_iova = false;
>>      compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
>>      compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
>>  }
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index 4d5c2fbd976b..bc38a6ba4c67 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -381,8 +381,10 @@ DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
>>  
>>  static void pc_q35_7_0_machine_options(MachineClass *m)
>>  {
>> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>>      pc_q35_7_1_machine_options(m);
>>      m->alias = NULL;
>> +    pcmc->enforce_valid_iova = false;
>>      compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
>>      compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
>>  }
>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>> index 9c847faea2f8..22119131eca7 100644
>> --- a/include/hw/i386/pc.h
>> +++ b/include/hw/i386/pc.h
>> @@ -117,6 +117,7 @@ struct PCMachineClass {
>>      bool has_reserved_memory;
>>      bool enforce_aligned_dimm;
>>      bool broken_reserved_end;
>> +    bool enforce_valid_iova;
>>  
>>      /* generate legacy CPU hotplug AML */
>>      bool legacy_cpu_hotplug;
>
diff mbox series

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 652ae8ff9ccf..62f9af91f19f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -862,6 +862,7 @@  static hwaddr x86_max_phys_addr(PCMachineState *pcms,
 static void x86_update_above_4g_mem_start(PCMachineState *pcms,
                                           uint64_t pci_hole64_size)
 {
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     X86MachineState *x86ms = X86_MACHINE(pcms);
     CPUX86State *env = &X86_CPU(first_cpu)->env;
     hwaddr start = x86ms->above_4g_mem_start;
@@ -870,9 +871,10 @@  static void x86_update_above_4g_mem_start(PCMachineState *pcms,
     /*
      * The HyperTransport range close to the 1T boundary is unique to AMD
      * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
-     * to above 1T to AMD vCPUs only.
+     * to above 1T to AMD vCPUs only. @enforce_valid_iova is only false in
+     * older machine types (<= 7.0) for compatibility purposes.
      */
-    if (!IS_AMD_CPU(env)) {
+    if (!IS_AMD_CPU(env) || !pcmc->enforce_valid_iova) {
         return;
     }
 
@@ -1881,6 +1883,7 @@  static void pc_machine_class_init(ObjectClass *oc, void *data)
     pcmc->has_reserved_memory = true;
     pcmc->kvmclock_enabled = true;
     pcmc->enforce_aligned_dimm = true;
+    pcmc->enforce_valid_iova = true;
     /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
      * to be used at the moment, 32K should be enough for a while.  */
     pcmc->acpi_data_size = 0x20000 + 0x8000;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 57bb5b8f2aea..74176a210d56 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -437,9 +437,11 @@  DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1", NULL,
 
 static void pc_i440fx_7_0_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     pc_i440fx_7_1_machine_options(m);
     m->alias = NULL;
     m->is_default = false;
+    pcmc->enforce_valid_iova = false;
     compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
     compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 4d5c2fbd976b..bc38a6ba4c67 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -381,8 +381,10 @@  DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
 
 static void pc_q35_7_0_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     pc_q35_7_1_machine_options(m);
     m->alias = NULL;
+    pcmc->enforce_valid_iova = false;
     compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
     compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
 }
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9c847faea2f8..22119131eca7 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -117,6 +117,7 @@  struct PCMachineClass {
     bool has_reserved_memory;
     bool enforce_aligned_dimm;
     bool broken_reserved_end;
+    bool enforce_valid_iova;
 
     /* generate legacy CPU hotplug AML */
     bool legacy_cpu_hotplug;