[RFC,v3,05/15] hw/arm/virt: handle max_vm_phys_shift conflicts on migration
diff mbox series

Message ID 1530602398-16127-6-git-send-email-eric.auger@redhat.com
State New
Headers show
Series
  • ARM virt: PCDIMM/NVDIMM at 2TB
Related show

Commit Message

Auger Eric July 3, 2018, 7:19 a.m. UTC
When migrating a VM, we must make sure the destination host
supports as many IPA bits as the source. Otherwise the migration
must fail.

We add a VMState infrastructure to machvirt. On pre_save(),
the current source max_vm_phys_shift is saved.

On destination, we cannot use this information when creating the
VM. The VM is created using the max value reported by the
destination host - or the kvm_type inherited value -. However on
post_load() we can check that this value is compatible with the
source saved value.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 hw/arm/virt.c         | 37 +++++++++++++++++++++++++++++++++++++
 include/hw/arm/virt.h |  2 ++
 2 files changed, 39 insertions(+)

Comments

David Hildenbrand July 3, 2018, 6:41 p.m. UTC | #1
On 03.07.2018 09:19, Eric Auger wrote:
> When migrating a VM, we must make sure the destination host
> supports as many IPA bits as the source. Otherwise the migration
> must fail.
> 
> We add a VMState infrastructure to machvirt. On pre_save(),
> the current source max_vm_phys_shift is saved.
> 
> On destination, we cannot use this information when creating the
> VM. The VM is created using the max value reported by the
> destination host - or the kvm_type inherited value -. However on
> post_load() we can check that this value is compatible with the
> source saved value.

Just wondering, how exactly is the guest able to detect the 42b (e.g. vs
42b) configuration?

> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> ---
>  hw/arm/virt.c         | 37 +++++++++++++++++++++++++++++++++++++
>  include/hw/arm/virt.h |  2 ++
>  2 files changed, 39 insertions(+)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 04a32de..5a4d0bf 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1316,6 +1316,40 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
>      return arm_cpu_mp_affinity(idx, clustersz);
>  }
>  
> +static int virt_post_load(void *opaque, int version_id)
> +{
> +    VirtMachineState *vms = (VirtMachineState *)opaque;
> +
> +    if (vms->max_vm_phys_shift < vms->source_max_vm_phys_shift) {
> +        error_report("This host kernel only supports %d IPA bits whereas "
> +                     "the guest requires %d GPA bits", vms->max_vm_phys_shift,
> +                     vms->source_max_vm_phys_shift);
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +static int virt_pre_save(void *opaque)
> +{
> +    VirtMachineState *vms = (VirtMachineState *)opaque;
> +
> +    vms->source_max_vm_phys_shift = vms->max_vm_phys_shift;
> +    return 0;
> +}
> +
> +static const VMStateDescription vmstate_virt = {
> +    .name = "virt",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .post_load = virt_post_load,
> +    .pre_save = virt_pre_save,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_INT32(source_max_vm_phys_shift, VirtMachineState),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +
>  static void machvirt_init(MachineState *machine)
>  {
>      VirtMachineState *vms = VIRT_MACHINE(machine);
> @@ -1537,6 +1571,7 @@ static void machvirt_init(MachineState *machine)
>  
>      vms->machine_done.notify = virt_machine_done;
>      qemu_add_machine_init_done_notifier(&vms->machine_done);
> +    vmstate_register(NULL, 0, &vmstate_virt, vms);
>  }
>  
>  static bool virt_get_secure(Object *obj, Error **errp)
> @@ -1727,6 +1762,7 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
>  
>  static int virt_kvm_type(MachineState *ms, const char *type_str)
>  {
> +    VirtMachineState *vms = VIRT_MACHINE(ms);
>      int max_vm_phys_shift, ret = 0;
>      uint64_t type;
>  
> @@ -1747,6 +1783,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
>      }
>      ret = max_vm_phys_shift;
>  out:
> +    vms->max_vm_phys_shift = (max_vm_phys_shift > 0) ? ret : 40;
>      return ret;
>  }
>  
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 1a90ffc..91f6de2 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -125,6 +125,8 @@ typedef struct {
>      uint32_t iommu_phandle;
>      int psci_conduit;
>      char *kvm_type;
> +    int32_t max_vm_phys_shift;
> +    int32_t source_max_vm_phys_shift;
>  } VirtMachineState;
>  
>  #define VIRT_ECAM_ID(high) (high ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM)
>
Auger Eric July 3, 2018, 7:32 p.m. UTC | #2
Hi David,
On 07/03/2018 08:41 PM, David Hildenbrand wrote:
> On 03.07.2018 09:19, Eric Auger wrote:
>> When migrating a VM, we must make sure the destination host
>> supports as many IPA bits as the source. Otherwise the migration
>> must fail.
>>
>> We add a VMState infrastructure to machvirt. On pre_save(),
>> the current source max_vm_phys_shift is saved.
>>
>> On destination, we cannot use this information when creating the
>> VM. The VM is created using the max value reported by the
>> destination host - or the kvm_type inherited value -. However on
>> post_load() we can check that this value is compatible with the
>> source saved value.
> 
> Just wondering, how exactly is the guest able to detect the 42b (e.g. vs
> 42b) configuration?

the source IPA size is saved in the VMState. When restoring it on
post_load we check against the current IPA size (corresponding to the
max the destination KVM does support). The destination IPA size is
chosen before creating the destination VM. If the destination IPA size
is less than the source IPA size, we fail the migration.

Hope this helps

Thanks

Eric

> 
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> ---
>>  hw/arm/virt.c         | 37 +++++++++++++++++++++++++++++++++++++
>>  include/hw/arm/virt.h |  2 ++
>>  2 files changed, 39 insertions(+)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 04a32de..5a4d0bf 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -1316,6 +1316,40 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
>>      return arm_cpu_mp_affinity(idx, clustersz);
>>  }
>>  
>> +static int virt_post_load(void *opaque, int version_id)
>> +{
>> +    VirtMachineState *vms = (VirtMachineState *)opaque;
>> +
>> +    if (vms->max_vm_phys_shift < vms->source_max_vm_phys_shift) {
>> +        error_report("This host kernel only supports %d IPA bits whereas "
>> +                     "the guest requires %d GPA bits", vms->max_vm_phys_shift,
>> +                     vms->source_max_vm_phys_shift);
>> +        return -1;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int virt_pre_save(void *opaque)
>> +{
>> +    VirtMachineState *vms = (VirtMachineState *)opaque;
>> +
>> +    vms->source_max_vm_phys_shift = vms->max_vm_phys_shift;
>> +    return 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_virt = {
>> +    .name = "virt",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .post_load = virt_post_load,
>> +    .pre_save = virt_pre_save,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_INT32(source_max_vm_phys_shift, VirtMachineState),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>> +
>>  static void machvirt_init(MachineState *machine)
>>  {
>>      VirtMachineState *vms = VIRT_MACHINE(machine);
>> @@ -1537,6 +1571,7 @@ static void machvirt_init(MachineState *machine)
>>  
>>      vms->machine_done.notify = virt_machine_done;
>>      qemu_add_machine_init_done_notifier(&vms->machine_done);
>> +    vmstate_register(NULL, 0, &vmstate_virt, vms);
>>  }
>>  
>>  static bool virt_get_secure(Object *obj, Error **errp)
>> @@ -1727,6 +1762,7 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
>>  
>>  static int virt_kvm_type(MachineState *ms, const char *type_str)
>>  {
>> +    VirtMachineState *vms = VIRT_MACHINE(ms);
>>      int max_vm_phys_shift, ret = 0;
>>      uint64_t type;
>>  
>> @@ -1747,6 +1783,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
>>      }
>>      ret = max_vm_phys_shift;
>>  out:
>> +    vms->max_vm_phys_shift = (max_vm_phys_shift > 0) ? ret : 40;
>>      return ret;
>>  }
>>  
>> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
>> index 1a90ffc..91f6de2 100644
>> --- a/include/hw/arm/virt.h
>> +++ b/include/hw/arm/virt.h
>> @@ -125,6 +125,8 @@ typedef struct {
>>      uint32_t iommu_phandle;
>>      int psci_conduit;
>>      char *kvm_type;
>> +    int32_t max_vm_phys_shift;
>> +    int32_t source_max_vm_phys_shift;
>>  } VirtMachineState;
>>  
>>  #define VIRT_ECAM_ID(high) (high ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM)
>>
> 
>
David Hildenbrand July 4, 2018, 11:53 a.m. UTC | #3
On 03.07.2018 21:32, Auger Eric wrote:
> Hi David,
> On 07/03/2018 08:41 PM, David Hildenbrand wrote:
>> On 03.07.2018 09:19, Eric Auger wrote:
>>> When migrating a VM, we must make sure the destination host
>>> supports as many IPA bits as the source. Otherwise the migration
>>> must fail.
>>>
>>> We add a VMState infrastructure to machvirt. On pre_save(),
>>> the current source max_vm_phys_shift is saved.
>>>
>>> On destination, we cannot use this information when creating the
>>> VM. The VM is created using the max value reported by the
>>> destination host - or the kvm_type inherited value -. However on
>>> post_load() we can check that this value is compatible with the
>>> source saved value.
>>
>> Just wondering, how exactly is the guest able to detect the 42b (e.g. vs
>> 42b) configuration?
> 
> the source IPA size is saved in the VMState. When restoring it on
> post_load we check against the current IPA size (corresponding to the
> max the destination KVM does support). The destination IPA size is
> chosen before creating the destination VM. If the destination IPA size
> is less than the source IPA size, we fail the migration.
> 
> Hope this helps

No, I asked if the *guest* is able to distinguish e.g. 43 from 44 or if
the device memory setup is sufficient.

Once you create the machine, you setup device memory (using the maxmem
parameter).

From that, you directly know how big the largest guest physical address
will be (e.g. 2TB + (maxram_size - ram_size)). You can check that
against max_vm_phys_shift and error out.

During migration, source and destination have to have the same qemu
cmdline, especially same maxmem parameter. So you would catch an invalid
setup on the destination, without manually migrating and checking
max_vm_phys_shift.

However (that's why I am asking) if the guest can spot the difference
between e.g. 43 and 44, then you should migrate and check. If it is
implicitly handled by device memory position and size, you should not
migrate it.

> 
> Thanks
> 
> Eric
> 
>>
Auger Eric July 4, 2018, 12:50 p.m. UTC | #4
Hi David,
On 07/04/2018 01:53 PM, David Hildenbrand wrote:
> On 03.07.2018 21:32, Auger Eric wrote:
>> Hi David,
>> On 07/03/2018 08:41 PM, David Hildenbrand wrote:
>>> On 03.07.2018 09:19, Eric Auger wrote:
>>>> When migrating a VM, we must make sure the destination host
>>>> supports as many IPA bits as the source. Otherwise the migration
>>>> must fail.
>>>>
>>>> We add a VMState infrastructure to machvirt. On pre_save(),
>>>> the current source max_vm_phys_shift is saved.
>>>>
>>>> On destination, we cannot use this information when creating the
>>>> VM. The VM is created using the max value reported by the
>>>> destination host - or the kvm_type inherited value -. However on
>>>> post_load() we can check that this value is compatible with the
>>>> source saved value.
>>>
>>> Just wondering, how exactly is the guest able to detect the 42b (e.g. vs
>>> 42b) configuration?
>>
>> the source IPA size is saved in the VMState. When restoring it on
>> post_load we check against the current IPA size (corresponding to the
>> max the destination KVM does support). The destination IPA size is
>> chosen before creating the destination VM. If the destination IPA size
>> is less than the source IPA size, we fail the migration.
>>
>> Hope this helps
> 
> No, I asked if the *guest* is able to distinguish e.g. 43 from 44 or if
> the device memory setup is sufficient.
> 
> Once you create the machine, you setup device memory (using the maxmem
> parameter).
> 
> From that, you directly know how big the largest guest physical address
> will be (e.g. 2TB + (maxram_size - ram_size)). You can check that
> against max_vm_phys_shift and error out.

Ah OK I didn't catch your question. Yes indeed you method is simpler. At
the moment I don't think the guest can make any difference. But the
guest sees the CPU PARange which is fixed currently, as far as I
understand it; also the guest is GPA limited at compilation time with a
given CONFIG_ARM64_PA_BITS_=X config.

So we come back to Dave's remark, if we make CPU PARange match the
max_vm_phys_shift and make the former dynamic, then the guest can see it.

Thanks

Eric
> 
> During migration, source and destination have to have the same qemu
> cmdline, especially same maxmem parameter. So you would catch an invalid
> setup on the destination, without manually migrating and checking
> max_vm_phys_shift.
> 
> However (that's why I am asking) if the guest can spot the difference
> between e.g. 43 and 44, then you should migrate and check. If it is
> implicitly handled by device memory position and size, you should not
> migrate it.
> 
>>
>> Thanks
>>
>> Eric
>>
>>>
> 
>

Patch
diff mbox series

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 04a32de..5a4d0bf 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1316,6 +1316,40 @@  static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
     return arm_cpu_mp_affinity(idx, clustersz);
 }
 
+static int virt_post_load(void *opaque, int version_id)
+{
+    VirtMachineState *vms = (VirtMachineState *)opaque;
+
+    if (vms->max_vm_phys_shift < vms->source_max_vm_phys_shift) {
+        error_report("This host kernel only supports %d IPA bits whereas "
+                     "the guest requires %d GPA bits", vms->max_vm_phys_shift,
+                     vms->source_max_vm_phys_shift);
+        return -1;
+    }
+    return 0;
+}
+
+static int virt_pre_save(void *opaque)
+{
+    VirtMachineState *vms = (VirtMachineState *)opaque;
+
+    vms->source_max_vm_phys_shift = vms->max_vm_phys_shift;
+    return 0;
+}
+
+static const VMStateDescription vmstate_virt = {
+    .name = "virt",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = virt_post_load,
+    .pre_save = virt_pre_save,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(source_max_vm_phys_shift, VirtMachineState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+
 static void machvirt_init(MachineState *machine)
 {
     VirtMachineState *vms = VIRT_MACHINE(machine);
@@ -1537,6 +1571,7 @@  static void machvirt_init(MachineState *machine)
 
     vms->machine_done.notify = virt_machine_done;
     qemu_add_machine_init_done_notifier(&vms->machine_done);
+    vmstate_register(NULL, 0, &vmstate_virt, vms);
 }
 
 static bool virt_get_secure(Object *obj, Error **errp)
@@ -1727,6 +1762,7 @@  static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
 
 static int virt_kvm_type(MachineState *ms, const char *type_str)
 {
+    VirtMachineState *vms = VIRT_MACHINE(ms);
     int max_vm_phys_shift, ret = 0;
     uint64_t type;
 
@@ -1747,6 +1783,7 @@  static int virt_kvm_type(MachineState *ms, const char *type_str)
     }
     ret = max_vm_phys_shift;
 out:
+    vms->max_vm_phys_shift = (max_vm_phys_shift > 0) ? ret : 40;
     return ret;
 }
 
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 1a90ffc..91f6de2 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -125,6 +125,8 @@  typedef struct {
     uint32_t iommu_phandle;
     int psci_conduit;
     char *kvm_type;
+    int32_t max_vm_phys_shift;
+    int32_t source_max_vm_phys_shift;
 } VirtMachineState;
 
 #define VIRT_ECAM_ID(high) (high ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM)