diff mbox series

[PATCH/RFC,3/3] s390x/ais: disable ais for compat machines

Message ID 20170922083855.102341-4-borntraeger@de.ibm.com
State New
Headers show
Series ais fixups for 2.11 | expand

Commit Message

Christian Borntraeger Sept. 22, 2017, 8:38 a.m. UTC
With newer kernels that do support the ais feature (4.13) a qemu 2.11
will not only enable the ais feature for the 2.11 machine, but also
for a <=2.10 compat machine. As this feature is not available in
QEMU <=2.9 (and QEMU 2.10.1), this guest will fail to migrate
back to an older qemu like 2.9 with:

_snip_
error while loading state for instance 0x0 of device 's390-flic'
_snip_

making the whole compat machine dis-functional. As a permanent fix, we
need to fence the ais feature for machines <= 2.10

Due to ais being enabled on 2.10.0 (fixed in 2.10.1) this will prevent
migration of ais-enabled guests from 2.10.0 with

_snip_
qemu-system-s390x: Failed to load s390-flic/ais:tmp
qemu-system-s390x: error while loading state for instance 0x0 of device 's390-flic'
qemu-system-s390x: load of migration failed: Function not implemented
_snip_

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 hw/intc/s390_flic_kvm.c            |  4 +++-
 hw/s390x/s390-virtio-ccw.c         | 10 ++++++++++
 include/hw/s390x/s390-virtio-ccw.h |  3 +++
 3 files changed, 16 insertions(+), 1 deletion(-)

Comments

David Hildenbrand Sept. 26, 2017, 12:26 p.m. UTC | #1
On 22.09.2017 10:38, Christian Borntraeger wrote:
> With newer kernels that do support the ais feature (4.13) a qemu 2.11
> will not only enable the ais feature for the 2.11 machine, but also
> for a <=2.10 compat machine. As this feature is not available in
> QEMU <=2.9 (and QEMU 2.10.1), this guest will fail to migrate
> back to an older qemu like 2.9 with:
> 
> _snip_
> error while loading state for instance 0x0 of device 's390-flic'
> _snip_
> 
> making the whole compat machine dis-functional. As a permanent fix, we
> need to fence the ais feature for machines <= 2.10
> 
> Due to ais being enabled on 2.10.0 (fixed in 2.10.1) this will prevent
> migration of ais-enabled guests from 2.10.0 with
> 
> _snip_
> qemu-system-s390x: Failed to load s390-flic/ais:tmp
> qemu-system-s390x: error while loading state for instance 0x0 of device 's390-flic'
> qemu-system-s390x: load of migration failed: Function not implemented
> _snip_
> 
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> Cc: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  hw/intc/s390_flic_kvm.c            |  4 +++-
>  hw/s390x/s390-virtio-ccw.c         | 10 ++++++++++
>  include/hw/s390x/s390-virtio-ccw.h |  3 +++
>  3 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c
> index a655567..2a94bfc 100644
> --- a/hw/intc/s390_flic_kvm.c
> +++ b/hw/intc/s390_flic_kvm.c
> @@ -22,6 +22,7 @@
>  #include "hw/s390x/s390_flic.h"
>  #include "hw/s390x/adapter.h"
>  #include "hw/s390x/css.h"
> +#include "hw/s390x/s390-virtio-ccw.h"
>  #include "trace.h"
>  
>  #define FLIC_SAVE_INITIAL_SIZE getpagesize()
> @@ -559,7 +560,8 @@ static void kvm_s390_flic_realize(DeviceState *dev, Error **errp)
>                                              KVM_HAS_DEVICE_ATTR, test_attr);
>      /* try enable the AIS facility */
>      test_attr.group = KVM_DEV_FLIC_AISM_ALL;
> -    if (!ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
> +    if (ais_allowed() &&
> +        !ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
>              kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_AIS, 0);
>      }
>  

Wondering if this is really necessary. Shouldn't the CPU model feature
make sure that migration works?
Christian Borntraeger Sept. 26, 2017, 12:45 p.m. UTC | #2
On 09/26/2017 02:26 PM, David Hildenbrand wrote:
> On 22.09.2017 10:38, Christian Borntraeger wrote:
>> With newer kernels that do support the ais feature (4.13) a qemu 2.11
>> will not only enable the ais feature for the 2.11 machine, but also
>> for a <=2.10 compat machine. As this feature is not available in
>> QEMU <=2.9 (and QEMU 2.10.1), this guest will fail to migrate
>> back to an older qemu like 2.9 with:
>>
>> _snip_
>> error while loading state for instance 0x0 of device 's390-flic'
>> _snip_
>>
>> making the whole compat machine dis-functional. As a permanent fix, we
>> need to fence the ais feature for machines <= 2.10
>>
>> Due to ais being enabled on 2.10.0 (fixed in 2.10.1) this will prevent
>> migration of ais-enabled guests from 2.10.0 with
>>
>> _snip_
>> qemu-system-s390x: Failed to load s390-flic/ais:tmp
>> qemu-system-s390x: error while loading state for instance 0x0 of device 's390-flic'
>> qemu-system-s390x: load of migration failed: Function not implemented
>> _snip_
>>
>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>> Cc: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>>  hw/intc/s390_flic_kvm.c            |  4 +++-
>>  hw/s390x/s390-virtio-ccw.c         | 10 ++++++++++
>>  include/hw/s390x/s390-virtio-ccw.h |  3 +++
>>  3 files changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c
>> index a655567..2a94bfc 100644
>> --- a/hw/intc/s390_flic_kvm.c
>> +++ b/hw/intc/s390_flic_kvm.c
>> @@ -22,6 +22,7 @@
>>  #include "hw/s390x/s390_flic.h"
>>  #include "hw/s390x/adapter.h"
>>  #include "hw/s390x/css.h"
>> +#include "hw/s390x/s390-virtio-ccw.h"
>>  #include "trace.h"
>>  
>>  #define FLIC_SAVE_INITIAL_SIZE getpagesize()
>> @@ -559,7 +560,8 @@ static void kvm_s390_flic_realize(DeviceState *dev, Error **errp)
>>                                              KVM_HAS_DEVICE_ATTR, test_attr);
>>      /* try enable the AIS facility */
>>      test_attr.group = KVM_DEV_FLIC_AISM_ALL;
>> -    if (!ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
>> +    if (ais_allowed() &&
>> +        !ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
>>              kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_AIS, 0);
>>      }
>>  
> 
> Wondering if this is really necessary. Shouldn't the CPU model feature
> make sure that migration works?

older QEMUs complain like when migrating a 2.9 machine to 2.9

qemu-system-s390x: Failed to load s390-flic/ais:tmp
qemu-system-s390x: error while loading state for instance 0x0 of device 's390-flic'
qemu-system-s390x: load of migration failed: Function not implemented

e.g. when using the host model.
David Hildenbrand Sept. 26, 2017, 1 p.m. UTC | #3
On 26.09.2017 14:45, Christian Borntraeger wrote:
> 
> 
> On 09/26/2017 02:26 PM, David Hildenbrand wrote:
>> On 22.09.2017 10:38, Christian Borntraeger wrote:
>>> With newer kernels that do support the ais feature (4.13) a qemu 2.11
>>> will not only enable the ais feature for the 2.11 machine, but also
>>> for a <=2.10 compat machine. As this feature is not available in
>>> QEMU <=2.9 (and QEMU 2.10.1), this guest will fail to migrate
>>> back to an older qemu like 2.9 with:
>>>
>>> _snip_
>>> error while loading state for instance 0x0 of device 's390-flic'
>>> _snip_
>>>
>>> making the whole compat machine dis-functional. As a permanent fix, we
>>> need to fence the ais feature for machines <= 2.10
>>>
>>> Due to ais being enabled on 2.10.0 (fixed in 2.10.1) this will prevent
>>> migration of ais-enabled guests from 2.10.0 with
>>>
>>> _snip_
>>> qemu-system-s390x: Failed to load s390-flic/ais:tmp
>>> qemu-system-s390x: error while loading state for instance 0x0 of device 's390-flic'
>>> qemu-system-s390x: load of migration failed: Function not implemented
>>> _snip_
>>>
>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>>> Cc: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> ---
>>>  hw/intc/s390_flic_kvm.c            |  4 +++-
>>>  hw/s390x/s390-virtio-ccw.c         | 10 ++++++++++
>>>  include/hw/s390x/s390-virtio-ccw.h |  3 +++
>>>  3 files changed, 16 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c
>>> index a655567..2a94bfc 100644
>>> --- a/hw/intc/s390_flic_kvm.c
>>> +++ b/hw/intc/s390_flic_kvm.c
>>> @@ -22,6 +22,7 @@
>>>  #include "hw/s390x/s390_flic.h"
>>>  #include "hw/s390x/adapter.h"
>>>  #include "hw/s390x/css.h"
>>> +#include "hw/s390x/s390-virtio-ccw.h"
>>>  #include "trace.h"
>>>  
>>>  #define FLIC_SAVE_INITIAL_SIZE getpagesize()
>>> @@ -559,7 +560,8 @@ static void kvm_s390_flic_realize(DeviceState *dev, Error **errp)
>>>                                              KVM_HAS_DEVICE_ATTR, test_attr);
>>>      /* try enable the AIS facility */
>>>      test_attr.group = KVM_DEV_FLIC_AISM_ALL;
>>> -    if (!ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
>>> +    if (ais_allowed() &&
>>> +        !ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
>>>              kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_AIS, 0);
>>>      }
>>>  
>>
>> Wondering if this is really necessary. Shouldn't the CPU model feature
>> make sure that migration works?
> 
> older QEMUs complain like when migrating a 2.9 machine to 2.9
> 
> qemu-system-s390x: Failed to load s390-flic/ais:tmp
> qemu-system-s390x: error while loading state for instance 0x0 of device 's390-flic'
> qemu-system-s390x: load of migration failed: Function not implemented
> 
> e.g. when using the host model.
> 

Wonder when we can finally let go of these hacks. The host cpu model is
not migration safe, therefore such things are expected to not work. Just
think about trying to migrate from a kernel without AIS support to a
kernel with AIS support. It is broken.

AIS is just one feature that actually tells you that you are currently
doing something evil. Other CPU features you lose on the way simply
don't result in an error, but still migration could break silently
afterwards, when the guest assumes it has certain CPU features.


The main problem is that we have machines <= 2.7 that had no CPU model
support. For these machines, migration should work just fine, as
	s390_has_feat(S390_FEAT_ADAPTER_INT_SUPPRESSION)
will always return false.
However, the guest will be presented the AIS bit, as soon as the
capability is enabled (as we then don't manually set the stfle bitmap
from QEMU), which is bad.

So my point would be: don't turn on these new facilities if the cpu
model is not allowed (<=2.7), but don't add ais_allowed() compat
handling for any newer machines. If they use the host model, they have
to assume that migration can break.

I think using cpu_model_allowed() would be just fine to be used instead
of ais_allowed().
Christian Borntraeger Sept. 26, 2017, 1:32 p.m. UTC | #4
On 09/26/2017 03:00 PM, David Hildenbrand wrote:

> 
> Wonder when we can finally let go of these hacks. The host cpu model is
> not migration safe, therefore such things are expected to not work. Just
> think about trying to migrate from a kernel without AIS support to a
> kernel with AIS support. It is broken.
> 
> AIS is just one feature that actually tells you that you are currently
> doing something evil. Other CPU features you lose on the way simply
> don't result in an error, but still migration could break silently
> afterwards, when the guest assumes it has certain CPU features.
> 
> 
> The main problem is that we have machines <= 2.7 that had no CPU model
> support. For these machines, migration should work just fine, as
> 	s390_has_feat(S390_FEAT_ADAPTER_INT_SUPPRESSION)
> will always return false.
> However, the guest will be presented the AIS bit, as soon as the
> capability is enabled (as we then don't manually set the stfle bitmap
> from QEMU), which is bad.
> 
> So my point would be: don't turn on these new facilities if the cpu
> model is not allowed (<=2.7), but don't add ais_allowed() compat
> handling for any newer machines. If they use the host model, they have
> to assume that migration can break.
> 
> I think using cpu_model_allowed() would be just fine to be used instead
> of ais_allowed().

Let me thing about that. I will send 2 (with fs->ais_supported gone) and 3
and we can then decide if we only want to apply 2 or both.

Now: regarding your -cpu host comment:

I checked all the migration stuff now with -cpu z13 and it seems that
migration is now broken for qemu 2.10->qemu 2.11 when using the s390-ccw-virtio-2.9
machine due to the PCI fencing from Conny.

qemu-system-s390x: Unknown savevm section or instance 'PCIBUS' 0
qemu-system-s390x: load of migration failed: Invalid argument
Cornelia Huck Sept. 26, 2017, 1:43 p.m. UTC | #5
On Tue, 26 Sep 2017 15:32:41 +0200
Christian Borntraeger <borntraeger@de.ibm.com> wrote:

> I checked all the migration stuff now with -cpu z13 and it seems that
> migration is now broken for qemu 2.10->qemu 2.11 when using the s390-ccw-virtio-2.9
> machine due to the PCI fencing from Conny.
> 
> qemu-system-s390x: Unknown savevm section or instance 'PCIBUS' 0
> qemu-system-s390x: load of migration failed: Invalid argument

That's unfortunate... let me look into that.
Christian Borntraeger Sept. 26, 2017, 1:45 p.m. UTC | #6
On 09/26/2017 03:32 PM, Christian Borntraeger wrote:
> 
> 
> On 09/26/2017 03:00 PM, David Hildenbrand wrote:
> 
>>
>> Wonder when we can finally let go of these hacks. The host cpu model is
>> not migration safe, therefore such things are expected to not work. Just
>> think about trying to migrate from a kernel without AIS support to a
>> kernel with AIS support. It is broken.
>>
>> AIS is just one feature that actually tells you that you are currently
>> doing something evil. Other CPU features you lose on the way simply
>> don't result in an error, but still migration could break silently
>> afterwards, when the guest assumes it has certain CPU features.
>>
>>
>> The main problem is that we have machines <= 2.7 that had no CPU model
>> support. For these machines, migration should work just fine, as
>> 	s390_has_feat(S390_FEAT_ADAPTER_INT_SUPPRESSION)
>> will always return false.
>> However, the guest will be presented the AIS bit, as soon as the
>> capability is enabled (as we then don't manually set the stfle bitmap
>> from QEMU), which is bad.
>>
>> So my point would be: don't turn on these new facilities if the cpu
>> model is not allowed (<=2.7), but don't add ais_allowed() compat
>> handling for any newer machines. If they use the host model, they have
>> to assume that migration can break.
>>
>> I think using cpu_model_allowed() would be just fine to be used instead
>> of ais_allowed().
> 
> Let me thing about that. I will send 2 (with fs->ais_supported gone) and 3
> and we can then decide if we only want to apply 2 or both.
> 
> Now: regarding your -cpu host comment:
> 
> I checked all the migration stuff now with -cpu z13 and it seems that
> migration is now broken for qemu 2.10->qemu 2.11 when using the s390-ccw-virtio-2.9
> machine due to the PCI fencing from Conny.
> 
> qemu-system-s390x: Unknown savevm section or instance 'PCIBUS' 0
> qemu-system-s390x: load of migration failed: Invalid argument
> 

A very dirty hack like this seems to do the trick

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 41b770a..3d49044 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -294,7 +294,7 @@ static void ccw_init(MachineState *machine)
                       machine->initrd_filename, "s390-ccw.img",
                       "s390-netboot.img", true);
 
-    if (s390_has_feat(S390_FEAT_ZPCI)) {
+    if (s390_has_feat(S390_FEAT_ZPCI) || !ais_allowed()) {
         DeviceState *dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
         object_property_add_child(qdev_get_machine(),
                                   TYPE_S390_PCI_HOST_BRIDGE,

Basic idea is that for compat_machines <= 2.10 we enable the PCI host bridge
(and I abused ais_allowed to check for <= 2.10). 
Not sure what to do for for CONFIG_PCI = off case, but the normal case is
broken right now.
diff mbox series

Patch

diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c
index a655567..2a94bfc 100644
--- a/hw/intc/s390_flic_kvm.c
+++ b/hw/intc/s390_flic_kvm.c
@@ -22,6 +22,7 @@ 
 #include "hw/s390x/s390_flic.h"
 #include "hw/s390x/adapter.h"
 #include "hw/s390x/css.h"
+#include "hw/s390x/s390-virtio-ccw.h"
 #include "trace.h"
 
 #define FLIC_SAVE_INITIAL_SIZE getpagesize()
@@ -559,7 +560,8 @@  static void kvm_s390_flic_realize(DeviceState *dev, Error **errp)
                                             KVM_HAS_DEVICE_ATTR, test_attr);
     /* try enable the AIS facility */
     test_attr.group = KVM_DEV_FLIC_AISM_ALL;
-    if (!ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
+    if (ais_allowed() &&
+        !ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
             kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_AIS, 0);
     }
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 11d4dc4..ce8391b 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -435,6 +435,7 @@  static void ccw_machine_class_init(ObjectClass *oc, void *data)
     s390mc->cpu_model_allowed = true;
     s390mc->css_migration_enabled = true;
     s390mc->gs_allowed = true;
+    s390mc->ais_allowed = true;
     mc->init = ccw_init;
     mc->reset = s390_machine_reset;
     mc->hot_add_cpu = s390_hot_add_cpu;
@@ -519,6 +520,11 @@  bool gs_allowed(void)
     return get_machine_class()->gs_allowed;
 }
 
+bool ais_allowed(void)
+{
+    return get_machine_class()->ais_allowed;
+}
+
 static char *machine_get_loadparm(Object *obj, Error **errp)
 {
     S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
@@ -742,6 +748,10 @@  static void ccw_machine_2_10_instance_options(MachineState *machine)
 
 static void ccw_machine_2_10_class_options(MachineClass *mc)
 {
+    S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
+
+    s390mc->ais_allowed = false;
+
     ccw_machine_2_11_class_options(mc);
     SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_10);
 }
diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
index a9a90c2..d25a98f 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -41,6 +41,7 @@  typedef struct S390CcwMachineClass {
     bool cpu_model_allowed;
     bool css_migration_enabled;
     bool gs_allowed;
+    bool ais_allowed;
 } S390CcwMachineClass;
 
 /* runtime-instrumentation allowed by the machine */
@@ -49,6 +50,8 @@  bool ri_allowed(void);
 bool cpu_model_allowed(void);
 /* guarded-storage allowed by the machine */
 bool gs_allowed(void);
+/* ais allowed by the machine */
+bool ais_allowed(void);
 
 /**
  * Returns true if (vmstate based) migration of the channel subsystem