diff mbox

[qom-cpu,2/2] i386: disable PMU passthrough mode by default

Message ID 1374521135-30404-3-git-send-email-ehabkost@redhat.com
State New
Headers show

Commit Message

Eduardo Habkost July 22, 2013, 7:25 p.m. UTC
Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
for CPUID leaf 0xA and passes them directly to the guest. This makes
the guest ABI depend on host kernel and host CPU capabilities, and
breaks live migration if we migrate between host with different
capabilities (e.g. different number of PMU counters).

This patch adds a "pmu-passthrough" property to X86CPU, and set it to
true only on "-cpu host", or on pc-*-1.5 and older machine-types.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
 include/hw/i386/pc.h  |  4 ++++
 target-i386/cpu-qom.h |  7 +++++++
 target-i386/cpu.c     | 11 ++++++++++-
 3 files changed, 21 insertions(+), 1 deletion(-)

Comments

Igor Mammedov July 23, 2013, 6:01 a.m. UTC | #1
On Mon, 22 Jul 2013 16:25:35 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> for CPUID leaf 0xA and passes them directly to the guest. This makes
> the guest ABI depend on host kernel and host CPU capabilities, and
> breaks live migration if we migrate between host with different
> capabilities (e.g. different number of PMU counters).
> 
> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
> 
> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> ---
>  include/hw/i386/pc.h  |  4 ++++
>  target-i386/cpu-qom.h |  7 +++++++
>  target-i386/cpu.c     | 11 ++++++++++-
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 7fb97b0..3cea83f 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -235,6 +235,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
>              .driver   = "virtio-net-pci",\
>              .property = "any_layout",\
>              .value    = "off",\
> +        },{\
> +            .driver = TYPE_X86_CPU,\
> +            .property = "pmu-passthrough",\
> +            .value = "on",\
>          }
>  
>  #define PC_COMPAT_1_4 \
> diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
> index 7e55e5f..b505a45 100644
> --- a/target-i386/cpu-qom.h
> +++ b/target-i386/cpu-qom.h
> @@ -68,6 +68,13 @@ typedef struct X86CPU {
>  
>      /* Features that were filtered out because of missing host capabilities */
>      uint32_t filtered_features[FEATURE_WORDS];
> +
> +    /* Pass all PMU CPUID bits to the guest directly from GET_SUPPORTED_CPUID.
> +     * This can't be enabled by default because it breaks live-migration,
> +     * as it makes the guest ABI change depending on host CPU/kernel
> +     * capabilities.
> +     */
> +    bool pmu_passthrough;
>  } X86CPU;
>  
>  static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 41c81af..e192f63 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -1475,17 +1475,25 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
>      error_propagate(errp, err);
>  }
>  
> +static Property cpu_x86_properties[] = {
> +    DEFINE_PROP_BOOL("pmu-passthrough", X86CPU, pmu_passthrough, false),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
>                                  const char *name)
>  {
>      x86_def_t *def;
>      int i;
> +    Error *err = NULL;
>  
>      if (name == NULL) {
>          return -1;
>      }
>      if (kvm_enabled() && strcmp(name, "host") == 0) {
>          kvm_cpu_fill_host(x86_cpu_def);
> +        object_property_set_bool(OBJECT(cpu), true, "pmu-passthrough", &err);
> +        assert_no_error(err);
Could this hunk be implemented using compat props?
That would spare us dealing with it later.

>          return 0;
>      }
>  
> @@ -2017,7 +2025,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>          break;
>      case 0xA:
>          /* Architectural Performance Monitoring Leaf */
> -        if (kvm_enabled()) {
> +        if (kvm_enabled() && cpu->pmu_passthrough) {
>              KVMState *s = cs->kvm_state;
>  
>              *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
> @@ -2516,6 +2524,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
>      xcc->parent_realize = dc->realize;
>      dc->realize = x86_cpu_realizefn;
>      dc->bus_type = TYPE_ICC_BUS;
> +    dc->props = cpu_x86_properties;
>  
>      xcc->parent_reset = cc->reset;
>      cc->reset = x86_cpu_reset;
Paolo Bonzini July 23, 2013, 9:18 a.m. UTC | #2
Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> for CPUID leaf 0xA and passes them directly to the guest. This makes
> the guest ABI depend on host kernel and host CPU capabilities, and
> breaks live migration if we migrate between host with different
> capabilities (e.g. different number of PMU counters).
> 
> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> true only on "-cpu host", or on pc-*-1.5 and older machine-types.

Can we just call the property "pmu"?  It doesn't have to be passthough.

Later we can support setting the right values for leaf 0xA.  This way
migration will work even for -cpu other than "-cpu host", and the same
option will work.

Paolo

> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> ---
>  include/hw/i386/pc.h  |  4 ++++
>  target-i386/cpu-qom.h |  7 +++++++
>  target-i386/cpu.c     | 11 ++++++++++-
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 7fb97b0..3cea83f 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -235,6 +235,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
>              .driver   = "virtio-net-pci",\
>              .property = "any_layout",\
>              .value    = "off",\
> +        },{\
> +            .driver = TYPE_X86_CPU,\
> +            .property = "pmu-passthrough",\
> +            .value = "on",\
>          }
>  
>  #define PC_COMPAT_1_4 \
> diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
> index 7e55e5f..b505a45 100644
> --- a/target-i386/cpu-qom.h
> +++ b/target-i386/cpu-qom.h
> @@ -68,6 +68,13 @@ typedef struct X86CPU {
>  
>      /* Features that were filtered out because of missing host capabilities */
>      uint32_t filtered_features[FEATURE_WORDS];
> +
> +    /* Pass all PMU CPUID bits to the guest directly from GET_SUPPORTED_CPUID.
> +     * This can't be enabled by default because it breaks live-migration,
> +     * as it makes the guest ABI change depending on host CPU/kernel
> +     * capabilities.
> +     */
> +    bool pmu_passthrough;
>  } X86CPU;
>  
>  static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 41c81af..e192f63 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -1475,17 +1475,25 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
>      error_propagate(errp, err);
>  }
>  
> +static Property cpu_x86_properties[] = {
> +    DEFINE_PROP_BOOL("pmu-passthrough", X86CPU, pmu_passthrough, false),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
>                                  const char *name)
>  {
>      x86_def_t *def;
>      int i;
> +    Error *err = NULL;
>  
>      if (name == NULL) {
>          return -1;
>      }
>      if (kvm_enabled() && strcmp(name, "host") == 0) {
>          kvm_cpu_fill_host(x86_cpu_def);
> +        object_property_set_bool(OBJECT(cpu), true, "pmu-passthrough", &err);
> +        assert_no_error(err);
>          return 0;
>      }
>  
> @@ -2017,7 +2025,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>          break;
>      case 0xA:
>          /* Architectural Performance Monitoring Leaf */
> -        if (kvm_enabled()) {
> +        if (kvm_enabled() && cpu->pmu_passthrough) {
>              KVMState *s = cs->kvm_state;
>  
>              *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
> @@ -2516,6 +2524,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
>      xcc->parent_realize = dc->realize;
>      dc->realize = x86_cpu_realizefn;
>      dc->bus_type = TYPE_ICC_BUS;
> +    dc->props = cpu_x86_properties;
>  
>      xcc->parent_reset = cc->reset;
>      cc->reset = x86_cpu_reset;
>
Eduardo Habkost July 23, 2013, 2:13 p.m. UTC | #3
On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
> > Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> > for CPUID leaf 0xA and passes them directly to the guest. This makes
> > the guest ABI depend on host kernel and host CPU capabilities, and
> > breaks live migration if we migrate between host with different
> > capabilities (e.g. different number of PMU counters).
> > 
> > This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> > true only on "-cpu host", or on pc-*-1.5 and older machine-types.
> 
> Can we just call the property "pmu"?  It doesn't have to be passthough.

Yes, but the only options we have today are "no PMU" and "passthrough
PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
implicitly (I don't want things that break live-migration to be enabled
without making it explicit that it is a host-dependent/passthrough
mode).

I considered creating a property named "pmu" and use "pmu=host" to
enable the current passthrough behavior, but:

> 
> Later we can support setting the right values for leaf 0xA.  This way
> migration will work even for -cpu other than "-cpu host", and the same
> option will work.

Yes. I just don't know what will be the best way to specify the PMU
counters in the command-line/properties when we do it, so I thought it
would be better to create a "pmu" property only after we decide how
exactly it will look like.

> 
> Paolo
> 
> > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> > ---
> >  include/hw/i386/pc.h  |  4 ++++
> >  target-i386/cpu-qom.h |  7 +++++++
> >  target-i386/cpu.c     | 11 ++++++++++-
> >  3 files changed, 21 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index 7fb97b0..3cea83f 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -235,6 +235,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
> >              .driver   = "virtio-net-pci",\
> >              .property = "any_layout",\
> >              .value    = "off",\
> > +        },{\
> > +            .driver = TYPE_X86_CPU,\
> > +            .property = "pmu-passthrough",\
> > +            .value = "on",\
> >          }
> >  
> >  #define PC_COMPAT_1_4 \
> > diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
> > index 7e55e5f..b505a45 100644
> > --- a/target-i386/cpu-qom.h
> > +++ b/target-i386/cpu-qom.h
> > @@ -68,6 +68,13 @@ typedef struct X86CPU {
> >  
> >      /* Features that were filtered out because of missing host capabilities */
> >      uint32_t filtered_features[FEATURE_WORDS];
> > +
> > +    /* Pass all PMU CPUID bits to the guest directly from GET_SUPPORTED_CPUID.
> > +     * This can't be enabled by default because it breaks live-migration,
> > +     * as it makes the guest ABI change depending on host CPU/kernel
> > +     * capabilities.
> > +     */
> > +    bool pmu_passthrough;
> >  } X86CPU;
> >  
> >  static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
> > diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> > index 41c81af..e192f63 100644
> > --- a/target-i386/cpu.c
> > +++ b/target-i386/cpu.c
> > @@ -1475,17 +1475,25 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
> >      error_propagate(errp, err);
> >  }
> >  
> > +static Property cpu_x86_properties[] = {
> > +    DEFINE_PROP_BOOL("pmu-passthrough", X86CPU, pmu_passthrough, false),
> > +    DEFINE_PROP_END_OF_LIST(),
> > +};
> > +
> >  static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
> >                                  const char *name)
> >  {
> >      x86_def_t *def;
> >      int i;
> > +    Error *err = NULL;
> >  
> >      if (name == NULL) {
> >          return -1;
> >      }
> >      if (kvm_enabled() && strcmp(name, "host") == 0) {
> >          kvm_cpu_fill_host(x86_cpu_def);
> > +        object_property_set_bool(OBJECT(cpu), true, "pmu-passthrough", &err);
> > +        assert_no_error(err);
> >          return 0;
> >      }
> >  
> > @@ -2017,7 +2025,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >          break;
> >      case 0xA:
> >          /* Architectural Performance Monitoring Leaf */
> > -        if (kvm_enabled()) {
> > +        if (kvm_enabled() && cpu->pmu_passthrough) {
> >              KVMState *s = cs->kvm_state;
> >  
> >              *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
> > @@ -2516,6 +2524,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
> >      xcc->parent_realize = dc->realize;
> >      dc->realize = x86_cpu_realizefn;
> >      dc->bus_type = TYPE_ICC_BUS;
> > +    dc->props = cpu_x86_properties;
> >  
> >      xcc->parent_reset = cc->reset;
> >      cc->reset = x86_cpu_reset;
> > 
>
Eduardo Habkost July 23, 2013, 2:18 p.m. UTC | #4
On Tue, Jul 23, 2013 at 08:01:29AM +0200, Igor Mammedov wrote:
> On Mon, 22 Jul 2013 16:25:35 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> > for CPUID leaf 0xA and passes them directly to the guest. This makes
> > the guest ABI depend on host kernel and host CPU capabilities, and
> > breaks live migration if we migrate between host with different
> > capabilities (e.g. different number of PMU counters).
> > 
> > This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> > true only on "-cpu host", or on pc-*-1.5 and older machine-types.
> > 
> > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> > ---
> >  include/hw/i386/pc.h  |  4 ++++
> >  target-i386/cpu-qom.h |  7 +++++++
> >  target-i386/cpu.c     | 11 ++++++++++-
> >  3 files changed, 21 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index 7fb97b0..3cea83f 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -235,6 +235,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
> >              .driver   = "virtio-net-pci",\
> >              .property = "any_layout",\
> >              .value    = "off",\
> > +        },{\
> > +            .driver = TYPE_X86_CPU,\
> > +            .property = "pmu-passthrough",\
> > +            .value = "on",\
> >          }
> >  
> >  #define PC_COMPAT_1_4 \
> > diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
> > index 7e55e5f..b505a45 100644
> > --- a/target-i386/cpu-qom.h
> > +++ b/target-i386/cpu-qom.h
> > @@ -68,6 +68,13 @@ typedef struct X86CPU {
> >  
> >      /* Features that were filtered out because of missing host capabilities */
> >      uint32_t filtered_features[FEATURE_WORDS];
> > +
> > +    /* Pass all PMU CPUID bits to the guest directly from GET_SUPPORTED_CPUID.
> > +     * This can't be enabled by default because it breaks live-migration,
> > +     * as it makes the guest ABI change depending on host CPU/kernel
> > +     * capabilities.
> > +     */
> > +    bool pmu_passthrough;
> >  } X86CPU;
> >  
> >  static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
> > diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> > index 41c81af..e192f63 100644
> > --- a/target-i386/cpu.c
> > +++ b/target-i386/cpu.c
> > @@ -1475,17 +1475,25 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
> >      error_propagate(errp, err);
> >  }
> >  
> > +static Property cpu_x86_properties[] = {
> > +    DEFINE_PROP_BOOL("pmu-passthrough", X86CPU, pmu_passthrough, false),
> > +    DEFINE_PROP_END_OF_LIST(),
> > +};
> > +
> >  static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
> >                                  const char *name)
> >  {
> >      x86_def_t *def;
> >      int i;
> > +    Error *err = NULL;
> >  
> >      if (name == NULL) {
> >          return -1;
> >      }
> >      if (kvm_enabled() && strcmp(name, "host") == 0) {
> >          kvm_cpu_fill_host(x86_cpu_def);
> > +        object_property_set_bool(OBJECT(cpu), true, "pmu-passthrough", &err);
> > +        assert_no_error(err);
> Could this hunk be implemented using compat props?
> That would spare us dealing with it later.

Oh, I forgot we already have the qdev_prop_set_globals_for_type() hack
that would allow us to use model-specific compat_props.

But I never expected to have a "compat" property that will get set on
_all_ machine-types ("-cpu host" will have pmu-passthrough=true on all
machine-types). Would it make sense to do it?

Normally on cases like this we would just set a property default on the
host-x86-cpu class. But we still don't have the per-CPU-model X86CPU
subclasses, so today the defaults are coded in the init functions.

> 
> >          return 0;
> >      }
> >  
> > @@ -2017,7 +2025,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >          break;
> >      case 0xA:
> >          /* Architectural Performance Monitoring Leaf */
> > -        if (kvm_enabled()) {
> > +        if (kvm_enabled() && cpu->pmu_passthrough) {
> >              KVMState *s = cs->kvm_state;
> >  
> >              *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
> > @@ -2516,6 +2524,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
> >      xcc->parent_realize = dc->realize;
> >      dc->realize = x86_cpu_realizefn;
> >      dc->bus_type = TYPE_ICC_BUS;
> > +    dc->props = cpu_x86_properties;
> >  
> >      xcc->parent_reset = cc->reset;
> >      cc->reset = x86_cpu_reset;
>
Paolo Bonzini July 23, 2013, 3:09 p.m. UTC | #5
Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
> On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
>> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
>>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
>>> for CPUID leaf 0xA and passes them directly to the guest. This makes
>>> the guest ABI depend on host kernel and host CPU capabilities, and
>>> breaks live migration if we migrate between host with different
>>> capabilities (e.g. different number of PMU counters).
>>>
>>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
>>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
>>
>> Can we just call the property "pmu"?  It doesn't have to be passthough.
> 
> Yes, but the only options we have today are "no PMU" and "passthrough
> PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
> implicitly (I don't want things that break live-migration to be enabled
> without making it explicit that it is a host-dependent/passthrough
> mode).

I think "passthrough PMU" should be considered a bug except of course
with "-cpu host".

If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
a future QEMU release, that'll be a bugfix.

Paolo

> I considered creating a property named "pmu" and use "pmu=host" to
> enable the current passthrough behavior, but:
> 
>>
>> Later we can support setting the right values for leaf 0xA.  This way
>> migration will work even for -cpu other than "-cpu host", and the same
>> option will work.
> 
> Yes. I just don't know what will be the best way to specify the PMU
> counters in the command-line/properties when we do it, so I thought it
> would be better to create a "pmu" property only after we decide how
> exactly it will look like.
> 
>>
>> Paolo
>>
>>> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
>>> ---
>>>  include/hw/i386/pc.h  |  4 ++++
>>>  target-i386/cpu-qom.h |  7 +++++++
>>>  target-i386/cpu.c     | 11 ++++++++++-
>>>  3 files changed, 21 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>>> index 7fb97b0..3cea83f 100644
>>> --- a/include/hw/i386/pc.h
>>> +++ b/include/hw/i386/pc.h
>>> @@ -235,6 +235,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
>>>              .driver   = "virtio-net-pci",\
>>>              .property = "any_layout",\
>>>              .value    = "off",\
>>> +        },{\
>>> +            .driver = TYPE_X86_CPU,\
>>> +            .property = "pmu-passthrough",\
>>> +            .value = "on",\
>>>          }
>>>  
>>>  #define PC_COMPAT_1_4 \
>>> diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
>>> index 7e55e5f..b505a45 100644
>>> --- a/target-i386/cpu-qom.h
>>> +++ b/target-i386/cpu-qom.h
>>> @@ -68,6 +68,13 @@ typedef struct X86CPU {
>>>  
>>>      /* Features that were filtered out because of missing host capabilities */
>>>      uint32_t filtered_features[FEATURE_WORDS];
>>> +
>>> +    /* Pass all PMU CPUID bits to the guest directly from GET_SUPPORTED_CPUID.
>>> +     * This can't be enabled by default because it breaks live-migration,
>>> +     * as it makes the guest ABI change depending on host CPU/kernel
>>> +     * capabilities.
>>> +     */
>>> +    bool pmu_passthrough;
>>>  } X86CPU;
>>>  
>>>  static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
>>> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
>>> index 41c81af..e192f63 100644
>>> --- a/target-i386/cpu.c
>>> +++ b/target-i386/cpu.c
>>> @@ -1475,17 +1475,25 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
>>>      error_propagate(errp, err);
>>>  }
>>>  
>>> +static Property cpu_x86_properties[] = {
>>> +    DEFINE_PROP_BOOL("pmu-passthrough", X86CPU, pmu_passthrough, false),
>>> +    DEFINE_PROP_END_OF_LIST(),
>>> +};
>>> +
>>>  static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
>>>                                  const char *name)
>>>  {
>>>      x86_def_t *def;
>>>      int i;
>>> +    Error *err = NULL;
>>>  
>>>      if (name == NULL) {
>>>          return -1;
>>>      }
>>>      if (kvm_enabled() && strcmp(name, "host") == 0) {
>>>          kvm_cpu_fill_host(x86_cpu_def);
>>> +        object_property_set_bool(OBJECT(cpu), true, "pmu-passthrough", &err);
>>> +        assert_no_error(err);
>>>          return 0;
>>>      }
>>>  
>>> @@ -2017,7 +2025,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>>>          break;
>>>      case 0xA:
>>>          /* Architectural Performance Monitoring Leaf */
>>> -        if (kvm_enabled()) {
>>> +        if (kvm_enabled() && cpu->pmu_passthrough) {
>>>              KVMState *s = cs->kvm_state;
>>>  
>>>              *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
>>> @@ -2516,6 +2524,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
>>>      xcc->parent_realize = dc->realize;
>>>      dc->realize = x86_cpu_realizefn;
>>>      dc->bus_type = TYPE_ICC_BUS;
>>> +    dc->props = cpu_x86_properties;
>>>  
>>>      xcc->parent_reset = cc->reset;
>>>      cc->reset = x86_cpu_reset;
>>>
>>
>
Eduardo Habkost July 23, 2013, 3:40 p.m. UTC | #6
On Tue, Jul 23, 2013 at 05:09:02PM +0200, Paolo Bonzini wrote:
> Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
> > On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
> >> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
> >>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> >>> for CPUID leaf 0xA and passes them directly to the guest. This makes
> >>> the guest ABI depend on host kernel and host CPU capabilities, and
> >>> breaks live migration if we migrate between host with different
> >>> capabilities (e.g. different number of PMU counters).
> >>>
> >>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> >>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
> >>
> >> Can we just call the property "pmu"?  It doesn't have to be passthough.
> > 
> > Yes, but the only options we have today are "no PMU" and "passthrough
> > PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
> > implicitly (I don't want things that break live-migration to be enabled
> > without making it explicit that it is a host-dependent/passthrough
> > mode).
> 
> I think "passthrough PMU" should be considered a bug except of course
> with "-cpu host".
> 
> If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
> a future QEMU release, that'll be a bugfix.

Exactly. But then I don't understand your suggestion. We still need a
property to enable pasthrough behavior on old machine-types (not
perfect, but a best-effort way to try to keep compatibility), and I
named that option "pmu-passthrough". How do you think we should name it?

> 
> Paolo
> 
> > I considered creating a property named "pmu" and use "pmu=host" to
> > enable the current passthrough behavior, but:
> > 
> >>
> >> Later we can support setting the right values for leaf 0xA.  This way
> >> migration will work even for -cpu other than "-cpu host", and the same
> >> option will work.
> > 
> > Yes. I just don't know what will be the best way to specify the PMU
> > counters in the command-line/properties when we do it, so I thought it
> > would be better to create a "pmu" property only after we decide how
> > exactly it will look like.
> > 
> >>
> >> Paolo
> >>
> >>> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> >>> ---
> >>>  include/hw/i386/pc.h  |  4 ++++
> >>>  target-i386/cpu-qom.h |  7 +++++++
> >>>  target-i386/cpu.c     | 11 ++++++++++-
> >>>  3 files changed, 21 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> >>> index 7fb97b0..3cea83f 100644
> >>> --- a/include/hw/i386/pc.h
> >>> +++ b/include/hw/i386/pc.h
> >>> @@ -235,6 +235,10 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
> >>>              .driver   = "virtio-net-pci",\
> >>>              .property = "any_layout",\
> >>>              .value    = "off",\
> >>> +        },{\
> >>> +            .driver = TYPE_X86_CPU,\
> >>> +            .property = "pmu-passthrough",\
> >>> +            .value = "on",\
> >>>          }
> >>>  
> >>>  #define PC_COMPAT_1_4 \
> >>> diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
> >>> index 7e55e5f..b505a45 100644
> >>> --- a/target-i386/cpu-qom.h
> >>> +++ b/target-i386/cpu-qom.h
> >>> @@ -68,6 +68,13 @@ typedef struct X86CPU {
> >>>  
> >>>      /* Features that were filtered out because of missing host capabilities */
> >>>      uint32_t filtered_features[FEATURE_WORDS];
> >>> +
> >>> +    /* Pass all PMU CPUID bits to the guest directly from GET_SUPPORTED_CPUID.
> >>> +     * This can't be enabled by default because it breaks live-migration,
> >>> +     * as it makes the guest ABI change depending on host CPU/kernel
> >>> +     * capabilities.
> >>> +     */
> >>> +    bool pmu_passthrough;
> >>>  } X86CPU;
> >>>  
> >>>  static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
> >>> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> >>> index 41c81af..e192f63 100644
> >>> --- a/target-i386/cpu.c
> >>> +++ b/target-i386/cpu.c
> >>> @@ -1475,17 +1475,25 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
> >>>      error_propagate(errp, err);
> >>>  }
> >>>  
> >>> +static Property cpu_x86_properties[] = {
> >>> +    DEFINE_PROP_BOOL("pmu-passthrough", X86CPU, pmu_passthrough, false),
> >>> +    DEFINE_PROP_END_OF_LIST(),
> >>> +};
> >>> +
> >>>  static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
> >>>                                  const char *name)
> >>>  {
> >>>      x86_def_t *def;
> >>>      int i;
> >>> +    Error *err = NULL;
> >>>  
> >>>      if (name == NULL) {
> >>>          return -1;
> >>>      }
> >>>      if (kvm_enabled() && strcmp(name, "host") == 0) {
> >>>          kvm_cpu_fill_host(x86_cpu_def);
> >>> +        object_property_set_bool(OBJECT(cpu), true, "pmu-passthrough", &err);
> >>> +        assert_no_error(err);
> >>>          return 0;
> >>>      }
> >>>  
> >>> @@ -2017,7 +2025,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >>>          break;
> >>>      case 0xA:
> >>>          /* Architectural Performance Monitoring Leaf */
> >>> -        if (kvm_enabled()) {
> >>> +        if (kvm_enabled() && cpu->pmu_passthrough) {
> >>>              KVMState *s = cs->kvm_state;
> >>>  
> >>>              *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
> >>> @@ -2516,6 +2524,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
> >>>      xcc->parent_realize = dc->realize;
> >>>      dc->realize = x86_cpu_realizefn;
> >>>      dc->bus_type = TYPE_ICC_BUS;
> >>> +    dc->props = cpu_x86_properties;
> >>>  
> >>>      xcc->parent_reset = cc->reset;
> >>>      cc->reset = x86_cpu_reset;
> >>>
> >>
> > 
>
Paolo Bonzini July 23, 2013, 4:23 p.m. UTC | #7
Il 23/07/2013 17:40, Eduardo Habkost ha scritto:
> On Tue, Jul 23, 2013 at 05:09:02PM +0200, Paolo Bonzini wrote:
>> Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
>>> On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
>>>> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
>>>>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
>>>>> for CPUID leaf 0xA and passes them directly to the guest. This makes
>>>>> the guest ABI depend on host kernel and host CPU capabilities, and
>>>>> breaks live migration if we migrate between host with different
>>>>> capabilities (e.g. different number of PMU counters).
>>>>>
>>>>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
>>>>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
>>>>
>>>> Can we just call the property "pmu"?  It doesn't have to be passthough.
>>>
>>> Yes, but the only options we have today are "no PMU" and "passthrough
>>> PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
>>> implicitly (I don't want things that break live-migration to be enabled
>>> without making it explicit that it is a host-dependent/passthrough
>>> mode).
>>
>> I think "passthrough PMU" should be considered a bug except of course
>> with "-cpu host".
>>
>> If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
>> a future QEMU release, that'll be a bugfix.
> 
> Exactly. But then I don't understand your suggestion. We still need a
> property to enable pasthrough behavior on old machine-types (not
> perfect, but a best-effort way to try to keep compatibility),

Do we?

We only need "pmu=on"---which right now is buggy on old machine types
because it will always passthrough.

Paolo

> and I
> named that option "pmu-passthrough". How do you think we should name it?
Eduardo Habkost July 23, 2013, 5:41 p.m. UTC | #8
On Tue, Jul 23, 2013 at 06:23:08PM +0200, Paolo Bonzini wrote:
> Il 23/07/2013 17:40, Eduardo Habkost ha scritto:
> > On Tue, Jul 23, 2013 at 05:09:02PM +0200, Paolo Bonzini wrote:
> >> Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
> >>> On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
> >>>> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
> >>>>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> >>>>> for CPUID leaf 0xA and passes them directly to the guest. This makes
> >>>>> the guest ABI depend on host kernel and host CPU capabilities, and
> >>>>> breaks live migration if we migrate between host with different
> >>>>> capabilities (e.g. different number of PMU counters).
> >>>>>
> >>>>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> >>>>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
> >>>>
> >>>> Can we just call the property "pmu"?  It doesn't have to be passthough.
> >>>
> >>> Yes, but the only options we have today are "no PMU" and "passthrough
> >>> PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
> >>> implicitly (I don't want things that break live-migration to be enabled
> >>> without making it explicit that it is a host-dependent/passthrough
> >>> mode).
> >>
> >> I think "passthrough PMU" should be considered a bug except of course
> >> with "-cpu host".
> >>
> >> If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
> >> a future QEMU release, that'll be a bugfix.
> > 
> > Exactly. But then I don't understand your suggestion. We still need a
> > property to enable pasthrough behavior on old machine-types (not
> > perfect, but a best-effort way to try to keep compatibility),
> 
> Do we?
> 
> We only need "pmu=on"---which right now is buggy on old machine types
> because it will always passthrough.

I am not sure I understand what you are arguing for.

You agree that pmu=on needs to keep the buggy passthrough behavior on
pc-1.5 and older, right?

In that case, how do you suggest I let QEMU know that only pc-1.5 and
older should have the buggy behavior enabled when pmu=on? I understand
that compat_props is the appropriate place for that, and that's why I
need a "please-enable-the-old-buggy-pmu-passthrough-behavior" property
that I can add to PC_COMPAT_1_5.

> 
> Paolo
> 
> > and I
> > named that option "pmu-passthrough". How do you think we should name it?
>
Paolo Bonzini July 23, 2013, 7:43 p.m. UTC | #9
Il 23/07/2013 19:41, Eduardo Habkost ha scritto:
> On Tue, Jul 23, 2013 at 06:23:08PM +0200, Paolo Bonzini wrote:
>> Il 23/07/2013 17:40, Eduardo Habkost ha scritto:
>>> On Tue, Jul 23, 2013 at 05:09:02PM +0200, Paolo Bonzini wrote:
>>>> Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
>>>>> On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
>>>>>> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
>>>>>>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
>>>>>>> for CPUID leaf 0xA and passes them directly to the guest. This makes
>>>>>>> the guest ABI depend on host kernel and host CPU capabilities, and
>>>>>>> breaks live migration if we migrate between host with different
>>>>>>> capabilities (e.g. different number of PMU counters).
>>>>>>>
>>>>>>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
>>>>>>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
>>>>>>
>>>>>> Can we just call the property "pmu"?  It doesn't have to be passthough.
>>>>>
>>>>> Yes, but the only options we have today are "no PMU" and "passthrough
>>>>> PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
>>>>> implicitly (I don't want things that break live-migration to be enabled
>>>>> without making it explicit that it is a host-dependent/passthrough
>>>>> mode).
>>>>
>>>> I think "passthrough PMU" should be considered a bug except of course
>>>> with "-cpu host".
>>>>
>>>> If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
>>>> a future QEMU release, that'll be a bugfix.
>>>
>>> Exactly. But then I don't understand your suggestion. We still need a
>>> property to enable pasthrough behavior on old machine-types (not
>>> perfect, but a best-effort way to try to keep compatibility),
>>
>> Do we?
>>
>> We only need "pmu=on"---which right now is buggy on old machine types
>> because it will always passthrough.
> 
> I am not sure I understand what you are arguing for.
> 
> You agree that pmu=on needs to keep the buggy passthrough behavior on
> pc-1.5 and older, right?

I agree it needs to remain enabled on 1.5.  But if, for example, 1.8
makes pmu=on emulate a Nehalem-compatible PMU, I think it is fine if
pc-1.5 moves from a host-compatible PMU to a Nehalem-compatible PMU.

The reason is that pc-1.5 has never guaranteed any feature of the
emulated PMU.

Paolo
Eduardo Habkost July 24, 2013, 1:15 p.m. UTC | #10
On Tue, Jul 23, 2013 at 09:43:06PM +0200, Paolo Bonzini wrote:
> Il 23/07/2013 19:41, Eduardo Habkost ha scritto:
> > On Tue, Jul 23, 2013 at 06:23:08PM +0200, Paolo Bonzini wrote:
> >> Il 23/07/2013 17:40, Eduardo Habkost ha scritto:
> >>> On Tue, Jul 23, 2013 at 05:09:02PM +0200, Paolo Bonzini wrote:
> >>>> Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
> >>>>> On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
> >>>>>> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
> >>>>>>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> >>>>>>> for CPUID leaf 0xA and passes them directly to the guest. This makes
> >>>>>>> the guest ABI depend on host kernel and host CPU capabilities, and
> >>>>>>> breaks live migration if we migrate between host with different
> >>>>>>> capabilities (e.g. different number of PMU counters).
> >>>>>>>
> >>>>>>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> >>>>>>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
> >>>>>>
> >>>>>> Can we just call the property "pmu"?  It doesn't have to be passthough.
> >>>>>
> >>>>> Yes, but the only options we have today are "no PMU" and "passthrough
> >>>>> PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
> >>>>> implicitly (I don't want things that break live-migration to be enabled
> >>>>> without making it explicit that it is a host-dependent/passthrough
> >>>>> mode).
> >>>>
> >>>> I think "passthrough PMU" should be considered a bug except of course
> >>>> with "-cpu host".
> >>>>
> >>>> If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
> >>>> a future QEMU release, that'll be a bugfix.
> >>>
> >>> Exactly. But then I don't understand your suggestion. We still need a
> >>> property to enable pasthrough behavior on old machine-types (not
> >>> perfect, but a best-effort way to try to keep compatibility),
> >>
> >> Do we?
> >>
> >> We only need "pmu=on"---which right now is buggy on old machine types
> >> because it will always passthrough.
> > 
> > I am not sure I understand what you are arguing for.
> > 
> > You agree that pmu=on needs to keep the buggy passthrough behavior on
> > pc-1.5 and older, right?
> 
> I agree it needs to remain enabled on 1.5.  But if, for example, 1.8
> makes pmu=on emulate a Nehalem-compatible PMU, I think it is fine if
> pc-1.5 moves from a host-compatible PMU to a Nehalem-compatible PMU.

That's where I disagree. Today users are (luckily) able to migrate
safely between hosts with the same number of PMU counters. But if we
make, e.g., "qemu-1.6 -machine pc-1.5 -cpu Westmere" present a smaller
number of PMU counters than "qemu-1.5 -machine pc-1.5 -cpu Westmere" on
the same host, we will break an existing setup where everything was
working before, which is something we could have easily avoided.

(Just to clarify what breaking this means in practice: changing the
number of PMU counters under the guest on live-migration means the guest
will crash when trying to use counters that suddenly went away, and it
may crash a very long time after it was migrated.)

> 
> The reason is that pc-1.5 has never guaranteed any feature of the
> emulated PMU.

Right, current behavior is buggy and we never guaranteed anything, but
IMO we shouldn't break on purpose something that is working today.
Paolo Bonzini July 24, 2013, 1:21 p.m. UTC | #11
Il 24/07/2013 15:15, Eduardo Habkost ha scritto:
> On Tue, Jul 23, 2013 at 09:43:06PM +0200, Paolo Bonzini wrote:
>> Il 23/07/2013 19:41, Eduardo Habkost ha scritto:
>>> On Tue, Jul 23, 2013 at 06:23:08PM +0200, Paolo Bonzini wrote:
>>>> Il 23/07/2013 17:40, Eduardo Habkost ha scritto:
>>>>> On Tue, Jul 23, 2013 at 05:09:02PM +0200, Paolo Bonzini wrote:
>>>>>> Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
>>>>>>> On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
>>>>>>>> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
>>>>>>>>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
>>>>>>>>> for CPUID leaf 0xA and passes them directly to the guest. This makes
>>>>>>>>> the guest ABI depend on host kernel and host CPU capabilities, and
>>>>>>>>> breaks live migration if we migrate between host with different
>>>>>>>>> capabilities (e.g. different number of PMU counters).
>>>>>>>>>
>>>>>>>>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
>>>>>>>>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
>>>>>>>>
>>>>>>>> Can we just call the property "pmu"?  It doesn't have to be passthough.
>>>>>>>
>>>>>>> Yes, but the only options we have today are "no PMU" and "passthrough
>>>>>>> PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
>>>>>>> implicitly (I don't want things that break live-migration to be enabled
>>>>>>> without making it explicit that it is a host-dependent/passthrough
>>>>>>> mode).
>>>>>>
>>>>>> I think "passthrough PMU" should be considered a bug except of course
>>>>>> with "-cpu host".
>>>>>>
>>>>>> If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
>>>>>> a future QEMU release, that'll be a bugfix.
>>>>>
>>>>> Exactly. But then I don't understand your suggestion. We still need a
>>>>> property to enable pasthrough behavior on old machine-types (not
>>>>> perfect, but a best-effort way to try to keep compatibility),
>>>>
>>>> Do we?
>>>>
>>>> We only need "pmu=on"---which right now is buggy on old machine types
>>>> because it will always passthrough.
>>>
>>> I am not sure I understand what you are arguing for.
>>>
>>> You agree that pmu=on needs to keep the buggy passthrough behavior on
>>> pc-1.5 and older, right?
>>
>> I agree it needs to remain enabled on 1.5.  But if, for example, 1.8
>> makes pmu=on emulate a Nehalem-compatible PMU, I think it is fine if
>> pc-1.5 moves from a host-compatible PMU to a Nehalem-compatible PMU.
> 
> That's where I disagree. Today users are (luckily) able to migrate
> safely between hosts with the same number of PMU counters. But if we
> make, e.g., "qemu-1.6 -machine pc-1.5 -cpu Westmere" present a smaller
> number of PMU counters than "qemu-1.5 -machine pc-1.5 -cpu Westmere" on
> the same host, we will break an existing setup where everything was
> working before, which is something we could have easily avoided.

But at the same time we will fix live migration from a Sandy Bridge host
to a Westmere.  So it's a choice we have to make anyway.

> (Just to clarify what breaking this means in practice: changing the
> number of PMU counters under the guest on live-migration means the guest
> will crash when trying to use counters that suddenly went away, and it
> may crash a very long time after it was migrated.)

And at the same time we fix live migration of a Sandy Bridge to a Westmere.

>> The reason is that pc-1.5 has never guaranteed any feature of the
>> emulated PMU.
> 
> Right, current behavior is buggy and we never guaranteed anything, but
> IMO we shouldn't break on purpose something that is working today.

Even if it is to fix something else?

Paolo
Eduardo Habkost July 24, 2013, 1:44 p.m. UTC | #12
On Wed, Jul 24, 2013 at 03:21:48PM +0200, Paolo Bonzini wrote:
> Il 24/07/2013 15:15, Eduardo Habkost ha scritto:
> > On Tue, Jul 23, 2013 at 09:43:06PM +0200, Paolo Bonzini wrote:
> >> Il 23/07/2013 19:41, Eduardo Habkost ha scritto:
> >>> On Tue, Jul 23, 2013 at 06:23:08PM +0200, Paolo Bonzini wrote:
> >>>> Il 23/07/2013 17:40, Eduardo Habkost ha scritto:
> >>>>> On Tue, Jul 23, 2013 at 05:09:02PM +0200, Paolo Bonzini wrote:
> >>>>>> Il 23/07/2013 16:13, Eduardo Habkost ha scritto:
> >>>>>>> On Tue, Jul 23, 2013 at 11:18:03AM +0200, Paolo Bonzini wrote:
> >>>>>>>> Il 22/07/2013 21:25, Eduardo Habkost ha scritto:
> >>>>>>>>> Bug description: QEMU currently gets all bits from GET_SUPPORTED_CPUID
> >>>>>>>>> for CPUID leaf 0xA and passes them directly to the guest. This makes
> >>>>>>>>> the guest ABI depend on host kernel and host CPU capabilities, and
> >>>>>>>>> breaks live migration if we migrate between host with different
> >>>>>>>>> capabilities (e.g. different number of PMU counters).
> >>>>>>>>>
> >>>>>>>>> This patch adds a "pmu-passthrough" property to X86CPU, and set it to
> >>>>>>>>> true only on "-cpu host", or on pc-*-1.5 and older machine-types.
> >>>>>>>>
> >>>>>>>> Can we just call the property "pmu"?  It doesn't have to be passthough.
> >>>>>>>
> >>>>>>> Yes, but the only options we have today are "no PMU" and "passthrough
> >>>>>>> PMU". I wouldn't like to make "pmu=on" enable the passthrough behavior
> >>>>>>> implicitly (I don't want things that break live-migration to be enabled
> >>>>>>> without making it explicit that it is a host-dependent/passthrough
> >>>>>>> mode).
> >>>>>>
> >>>>>> I think "passthrough PMU" should be considered a bug except of course
> >>>>>> with "-cpu host".
> >>>>>>
> >>>>>> If "-cpu Nehalem,pmu=on" goes from passthrough to Nehalem-compatible in
> >>>>>> a future QEMU release, that'll be a bugfix.
> >>>>>
> >>>>> Exactly. But then I don't understand your suggestion. We still need a
> >>>>> property to enable pasthrough behavior on old machine-types (not
> >>>>> perfect, but a best-effort way to try to keep compatibility),
> >>>>
> >>>> Do we?
> >>>>
> >>>> We only need "pmu=on"---which right now is buggy on old machine types
> >>>> because it will always passthrough.
> >>>
> >>> I am not sure I understand what you are arguing for.
> >>>
> >>> You agree that pmu=on needs to keep the buggy passthrough behavior on
> >>> pc-1.5 and older, right?
> >>
> >> I agree it needs to remain enabled on 1.5.  But if, for example, 1.8
> >> makes pmu=on emulate a Nehalem-compatible PMU, I think it is fine if
> >> pc-1.5 moves from a host-compatible PMU to a Nehalem-compatible PMU.
> > 
> > That's where I disagree. Today users are (luckily) able to migrate
> > safely between hosts with the same number of PMU counters. But if we
> > make, e.g., "qemu-1.6 -machine pc-1.5 -cpu Westmere" present a smaller
> > number of PMU counters than "qemu-1.5 -machine pc-1.5 -cpu Westmere" on
> > the same host, we will break an existing setup where everything was
> > working before, which is something we could have easily avoided.
> 
> But at the same time we will fix live migration from a Sandy Bridge host
> to a Westmere.  So it's a choice we have to make anyway.

True.

> 
> > (Just to clarify what breaking this means in practice: changing the
> > number of PMU counters under the guest on live-migration means the guest
> > will crash when trying to use counters that suddenly went away, and it
> > may crash a very long time after it was migrated.)
> 
> And at the same time we fix live migration of a Sandy Bridge to a Westmere.

Something that never worked in the first place. Breaking what is working
today, on the other hand, is a regression.

If users are interested in a fix for the new SandyBrige->Westmere
use-case, we can always say "please upgrade your VM to a newer
machine-type".

> 
> >> The reason is that pc-1.5 has never guaranteed any feature of the
> >> emulated PMU.
> > 
> > Right, current behavior is buggy and we never guaranteed anything, but
> > IMO we shouldn't break on purpose something that is working today.
> 
> Even if it is to fix something else?

I believe so, because machine-types allow us to have both: we can fix
the new use-cases in new machine-types while keeping existing working
setups without regressions on the older machine-types.
diff mbox

Patch

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 7fb97b0..3cea83f 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -235,6 +235,10 @@  int e820_add_entry(uint64_t, uint64_t, uint32_t);
             .driver   = "virtio-net-pci",\
             .property = "any_layout",\
             .value    = "off",\
+        },{\
+            .driver = TYPE_X86_CPU,\
+            .property = "pmu-passthrough",\
+            .value = "on",\
         }
 
 #define PC_COMPAT_1_4 \
diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index 7e55e5f..b505a45 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -68,6 +68,13 @@  typedef struct X86CPU {
 
     /* Features that were filtered out because of missing host capabilities */
     uint32_t filtered_features[FEATURE_WORDS];
+
+    /* Pass all PMU CPUID bits to the guest directly from GET_SUPPORTED_CPUID.
+     * This can't be enabled by default because it breaks live-migration,
+     * as it makes the guest ABI change depending on host CPU/kernel
+     * capabilities.
+     */
+    bool pmu_passthrough;
 } X86CPU;
 
 static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 41c81af..e192f63 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1475,17 +1475,25 @@  static void x86_cpu_get_feature_words(Object *obj, Visitor *v, void *opaque,
     error_propagate(errp, err);
 }
 
+static Property cpu_x86_properties[] = {
+    DEFINE_PROP_BOOL("pmu-passthrough", X86CPU, pmu_passthrough, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static int cpu_x86_find_by_name(X86CPU *cpu, x86_def_t *x86_cpu_def,
                                 const char *name)
 {
     x86_def_t *def;
     int i;
+    Error *err = NULL;
 
     if (name == NULL) {
         return -1;
     }
     if (kvm_enabled() && strcmp(name, "host") == 0) {
         kvm_cpu_fill_host(x86_cpu_def);
+        object_property_set_bool(OBJECT(cpu), true, "pmu-passthrough", &err);
+        assert_no_error(err);
         return 0;
     }
 
@@ -2017,7 +2025,7 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0xA:
         /* Architectural Performance Monitoring Leaf */
-        if (kvm_enabled()) {
+        if (kvm_enabled() && cpu->pmu_passthrough) {
             KVMState *s = cs->kvm_state;
 
             *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
@@ -2516,6 +2524,7 @@  static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
     xcc->parent_realize = dc->realize;
     dc->realize = x86_cpu_realizefn;
     dc->bus_type = TYPE_ICC_BUS;
+    dc->props = cpu_x86_properties;
 
     xcc->parent_reset = cc->reset;
     cc->reset = x86_cpu_reset;