Patchwork [2/2] qemu-kvm: Add svm cpuid features

login
register
mail settings
Submitter Joerg Roedel
Date Sept. 10, 2010, 3:38 p.m.
Message ID <1284133120-19453-3-git-send-email-joerg.roedel@amd.com>
Download mbox | patch
Permalink /patch/64407/
State New
Headers show

Comments

Joerg Roedel - Sept. 10, 2010, 3:38 p.m.
This patch adds the svm cpuid feature flags to the qemu
intialization path.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 target-i386/cpu.h   |   12 +++++++
 target-i386/cpuid.c |   80 ++++++++++++++++++++++++++++++++++++++++-----------
 target-i386/kvm.c   |    3 ++
 3 files changed, 78 insertions(+), 17 deletions(-)
Alexander Graf - Sept. 11, 2010, 1:43 p.m.
On 10.09.2010, at 17:38, Joerg Roedel wrote:

> This patch adds the svm cpuid feature flags to the qemu
> intialization path.
> 
> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
> ---
> target-i386/cpu.h   |   12 +++++++
> target-i386/cpuid.c |   80 ++++++++++++++++++++++++++++++++++++++++-----------
> target-i386/kvm.c   |    3 ++
> 3 files changed, 78 insertions(+), 17 deletions(-)
> 
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 1144d4e..77eeab1 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -405,6 +405,17 @@
> #define CPUID_EXT3_IBS     (1 << 10)
> #define CPUID_EXT3_SKINIT  (1 << 12)
> 
> +#define CPUID_SVM_NPT          (1 << 0)
> +#define CPUID_SVM_LBRV         (1 << 1)
> +#define CPUID_SVM_SVMLOCK      (1 << 2)
> +#define CPUID_SVM_NRIPSAVE     (1 << 3)
> +#define CPUID_SVM_TSCSCALE     (1 << 4)
> +#define CPUID_SVM_VMCBCLEAN    (1 << 5)
> +#define CPUID_SVM_FLUSHASID    (1 << 6)
> +#define CPUID_SVM_DECODEASSIST (1 << 7)
> +#define CPUID_SVM_PAUSEFILTER  (1 << 10)
> +#define CPUID_SVM_PFTHRESHOLD  (1 << 12)
> +
> #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
> #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
> #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
> @@ -702,6 +713,7 @@ typedef struct CPUX86State {
>     uint8_t has_error_code;
>     uint32_t sipi_vector;
>     uint32_t cpuid_kvm_features;
> +    uint32_t cpuid_svm_features;
> 
>     /* in order to simplify APIC support, we leave this pointer to the
>        user */
> diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
> index 3fcf78f..ea1ac73 100644
> --- a/target-i386/cpuid.c
> +++ b/target-i386/cpuid.c
> @@ -79,6 +79,17 @@ static const char *kvm_feature_name[] = {
>     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
> };
> 
> +static const char *svm_feature_name[] = {
> +    "npt", "lbrv", "svm_lock", "nrip_save",
> +    "tsc_scale", "vmcb_clean",  "flushbyasid", "decodeassists",
> +    NULL, NULL, "pause_filter", NULL,
> +    "pfthreshold", NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +};
> +
> /* collects per-function cpuid data
>  */
> typedef struct model_features_t {
> @@ -192,13 +203,15 @@ static void add_flagname_to_bitmaps(const char *flagname, uint32_t *features,
>                                     uint32_t *ext_features,
>                                     uint32_t *ext2_features,
>                                     uint32_t *ext3_features,
> -                                    uint32_t *kvm_features)
> +                                    uint32_t *kvm_features,
> +                                    uint32_t *svm_features)
> {
>     if (!lookup_feature(features, flagname, NULL, feature_name) &&
>         !lookup_feature(ext_features, flagname, NULL, ext_feature_name) &&
>         !lookup_feature(ext2_features, flagname, NULL, ext2_feature_name) &&
>         !lookup_feature(ext3_features, flagname, NULL, ext3_feature_name) &&
> -        !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name))
> +        !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name) &&
> +        !lookup_feature(svm_features, flagname, NULL, svm_feature_name))
>             fprintf(stderr, "CPU feature %s not found\n", flagname);
> }
> 
> @@ -210,7 +223,8 @@ typedef struct x86_def_t {
>     int family;
>     int model;
>     int stepping;
> -    uint32_t features, ext_features, ext2_features, ext3_features, kvm_features;
> +    uint32_t features, ext_features, ext2_features, ext3_features;
> +    uint32_t kvm_features, svm_features;
>     uint32_t xlevel;
>     char model_id[48];
>     int vendor_override;
> @@ -253,6 +267,7 @@ typedef struct x86_def_t {
>           CPUID_EXT2_PDPE1GB */
> #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
>           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
> +#define TCG_SVM_FEATURES 0
> 
> /* maintains list of cpu model definitions
>  */
> @@ -278,6 +293,8 @@ static x86_def_t builtin_x86_defs[] = {
>             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
>         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
>             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
> +        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
> +            CPUID_SVM_VMCBCLEAN,
>         .xlevel = 0x8000000A,
>         .model_id = "QEMU Virtual CPU version " QEMU_VERSION,
>     },
> @@ -305,6 +322,8 @@ static x86_def_t builtin_x86_defs[] = {
>                     CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
>         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
>             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
> +        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
> +            CPUID_SVM_VMCBCLEAN,

Does that phenom already do all those? It does NPT, but I'm not sure about NRIPSAVE for example.

>         .xlevel = 0x8000001A,
>         .model_id = "AMD Phenom(tm) 9550 Quad-Core Processor"
>     },
> @@ -505,6 +524,15 @@ static int cpu_x86_fill_host(x86_def_t *x86_cpu_def)
>     cpu_x86_fill_model_id(x86_cpu_def->model_id);
>     x86_cpu_def->vendor_override = 0;
> 
> +
> +    /*
> +     * Every SVM feature requires emulation support in KVM - so we can't just
> +     * read the host features here. KVM might even support SVM features not
> +     * available on the host hardware
> +     */
> +    x86_cpu_def->svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV |
> +                                CPUID_SVM_NRIPSAVE | CPUID_SVM_VMCBCLEAN;

Hrm. Wouldn't it make more sense to declare this to -1? This will still go through the kernel space matcher which tells us which features are available anyways, right?


> +
>     return 0;
> }
> 
> @@ -560,8 +588,14 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
> 
>     char *s = strdup(cpu_model);
>     char *featurestr, *name = strtok(s, ",");
> -    uint32_t plus_features = 0, plus_ext_features = 0, plus_ext2_features = 0, plus_ext3_features = 0, plus_kvm_features = 0;
> -    uint32_t minus_features = 0, minus_ext_features = 0, minus_ext2_features = 0, minus_ext3_features = 0, minus_kvm_features = 0;
> +    /* Features to be added*/
> +    uint32_t plus_features = 0, plus_ext_features = 0;
> +    uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
> +    uint32_t plus_kvm_features = 0, plus_svm_features = 0;
> +    /* Features to be removed */
> +    uint32_t minus_features = 0, minus_ext_features = 0;
> +    uint32_t minus_ext2_features = 0, minus_ext3_features = 0;
> +    uint32_t minus_kvm_features = 0, minus_svm_features = 0;
>     uint32_t numvalue;
> 
>     for (def = x86_defs; def; def = def->next)
> @@ -579,16 +613,22 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
> 
>     add_flagname_to_bitmaps("hypervisor", &plus_features,
>         &plus_ext_features, &plus_ext2_features, &plus_ext3_features,
> -        &plus_kvm_features);
> +        &plus_kvm_features, &plus_svm_features);
> 
>     featurestr = strtok(NULL, ",");
> 
>     while (featurestr) {
>         char *val;
>         if (featurestr[0] == '+') {
> -            add_flagname_to_bitmaps(featurestr + 1, &plus_features, &plus_ext_features, &plus_ext2_features, &plus_ext3_features, &plus_kvm_features);
> +            add_flagname_to_bitmaps(featurestr + 1, &plus_features,
> +                            &plus_ext_features, &plus_ext2_features,
> +                            &plus_ext3_features, &plus_kvm_features,
> +                            &plus_svm_features);
>         } else if (featurestr[0] == '-') {
> -            add_flagname_to_bitmaps(featurestr + 1, &minus_features, &minus_ext_features, &minus_ext2_features, &minus_ext3_features, &minus_kvm_features);
> +            add_flagname_to_bitmaps(featurestr + 1, &minus_features,
> +                            &minus_ext_features, &minus_ext2_features,
> +                            &minus_ext3_features, &minus_kvm_features,
> +                            &minus_svm_features);
>         } else if ((val = strchr(featurestr, '='))) {
>             *val = 0; val++;
>             if (!strcmp(featurestr, "family")) {
> @@ -670,11 +710,13 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
>     x86_cpu_def->ext2_features |= plus_ext2_features;
>     x86_cpu_def->ext3_features |= plus_ext3_features;
>     x86_cpu_def->kvm_features |= plus_kvm_features;
> +    x86_cpu_def->svm_features |= plus_svm_features;
>     x86_cpu_def->features &= ~minus_features;
>     x86_cpu_def->ext_features &= ~minus_ext_features;
>     x86_cpu_def->ext2_features &= ~minus_ext2_features;
>     x86_cpu_def->ext3_features &= ~minus_ext3_features;
>     x86_cpu_def->kvm_features &= ~minus_kvm_features;
> +    x86_cpu_def->svm_features &= ~minus_svm_features;
>     if (check_cpuid) {
>         if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
>             goto error;
> @@ -816,6 +858,7 @@ int cpu_x86_register (CPUX86State *env, const char *cpu_model)
>     env->cpuid_ext3_features = def->ext3_features;
>     env->cpuid_xlevel = def->xlevel;
>     env->cpuid_kvm_features = def->kvm_features;
> +    env->cpuid_svm_features = def->svm_features;
>     if (!kvm_enabled()) {
>         env->cpuid_features &= TCG_FEATURES;
>         env->cpuid_ext_features &= TCG_EXT_FEATURES;
> @@ -825,6 +868,7 @@ int cpu_x86_register (CPUX86State *env, const char *cpu_model)
> #endif
>             );
>         env->cpuid_ext3_features &= TCG_EXT3_FEATURES;
> +        env->cpuid_svm_features &= TCG_SVM_FEATURES;
>     }
>     {
>         const char *model_id = def->model_id;
> @@ -1135,11 +1179,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>                 *ecx |= 1 << 1;    /* CmpLegacy bit */
>             }
>         }
> -
> -        if (kvm_enabled()) {
> -            /* Nested SVM not yet supported in upstream QEMU */
> -            *ecx &= ~CPUID_EXT3_SVM;
> -        }

Have you made sure that the default cpu type doesn't enable the SVM bit? I couldn't find any trace of an override to "kvm64" as default type when KVM is used.

The rest looks good :). Thanks a lot for this patch set!


Alex
Joerg Roedel - Sept. 11, 2010, 2:20 p.m.
On Sat, Sep 11, 2010 at 03:43:02PM +0200, Alexander Graf wrote:
> > @@ -305,6 +322,8 @@ static x86_def_t builtin_x86_defs[] = {
> >                     CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
> >         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
> >             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
> > +        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
> > +            CPUID_SVM_VMCBCLEAN,
> 
> Does that phenom already do all those? It does NPT, but I'm not sure
> about NRIPSAVE for example.

Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
Phenoms don't have it. For the SVM features it is not that important
what the host hardware supports but what KVM can emulate. VMCBCLEAN can
be emulated without supporting it in the host for example.

> > +    /*
> > +     * Every SVM feature requires emulation support in KVM - so we can't just
> > +     * read the host features here. KVM might even support SVM features not
> > +     * available on the host hardware
> > +     */
> > +    x86_cpu_def->svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV |
> > +                                CPUID_SVM_NRIPSAVE | CPUID_SVM_VMCBCLEAN;
> 
> Hrm. Wouldn't it make more sense to declare this to -1? This will
> still go through the kernel space matcher which tells us which
> features are available anyways, right?

Yeah, that would make sense. I thought about it while porting the
patches but could not actually made me do it because I am not entirely
sure that this is a good idea. But I may revisit that, especially after
your question :-)

> > -
> > -        if (kvm_enabled()) {
> > -            /* Nested SVM not yet supported in upstream QEMU */
> > -            *ecx &= ~CPUID_EXT3_SVM;
> > -        }
> 
> Have you made sure that the default cpu type doesn't enable the SVM
> bit? I couldn't find any trace of an override to "kvm64" as default
> type when KVM is used.

No, the default CPU type has SVM still enabled by default. I thought
about removing the SVM flag from the qemu64 cpu definition but that
breaks on TCG where SVM is emulated too.
What I implemented in this patch is to enable SVM by default and mask it
out if KVM does not support it on the given machine. Problem here is
that KVM is currently buggy because it always reports support for SVM,
even on Intel machines. I fixed that with patch 29 of my npt-virt
patch-set. The patch will hopefully make it into the various stable
trees and then we have a clean solution.

> The rest looks good :). Thanks a lot for this patch set!

Great, thanks :-)

	Joerg
Alexander Graf - Sept. 11, 2010, 2:29 p.m.
On 11.09.2010, at 16:20, Joerg Roedel wrote:

> On Sat, Sep 11, 2010 at 03:43:02PM +0200, Alexander Graf wrote:
>>> @@ -305,6 +322,8 @@ static x86_def_t builtin_x86_defs[] = {
>>>                    CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
>>>        .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
>>>            CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
>>> +        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
>>> +            CPUID_SVM_VMCBCLEAN,
>> 
>> Does that phenom already do all those? It does NPT, but I'm not sure
>> about NRIPSAVE for example.
> 
> Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
> Phenoms don't have it. For the SVM features it is not that important
> what the host hardware supports but what KVM can emulate. VMCBCLEAN can
> be emulated without supporting it in the host for example.

That particular one was my workstation - a Phenom 9550 which is one of the early 4-core ones.

> 
>>> +    /*
>>> +     * Every SVM feature requires emulation support in KVM - so we can't just
>>> +     * read the host features here. KVM might even support SVM features not
>>> +     * available on the host hardware
>>> +     */
>>> +    x86_cpu_def->svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV |
>>> +                                CPUID_SVM_NRIPSAVE | CPUID_SVM_VMCBCLEAN;
>> 
>> Hrm. Wouldn't it make more sense to declare this to -1? This will
>> still go through the kernel space matcher which tells us which
>> features are available anyways, right?
> 
> Yeah, that would make sense. I thought about it while porting the
> patches but could not actually made me do it because I am not entirely
> sure that this is a good idea. But I may revisit that, especially after
> your question :-)
> 
>>> -
>>> -        if (kvm_enabled()) {
>>> -            /* Nested SVM not yet supported in upstream QEMU */
>>> -            *ecx &= ~CPUID_EXT3_SVM;
>>> -        }
>> 
>> Have you made sure that the default cpu type doesn't enable the SVM
>> bit? I couldn't find any trace of an override to "kvm64" as default
>> type when KVM is used.
> 
> No, the default CPU type has SVM still enabled by default. I thought
> about removing the SVM flag from the qemu64 cpu definition but that
> breaks on TCG where SVM is emulated too.
> What I implemented in this patch is to enable SVM by default and mask it
> out if KVM does not support it on the given machine. Problem here is
> that KVM is currently buggy because it always reports support for SVM,
> even on Intel machines. I fixed that with patch 29 of my npt-virt
> patch-set. The patch will hopefully make it into the various stable
> trees and then we have a clean solution.

It still won't be clean as it breaks cross vendor migration :(. The real fix would be to set the default machine to "kvm64" instead of "qemu64" in pc.c when kvm_enabled().


Alex
Joerg Roedel - Sept. 11, 2010, 2:36 p.m.
On Sat, Sep 11, 2010 at 04:29:18PM +0200, Alexander Graf wrote:
> > Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
> > Phenoms don't have it. For the SVM features it is not that important
> > what the host hardware supports but what KVM can emulate. VMCBCLEAN can
> > be emulated without supporting it in the host for example.
> 
> That particular one was my workstation - a Phenom 9550 which is one of
> the early 4-core ones.

Yes, the 9550 don't have the nripsave feature.

> > No, the default CPU type has SVM still enabled by default. I thought
> > about removing the SVM flag from the qemu64 cpu definition but that
> > breaks on TCG where SVM is emulated too.
> > What I implemented in this patch is to enable SVM by default and mask it
> > out if KVM does not support it on the given machine. Problem here is
> > that KVM is currently buggy because it always reports support for SVM,
> > even on Intel machines. I fixed that with patch 29 of my npt-virt
> > patch-set. The patch will hopefully make it into the various stable
> > trees and then we have a clean solution.
> 
> It still won't be clean as it breaks cross vendor migration :(. The
> real fix would be to set the default machine to "kvm64" instead of
> "qemu64" in pc.c when kvm_enabled().

I am not sure that I am the right person to do such an invasive change.
At least not in this patch-set. I could think of removing SVM from the
qemu64 definition and add it again in the TCG specific path.

	Joerg
Alexander Graf - Sept. 11, 2010, 2:38 p.m.
On 11.09.2010, at 16:36, Joerg Roedel wrote:

> On Sat, Sep 11, 2010 at 04:29:18PM +0200, Alexander Graf wrote:
>>> Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
>>> Phenoms don't have it. For the SVM features it is not that important
>>> what the host hardware supports but what KVM can emulate. VMCBCLEAN can
>>> be emulated without supporting it in the host for example.
>> 
>> That particular one was my workstation - a Phenom 9550 which is one of
>> the early 4-core ones.
> 
> Yes, the 9550 don't have the nripsave feature.
> 
>>> No, the default CPU type has SVM still enabled by default. I thought
>>> about removing the SVM flag from the qemu64 cpu definition but that
>>> breaks on TCG where SVM is emulated too.
>>> What I implemented in this patch is to enable SVM by default and mask it
>>> out if KVM does not support it on the given machine. Problem here is
>>> that KVM is currently buggy because it always reports support for SVM,
>>> even on Intel machines. I fixed that with patch 29 of my npt-virt
>>> patch-set. The patch will hopefully make it into the various stable
>>> trees and then we have a clean solution.
>> 
>> It still won't be clean as it breaks cross vendor migration :(. The
>> real fix would be to set the default machine to "kvm64" instead of
>> "qemu64" in pc.c when kvm_enabled().
> 
> I am not sure that I am the right person to do such an invasive change.
> At least not in this patch-set. I could think of removing SVM from the
> qemu64 definition and add it again in the TCG specific path.

It's not an invasive change and IMHO the only correct one. I'm not even sure why it's not done yet - after all the reason for the kvm* cpu types is exactly that. Please just add it as an early patch in your series.

Alex
Joerg Roedel - Sept. 11, 2010, 2:42 p.m.
On Sat, Sep 11, 2010 at 04:38:51PM +0200, Alexander Graf wrote:
> > I am not sure that I am the right person to do such an invasive change.
> > At least not in this patch-set. I could think of removing SVM from the
> > qemu64 definition and add it again in the TCG specific path.
> 
> It's not an invasive change and IMHO the only correct one. I'm not
> even sure why it's not done yet - after all the reason for the kvm*
> cpu types is exactly that. Please just add it as an early patch in
> your series.

Okay, if you say its ok I will change it. But if anyone comes to me with
regressions I will send them straight to you :-P

	Joerg
Alexander Graf - Sept. 11, 2010, 2:45 p.m.
On 11.09.2010, at 16:42, Joerg Roedel wrote:

> On Sat, Sep 11, 2010 at 04:38:51PM +0200, Alexander Graf wrote:
>>> I am not sure that I am the right person to do such an invasive change.
>>> At least not in this patch-set. I could think of removing SVM from the
>>> qemu64 definition and add it again in the TCG specific path.
>> 
>> It's not an invasive change and IMHO the only correct one. I'm not
>> even sure why it's not done yet - after all the reason for the kvm*
>> cpu types is exactly that. Please just add it as an early patch in
>> your series.
> 
> Okay, if you say its ok I will change it. But if anyone comes to me with
> regressions I will send them straight to you :-P

Feel free to do so :). We really need to have a different default CPU for "migration safe KVM" and "This is what TCG can emulate". They don't match semantically.


Alex
Avi Kivity - Sept. 12, 2010, 6:05 a.m.
On 09/11/2010 05:20 PM, Joerg Roedel wrote:
> On Sat, Sep 11, 2010 at 03:43:02PM +0200, Alexander Graf wrote:
>>> @@ -305,6 +322,8 @@ static x86_def_t builtin_x86_defs[] = {
>>>                      CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
>>>          .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
>>>              CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
>>> +        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
>>> +            CPUID_SVM_VMCBCLEAN,
>> Does that phenom already do all those? It does NPT, but I'm not sure
>> about NRIPSAVE for example.
> Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
> Phenoms don't have it. For the SVM features it is not that important
> what the host hardware supports but what KVM can emulate. VMCBCLEAN can
> be emulated without supporting it in the host for example.

Well, let's have a phenom2 type for those new features (and any other 
features the phenom 2 has).  What's the point of using the name of 
existing hardware if it doesn't match that hardware?
Alexander Graf - Sept. 12, 2010, 7:16 a.m.
On 12.09.2010, at 08:05, Avi Kivity wrote:

> On 09/11/2010 05:20 PM, Joerg Roedel wrote:
>> On Sat, Sep 11, 2010 at 03:43:02PM +0200, Alexander Graf wrote:
>>>> @@ -305,6 +322,8 @@ static x86_def_t builtin_x86_defs[] = {
>>>>                     CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
>>>>         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
>>>>             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
>>>> +        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
>>>> +            CPUID_SVM_VMCBCLEAN,
>>> Does that phenom already do all those? It does NPT, but I'm not sure
>>> about NRIPSAVE for example.
>> Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
>> Phenoms don't have it. For the SVM features it is not that important
>> what the host hardware supports but what KVM can emulate. VMCBCLEAN can
>> be emulated without supporting it in the host for example.
> 
> Well, let's have a phenom2 type for those new features (and any other features the phenom 2 has).  What's the point of using the name of existing hardware if it doesn't match that hardware?

Isn't that what cpu=host is there for? I don't want to see cpu type cluttering like we have it on ppc. I added the core2duo type for Mac OS X guests for which those are basically the oldest supported CPUs.

For the Phenom type, I honestly don't remember why, but there was also a good reason to add it. In fact, I use it today to have nested virt without -cpu host on hardware that's too new for my guests.

Either way, I don't think we need a phenom2 type. The features additional are minor enough to not really matter and all use cases I can come up with require either -cpu host (local virt) or -cpu phenom (migration).


Alex
Avi Kivity - Sept. 12, 2010, 8:01 a.m.
On 09/12/2010 09:16 AM, Alexander Graf wrote:
>>>
>>> Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
>>> Phenoms don't have it. For the SVM features it is not that important
>>> what the host hardware supports but what KVM can emulate. VMCBCLEAN can
>>> be emulated without supporting it in the host for example.
>> Well, let's have a phenom2 type for those new features (and any other features the phenom 2 has).  What's the point of using the name of existing hardware if it doesn't match that hardware?
> Isn't that what cpu=host is there for? I don't want to see cpu type cluttering like we have it on ppc. I added the core2duo type for Mac OS X guests for which those are basically the oldest supported CPUs.

-cpu host is to all supported features into your guest.
-cpu phenom is to pretend you are running on a phenom cpu.  This is 
useful for a migration farm for which the greatest common denominator is 
a phenom.

Those are separate use cases.

> For the Phenom type, I honestly don't remember why, but there was also a good reason to add it. In fact, I use it today to have nested virt without -cpu host on hardware that's too new for my guests.

Curious, what guests balk at modern hardware but are fine with phenom?

> Either way, I don't think we need a phenom2 type. The features additional are minor enough to not really matter and all use cases I can come up with require either -cpu host (local virt) or -cpu phenom (migration).

I'm fine with this (or with adding phenom2).  But don't make phenom 
contain flags that real phenoms don't have.
Alexander Graf - Sept. 12, 2010, 10:06 a.m.
Am 12.09.2010 um 10:01 schrieb Avi Kivity <avi@redhat.com>:

> On 09/12/2010 09:16 AM, Alexander Graf wrote:
>>>> 
>>>> Depends on which Phenom you have. A Phenom II has NRIPSAVE but the old
>>>> Phenoms don't have it. For the SVM features it is not that important
>>>> what the host hardware supports but what KVM can emulate. VMCBCLEAN can
>>>> be emulated without supporting it in the host for example.
>>> Well, let's have a phenom2 type for those new features (and any other features the phenom 2 has).  What's the point of using the name of existing hardware if it doesn't match that hardware?
>> Isn't that what cpu=host is there for? I don't want to see cpu type cluttering like we have it on ppc. I added the core2duo type for Mac OS X guests for which those are basically the oldest supported CPUs.
> 
> -cpu host is to all supported features into your guest.
> -cpu phenom is to pretend you are running on a phenom cpu.  This is useful for a migration farm for which the greatest common denominator is a phenom.
> 
> Those are separate use cases.

Exactly. And the benefit from phenom -> phenom2 is so minor that it doesn't make sense.

> 
>> For the Phenom type, I honestly don't remember why, but there was also a good reason to add it. In fact, I use it today to have nested virt without -cpu host on hardware that's too new for my guests.
> 
> Curious, what guests balk at modern hardware but are fine with phenom?

Sles11 GA ;).

> 
>> Either way, I don't think we need a phenom2 type. The features additional are minor enough to not really matter and all use cases I can come up with require either -cpu host (local virt) or -cpu phenom (migration).
> 
> I'm fine with this (or with adding phenom2).  But don't make phenom contain flags that real phenoms don't have.

Those were my words :).


Alex
>
Avi Kivity - Sept. 12, 2010, 10:22 a.m.
On 09/12/2010 12:06 PM, Alexander Graf wrote:
>>> For the Phenom type, I honestly don't remember why, but there was also a good reason to add it. In fact, I use it today to have nested virt without -cpu host on hardware that's too new for my guests.
>> Curious, what guests balk at modern hardware but are fine with phenom?
> Sles11 GA ;).

Still curious, how does -cpu host break it?

>>> Either way, I don't think we need a phenom2 type. The features additional are minor enough to not really matter and all use cases I can come up with require either -cpu host (local virt) or -cpu phenom (migration).
>> I'm fine with this (or with adding phenom2).  But don't make phenom contain flags that real phenoms don't have.
> Those were my words :).

Then we are in agreement.
Alexander Graf - Sept. 12, 2010, 11:14 a.m.
Am 12.09.2010 um 12:22 schrieb Avi Kivity <avi@redhat.com>:

> On 09/12/2010 12:06 PM, Alexander Graf wrote:
>>>> For the Phenom type, I honestly don't remember why, but there was also a good reason to add it. In fact, I use it today to have nested virt without -cpu host on hardware that's too new for my guests.
>>> Curious, what guests balk at modern hardware but are fine with phenom?
>> Sles11 GA ;).
> 
> Still curious, how does -cpu host break it?

Uh, something in the power management module. Just give it a try on an Istambul system. Just use the 11.1 iso - the kernels are close enough.

Alex

>
Avi Kivity - Sept. 12, 2010, 12:02 p.m.
On 09/12/2010 01:14 PM, Alexander Graf wrote:
> Uh, something in the power management module. Just give it a try on an Istambul system. Just use the 11.1 iso - the kernels are close enough.

Oh, probably some silly msr.
Joerg Roedel - Sept. 12, 2010, 2:30 p.m.
On Sun, Sep 12, 2010 at 10:01:28AM +0200, Avi Kivity wrote:
>  On 09/12/2010 09:16 AM, Alexander Graf wrote:

>> Either way, I don't think we need a phenom2 type. The features
>> additional are minor enough to not really matter and all use cases I
>> can come up with require either -cpu host (local virt) or -cpu phenom
>> (migration).
>
> I'm fine with this (or with adding phenom2).  But don't make phenom  
> contain flags that real phenoms don't have.

How about features that are not supported by the hardware but can be
supported in emulation? The VMCBCLEAN feature is one of those which
makes a lot of sense to reduce the emulated world-switch times. I guess
its ok to enable those with -cpu host?

	Joerg
Avi Kivity - Sept. 12, 2010, 2:36 p.m.
On 09/12/2010 04:30 PM, Joerg Roedel wrote:
>
>>> Either way, I don't think we need a phenom2 type. The features
>>> additional are minor enough to not really matter and all use cases I
>>> can come up with require either -cpu host (local virt) or -cpu phenom
>>> (migration).
>> I'm fine with this (or with adding phenom2).  But don't make phenom
>> contain flags that real phenoms don't have.
> How about features that are not supported by the hardware but can be
> supported in emulation? The VMCBCLEAN feature is one of those which
> makes a lot of sense to reduce the emulated world-switch times. I guess
> its ok to enable those with -cpu host?
>
>

It's a good question.  We do that with x2apic, so yes.

Userspace can do four things with KVM_GET_SUPPORTED_CPUID:
- feed it right back to KVM_SET_CPUID2, getting maximum features and 
hopefully performance
- use it to mask the real cpuid which it fetches directly, getting 
something as similar as possible to the host
- use it to mask a predefined cpu type, getting something as similar as 
possible to that cpu type
- use it to verify that a predefined cpu type is supported, getting 
somthing exactly the same as that cpu type

-cpu host is the first option.  If the need comes for the second option, 
we can provide it.

I'll note that KVM_GET_SUPPORTED_CPUID can return values not present in 
the host cpuid in the documentation.

Patch

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 1144d4e..77eeab1 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -405,6 +405,17 @@ 
 #define CPUID_EXT3_IBS     (1 << 10)
 #define CPUID_EXT3_SKINIT  (1 << 12)
 
+#define CPUID_SVM_NPT          (1 << 0)
+#define CPUID_SVM_LBRV         (1 << 1)
+#define CPUID_SVM_SVMLOCK      (1 << 2)
+#define CPUID_SVM_NRIPSAVE     (1 << 3)
+#define CPUID_SVM_TSCSCALE     (1 << 4)
+#define CPUID_SVM_VMCBCLEAN    (1 << 5)
+#define CPUID_SVM_FLUSHASID    (1 << 6)
+#define CPUID_SVM_DECODEASSIST (1 << 7)
+#define CPUID_SVM_PAUSEFILTER  (1 << 10)
+#define CPUID_SVM_PFTHRESHOLD  (1 << 12)
+
 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
 #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
 #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
@@ -702,6 +713,7 @@  typedef struct CPUX86State {
     uint8_t has_error_code;
     uint32_t sipi_vector;
     uint32_t cpuid_kvm_features;
+    uint32_t cpuid_svm_features;
     
     /* in order to simplify APIC support, we leave this pointer to the
        user */
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 3fcf78f..ea1ac73 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -79,6 +79,17 @@  static const char *kvm_feature_name[] = {
     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 };
 
+static const char *svm_feature_name[] = {
+    "npt", "lbrv", "svm_lock", "nrip_save",
+    "tsc_scale", "vmcb_clean",  "flushbyasid", "decodeassists",
+    NULL, NULL, "pause_filter", NULL,
+    "pfthreshold", NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+};
+
 /* collects per-function cpuid data
  */
 typedef struct model_features_t {
@@ -192,13 +203,15 @@  static void add_flagname_to_bitmaps(const char *flagname, uint32_t *features,
                                     uint32_t *ext_features,
                                     uint32_t *ext2_features,
                                     uint32_t *ext3_features,
-                                    uint32_t *kvm_features)
+                                    uint32_t *kvm_features,
+                                    uint32_t *svm_features)
 {
     if (!lookup_feature(features, flagname, NULL, feature_name) &&
         !lookup_feature(ext_features, flagname, NULL, ext_feature_name) &&
         !lookup_feature(ext2_features, flagname, NULL, ext2_feature_name) &&
         !lookup_feature(ext3_features, flagname, NULL, ext3_feature_name) &&
-        !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name))
+        !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name) &&
+        !lookup_feature(svm_features, flagname, NULL, svm_feature_name))
             fprintf(stderr, "CPU feature %s not found\n", flagname);
 }
 
@@ -210,7 +223,8 @@  typedef struct x86_def_t {
     int family;
     int model;
     int stepping;
-    uint32_t features, ext_features, ext2_features, ext3_features, kvm_features;
+    uint32_t features, ext_features, ext2_features, ext3_features;
+    uint32_t kvm_features, svm_features;
     uint32_t xlevel;
     char model_id[48];
     int vendor_override;
@@ -253,6 +267,7 @@  typedef struct x86_def_t {
           CPUID_EXT2_PDPE1GB */
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
+#define TCG_SVM_FEATURES 0
 
 /* maintains list of cpu model definitions
  */
@@ -278,6 +293,8 @@  static x86_def_t builtin_x86_defs[] = {
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
+        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
+            CPUID_SVM_VMCBCLEAN,
         .xlevel = 0x8000000A,
         .model_id = "QEMU Virtual CPU version " QEMU_VERSION,
     },
@@ -305,6 +322,8 @@  static x86_def_t builtin_x86_defs[] = {
                     CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
+        .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
+            CPUID_SVM_VMCBCLEAN,
         .xlevel = 0x8000001A,
         .model_id = "AMD Phenom(tm) 9550 Quad-Core Processor"
     },
@@ -505,6 +524,15 @@  static int cpu_x86_fill_host(x86_def_t *x86_cpu_def)
     cpu_x86_fill_model_id(x86_cpu_def->model_id);
     x86_cpu_def->vendor_override = 0;
 
+
+    /*
+     * Every SVM feature requires emulation support in KVM - so we can't just
+     * read the host features here. KVM might even support SVM features not
+     * available on the host hardware
+     */
+    x86_cpu_def->svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV |
+                                CPUID_SVM_NRIPSAVE | CPUID_SVM_VMCBCLEAN;
+
     return 0;
 }
 
@@ -560,8 +588,14 @@  static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
 
     char *s = strdup(cpu_model);
     char *featurestr, *name = strtok(s, ",");
-    uint32_t plus_features = 0, plus_ext_features = 0, plus_ext2_features = 0, plus_ext3_features = 0, plus_kvm_features = 0;
-    uint32_t minus_features = 0, minus_ext_features = 0, minus_ext2_features = 0, minus_ext3_features = 0, minus_kvm_features = 0;
+    /* Features to be added*/
+    uint32_t plus_features = 0, plus_ext_features = 0;
+    uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
+    uint32_t plus_kvm_features = 0, plus_svm_features = 0;
+    /* Features to be removed */
+    uint32_t minus_features = 0, minus_ext_features = 0;
+    uint32_t minus_ext2_features = 0, minus_ext3_features = 0;
+    uint32_t minus_kvm_features = 0, minus_svm_features = 0;
     uint32_t numvalue;
 
     for (def = x86_defs; def; def = def->next)
@@ -579,16 +613,22 @@  static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
 
     add_flagname_to_bitmaps("hypervisor", &plus_features,
         &plus_ext_features, &plus_ext2_features, &plus_ext3_features,
-        &plus_kvm_features);
+        &plus_kvm_features, &plus_svm_features);
 
     featurestr = strtok(NULL, ",");
 
     while (featurestr) {
         char *val;
         if (featurestr[0] == '+') {
-            add_flagname_to_bitmaps(featurestr + 1, &plus_features, &plus_ext_features, &plus_ext2_features, &plus_ext3_features, &plus_kvm_features);
+            add_flagname_to_bitmaps(featurestr + 1, &plus_features,
+                            &plus_ext_features, &plus_ext2_features,
+                            &plus_ext3_features, &plus_kvm_features,
+                            &plus_svm_features);
         } else if (featurestr[0] == '-') {
-            add_flagname_to_bitmaps(featurestr + 1, &minus_features, &minus_ext_features, &minus_ext2_features, &minus_ext3_features, &minus_kvm_features);
+            add_flagname_to_bitmaps(featurestr + 1, &minus_features,
+                            &minus_ext_features, &minus_ext2_features,
+                            &minus_ext3_features, &minus_kvm_features,
+                            &minus_svm_features);
         } else if ((val = strchr(featurestr, '='))) {
             *val = 0; val++;
             if (!strcmp(featurestr, "family")) {
@@ -670,11 +710,13 @@  static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
     x86_cpu_def->ext2_features |= plus_ext2_features;
     x86_cpu_def->ext3_features |= plus_ext3_features;
     x86_cpu_def->kvm_features |= plus_kvm_features;
+    x86_cpu_def->svm_features |= plus_svm_features;
     x86_cpu_def->features &= ~minus_features;
     x86_cpu_def->ext_features &= ~minus_ext_features;
     x86_cpu_def->ext2_features &= ~minus_ext2_features;
     x86_cpu_def->ext3_features &= ~minus_ext3_features;
     x86_cpu_def->kvm_features &= ~minus_kvm_features;
+    x86_cpu_def->svm_features &= ~minus_svm_features;
     if (check_cpuid) {
         if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
             goto error;
@@ -816,6 +858,7 @@  int cpu_x86_register (CPUX86State *env, const char *cpu_model)
     env->cpuid_ext3_features = def->ext3_features;
     env->cpuid_xlevel = def->xlevel;
     env->cpuid_kvm_features = def->kvm_features;
+    env->cpuid_svm_features = def->svm_features;
     if (!kvm_enabled()) {
         env->cpuid_features &= TCG_FEATURES;
         env->cpuid_ext_features &= TCG_EXT_FEATURES;
@@ -825,6 +868,7 @@  int cpu_x86_register (CPUX86State *env, const char *cpu_model)
 #endif
             );
         env->cpuid_ext3_features &= TCG_EXT3_FEATURES;
+        env->cpuid_svm_features &= TCG_SVM_FEATURES;
     }
     {
         const char *model_id = def->model_id;
@@ -1135,11 +1179,6 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                 *ecx |= 1 << 1;    /* CmpLegacy bit */
             }
         }
-
-        if (kvm_enabled()) {
-            /* Nested SVM not yet supported in upstream QEMU */
-            *ecx &= ~CPUID_EXT3_SVM;
-        }
         break;
     case 0x80000002:
     case 0x80000003:
@@ -1184,10 +1223,17 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         }
         break;
     case 0x8000000A:
-        *eax = 0x00000001; /* SVM Revision */
-        *ebx = 0x00000010; /* nr of ASIDs */
-        *ecx = 0;
-        *edx = 0; /* optional features */
+	if (env->cpuid_ext3_features & CPUID_EXT3_SVM) {
+		*eax = 0x00000001; /* SVM Revision */
+		*ebx = 0x00000010; /* nr of ASIDs */
+		*ecx = 0;
+		*edx = env->cpuid_svm_features; /* optional features */
+	} else {
+		*eax = 0;
+		*ebx = 0;
+		*ecx = 0;
+		*edx = 0;
+	}
         break;
     default:
         /* reserved values: zero */
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index a33d2fa..74e7b4f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -192,6 +192,9 @@  int kvm_arch_init_vcpu(CPUState *env)
                                                              0, R_EDX);
     env->cpuid_ext3_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
                                                              0, R_ECX);
+    env->cpuid_svm_features  &= kvm_arch_get_supported_cpuid(env, 0x8000000A,
+                                                             0, R_EDX);
+
 
     cpuid_i = 0;