[v3,2/3] kvm: Add support to KVM_GET_MSR_FEATURE_INDEX_LIST and KVM_GET_MSRS system ioctl

Message ID 1533909989-56115-3-git-send-email-robert.hu@linux.intel.com
State New
Headers show
Series
  • x86: QEMU side support on MSR based features
Related show

Commit Message

Robert Hoo Aug. 10, 2018, 2:06 p.m.
Add kvm_get_supported_feature_msrs() to get supported MSR feature index list.
Add kvm_arch_get_supported_msr_feature() to get each MSR features value.

kvm_get_supported_feature_msrs() is called in kvm_arch_init().
kvm_arch_get_supported_msr_feature() is called by
x86_cpu_get_supported_feature_word().

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
---
 include/sysemu/kvm.h |  2 ++
 target/i386/kvm.c    | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

Comments

Eduardo Habkost Aug. 17, 2018, 1:18 p.m. | #1
Thanks for the patch, comments below:

On Fri, Aug 10, 2018 at 10:06:28PM +0800, Robert Hoo wrote:
> Add kvm_get_supported_feature_msrs() to get supported MSR feature index list.
> Add kvm_arch_get_supported_msr_feature() to get each MSR features value.
> 
> kvm_get_supported_feature_msrs() is called in kvm_arch_init().
> kvm_arch_get_supported_msr_feature() is called by
> x86_cpu_get_supported_feature_word().
> 
> Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> ---
>  include/sysemu/kvm.h |  2 ++
>  target/i386/kvm.c    | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 81 insertions(+)
> 
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index 0b64b8e..97d8d9d 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -463,6 +463,8 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension);
>  
>  uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
>                                        uint32_t index, int reg);
> +uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index);
> +
>  
>  void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>  
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index 9313602..70d8606 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -107,6 +107,7 @@ static int has_pit_state2;
>  static bool has_msr_mcg_ext_ctl;
>  
>  static struct kvm_cpuid2 *cpuid_cache;
> +static struct kvm_msr_list *kvm_feature_msrs;

I was going to suggest moving this to KVMState, but KVMState is a
arch-independent struct.  I guess we'll have to live with this by
now.

>  
>  int kvm_has_pit_state2(void)
>  {
> @@ -420,6 +421,41 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>      return ret;
>  }
>  
> +uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
> +{
> +    struct {
> +        struct kvm_msrs info;
> +        struct kvm_msr_entry entries[1];
> +    } msr_data;
> +    uint32_t ret;
> +
> +    /*Check if the requested feature MSR is supported*/
> +    int i;
> +    for (i = 0; i < kvm_feature_msrs->nmsrs; i++) {
> +        if (index == kvm_feature_msrs->indices[i]) {
> +            break;
> +        }
> +    }

We are duplicating the logic that's already inside KVM (checking
the list of supported MSRs and returning an error).  Can't we
make this simpler and just remove this check?  If the MSR is not
supported, KVM_GET_MSRS would just return 0.

> +    if (i >= kvm_feature_msrs->nmsrs) {
> +        fprintf(stderr, "Requested MSR (index= %d) is not supported.\n", index);

This error message is meaningless for QEMU users.  Do we really
need to print it?

> +        return 0;

Returning 0 for MSRs that are not supported is probably OK, but
we need to see this function being used, to be sure (I didn't
look at all the patches in this series yet).

> +    }
> +
> +    msr_data.info.nmsrs = 1;
> +    msr_data.entries[0].index = index;
> +
> +    ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data);
> +
> +    if (ret != 1) {
> +        fprintf(stderr, "KVM get MSR (index=0x%x) feature failed, %s\n",
> +            index, strerror(-ret));
> +        exit(1);

I'm not a fan of APIs that write to stdout/stderr or exit(),
unless they are clearly just initialization functions that should
never fail in normal circumstances.

But if you call KVM_GET_MSRS for all MSRs at
kvm_get_supported_feature_msrs() below, this function would never
need to report an error.

We are already going through the trouble of allocating
kvm_feature_msrs in kvm_get_supported_feature_msrs() anyway.

> +    }
> +
> +    return msr_data.entries[0].data;
> +}
> +
> +
>  typedef struct HWPoisonPage {
>      ram_addr_t ram_addr;
>      QLIST_ENTRY(HWPoisonPage) list;
> @@ -1238,7 +1274,45 @@ void kvm_arch_do_init_vcpu(X86CPU *cpu)
>          env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
>      }
>  }

Nit: please add an extra newline between functions.

> +static int kvm_get_supported_feature_msrs(KVMState *s)
> +{
> +    static int kvm_supported_feature_msrs;
> +    int ret = 0;
> +
> +    if (kvm_supported_feature_msrs == 0) {

Do you really need the kvm_supported_feature_msrs variable?  You
could just check if kvm_feature_msrs is NULL.

I also suggest doing this:

    if (already initialized) {
       return 0;
    }

    /* regular initialization code here */

to reduce one indentation level.


> +        struct kvm_msr_list msr_list;
> +
> +        kvm_supported_feature_msrs++;
> +
> +        msr_list.nmsrs = 0;
> +        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list);
> +        if (ret < 0 && ret != -E2BIG) {
> +            return ret;

What if the host KVM version doesn't support
KVM_GET_MSR_FEATURE_INDEX_LIST?

> +        }
> +
> +        assert(msr_list.nmsrs > 0);
> +        kvm_feature_msrs = (struct kvm_msr_list *) \
> +            g_malloc0(sizeof(msr_list) +
> +                     msr_list.nmsrs * sizeof(msr_list.indices[0]));
> +        if (kvm_feature_msrs == NULL) {

g_malloc0() never returns NULL on failure.  See
https://developer.gnome.org/glib/2.56/glib-Memory-Allocation.html#glib-Memory-Allocation.description

> +            fprintf(stderr, "Failed to allocate space for KVM Feature MSRs"
> +                "which has %d MSRs\n", msr_list.nmsrs);
> +            return -1;
> +        }
> +
> +        kvm_feature_msrs->nmsrs = msr_list.nmsrs;
> +        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs);
>  
> +        if (ret < 0) {  /*ioctl failure*/

Small nit: the "ioctl failure" comment seems unnecessary, I would
just remove it.

> +            fprintf(stderr, "Fetch KVM feature MSRs failed: %s\n",
> +                strerror(-ret));
> +            g_free(kvm_feature_msrs);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}

Same as above: please add an extra newline between functions.

>  static int kvm_get_supported_msrs(KVMState *s)
>  {
>      static int kvm_supported_msrs;
> @@ -1400,6 +1474,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>          return ret;
>      }
>  
> +    ret = kvm_get_supported_feature_msrs(s);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
>      uname(&utsname);
>      lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
>  
> -- 
> 1.8.3.1
> 
>
Robert Hoo Aug. 18, 2018, 7:27 a.m. | #2
On Fri, 2018-08-17 at 10:18 -0300, Eduardo Habkost wrote:
> Thanks for the patch, comments below:
> 
> On Fri, Aug 10, 2018 at 10:06:28PM +0800, Robert Hoo wrote:
> > Add kvm_get_supported_feature_msrs() to get supported MSR feature index list.
> > Add kvm_arch_get_supported_msr_feature() to get each MSR features value.
> > 
> > kvm_get_supported_feature_msrs() is called in kvm_arch_init().
> > kvm_arch_get_supported_msr_feature() is called by
> > x86_cpu_get_supported_feature_word().
> > 
> > Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> > ---
> >  include/sysemu/kvm.h |  2 ++
> >  target/i386/kvm.c    | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 81 insertions(+)
> > 
> > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> > index 0b64b8e..97d8d9d 100644
> > --- a/include/sysemu/kvm.h
> > +++ b/include/sysemu/kvm.h
> > @@ -463,6 +463,8 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension);
> >  
> >  uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
> >                                        uint32_t index, int reg);
> > +uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index);
> > +
> >  
> >  void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
> >  
> > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > index 9313602..70d8606 100644
> > --- a/target/i386/kvm.c
> > +++ b/target/i386/kvm.c
> > @@ -107,6 +107,7 @@ static int has_pit_state2;
> >  static bool has_msr_mcg_ext_ctl;
> >  
> >  static struct kvm_cpuid2 *cpuid_cache;
> > +static struct kvm_msr_list *kvm_feature_msrs;
> 
> I was going to suggest moving this to KVMState, but KVMState is a
> arch-independent struct.  I guess we'll have to live with this by
> now.
> 
> >  
> >  int kvm_has_pit_state2(void)
> >  {
> > @@ -420,6 +421,41 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
> >      return ret;
> >  }
> >  
> > +uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
> > +{
> > +    struct {
> > +        struct kvm_msrs info;
> > +        struct kvm_msr_entry entries[1];
> > +    } msr_data;
> > +    uint32_t ret;
> > +
> > +    /*Check if the requested feature MSR is supported*/
> > +    int i;
> > +    for (i = 0; i < kvm_feature_msrs->nmsrs; i++) {
> > +        if (index == kvm_feature_msrs->indices[i]) {
> > +            break;
> > +        }
> > +    }
> 
> We are duplicating the logic that's already inside KVM (checking
> the list of supported MSRs and returning an error).  Can't we
> make this simpler and just remove this check?  If the MSR is not
> supported, KVM_GET_MSRS would just return 0.

Yes, seems dup. but if we remove this hunk, the
kvm_get_supported_feature_msrs() is unnecessary at all.
> 
> > +    if (i >= kvm_feature_msrs->nmsrs) {
> > +        fprintf(stderr, "Requested MSR (index= %d) is not supported.\n", index);
> 
> This error message is meaningless for QEMU users.  Do we really
> need to print it?
> 
> > +        return 0;
> 
> Returning 0 for MSRs that are not supported is probably OK, but
> we need to see this function being used, to be sure (I didn't
> look at all the patches in this series yet).
> 
> > +    }
> > +
> > +    msr_data.info.nmsrs = 1;
> > +    msr_data.entries[0].index = index;
> > +
> > +    ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data);
> > +
> > +    if (ret != 1) {
> > +        fprintf(stderr, "KVM get MSR (index=0x%x) feature failed, %s\n",
> > +            index, strerror(-ret));
> > +        exit(1);
> 
> I'm not a fan of APIs that write to stdout/stderr or exit(),
> unless they are clearly just initialization functions that should
> never fail in normal circumstances.
> 
> But if you call KVM_GET_MSRS for all MSRs at
> kvm_get_supported_feature_msrs() below, this function would never
> need to report an error.
> 
> We are already going through the trouble of allocating
> kvm_feature_msrs in kvm_get_supported_feature_msrs() anyway.

I had looked into KVM KVM_GET_MSRS handling, though less likely, still
have changes copy_from/to_user() fail. Therefore I added the check and
warning here, in that seldom case happens.

exit() or not, I'm also not sure. Seems not necessary, while my usual
programming philosophy is never let wrong goes further. For in the case
of ioctl(KVM_GET_MSRS) returns failure, something goes wrong underlying,
I would prefer to let user know this, rather than let it pass and goes
further.
> 
> > +    }
> > +
> > +    return msr_data.entries[0].data;
> > +}
> > +
> > +
> >  typedef struct HWPoisonPage {
> >      ram_addr_t ram_addr;
> >      QLIST_ENTRY(HWPoisonPage) list;
> > @@ -1238,7 +1274,45 @@ void kvm_arch_do_init_vcpu(X86CPU *cpu)
> >          env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
> >      }
> >  }
> 
> Nit: please add an extra newline between functions.
> 
> > +static int kvm_get_supported_feature_msrs(KVMState *s)
> > +{
> > +    static int kvm_supported_feature_msrs;
> > +    int ret = 0;
> > +
> > +    if (kvm_supported_feature_msrs == 0) {
> 
> Do you really need the kvm_supported_feature_msrs variable?  You
> could just check if kvm_feature_msrs is NULL.
> 
> I also suggest doing this:
> 
>     if (already initialized) {
>        return 0;
>     }
> 
>     /* regular initialization code here */
> 
> to reduce one indentation level.
> 
Right.
> 
> > +        struct kvm_msr_list msr_list;
> > +
> > +        kvm_supported_feature_msrs++;
> > +
> > +        msr_list.nmsrs = 0;
> > +        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list);
> > +        if (ret < 0 && ret != -E2BIG) {
> > +            return ret;
> 
> What if the host KVM version doesn't support
> KVM_GET_MSR_FEATURE_INDEX_LIST?

Going to add kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES) before
this ioctl. if not support, return -1. (Then the kvm_init will fail and
exit.)
> 
> > +        }
> > +
> > +        assert(msr_list.nmsrs > 0);
> > +        kvm_feature_msrs = (struct kvm_msr_list *) \
> > +            g_malloc0(sizeof(msr_list) +
> > +                     msr_list.nmsrs * sizeof(msr_list.indices[0]));
> > +        if (kvm_feature_msrs == NULL) {
> 
> g_malloc0() never returns NULL on failure.  See
> https://developer.gnome.org/glib/2.56/glib-Memory-Allocation.html#glib-Memory-Allocation.description
> 
Thanks!

> > +            fprintf(stderr, "Failed to allocate space for KVM Feature MSRs"
> > +                "which has %d MSRs\n", msr_list.nmsrs);
> > +            return -1;
> > +        }
> > +
> > +        kvm_feature_msrs->nmsrs = msr_list.nmsrs;
> > +        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs);
> >  
> > +        if (ret < 0) {  /*ioctl failure*/
> 
> Small nit: the "ioctl failure" comment seems unnecessary, I would
> just remove it.
> 
> > +            fprintf(stderr, "Fetch KVM feature MSRs failed: %s\n",
> > +                strerror(-ret));
> > +            g_free(kvm_feature_msrs);
> > +            return ret;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> 
> Same as above: please add an extra newline between functions.
> 
> >  static int kvm_get_supported_msrs(KVMState *s)
> >  {
> >      static int kvm_supported_msrs;
> > @@ -1400,6 +1474,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
> >          return ret;
> >      }
> >  
> > +    ret = kvm_get_supported_feature_msrs(s);
> > +    if (ret < 0) {
> > +        return ret;
> > +    }
> > +
> >      uname(&utsname);
> >      lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
> >  
> > -- 
> > 1.8.3.1
> > 
> > 
>
Eduardo Habkost Aug. 18, 2018, 3:05 p.m. | #3
On Sat, Aug 18, 2018 at 03:27:01PM +0800, Robert Hoo wrote:
> On Fri, 2018-08-17 at 10:18 -0300, Eduardo Habkost wrote:
> > Thanks for the patch, comments below:
> > 
> > On Fri, Aug 10, 2018 at 10:06:28PM +0800, Robert Hoo wrote:
> > > Add kvm_get_supported_feature_msrs() to get supported MSR feature index list.
> > > Add kvm_arch_get_supported_msr_feature() to get each MSR features value.
> > > 
> > > kvm_get_supported_feature_msrs() is called in kvm_arch_init().
> > > kvm_arch_get_supported_msr_feature() is called by
> > > x86_cpu_get_supported_feature_word().
> > > 
> > > Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> > > ---
> > >  include/sysemu/kvm.h |  2 ++
> > >  target/i386/kvm.c    | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 81 insertions(+)
> > > 
> > > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> > > index 0b64b8e..97d8d9d 100644
> > > --- a/include/sysemu/kvm.h
> > > +++ b/include/sysemu/kvm.h
> > > @@ -463,6 +463,8 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension);
> > >  
> > >  uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
> > >                                        uint32_t index, int reg);
> > > +uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index);
> > > +
> > >  
> > >  void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
> > >  
> > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > > index 9313602..70d8606 100644
> > > --- a/target/i386/kvm.c
> > > +++ b/target/i386/kvm.c
> > > @@ -107,6 +107,7 @@ static int has_pit_state2;
> > >  static bool has_msr_mcg_ext_ctl;
> > >  
> > >  static struct kvm_cpuid2 *cpuid_cache;
> > > +static struct kvm_msr_list *kvm_feature_msrs;
> > 
> > I was going to suggest moving this to KVMState, but KVMState is a
> > arch-independent struct.  I guess we'll have to live with this by
> > now.
> > 
> > >  
> > >  int kvm_has_pit_state2(void)
> > >  {
> > > @@ -420,6 +421,41 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
> > >      return ret;
> > >  }
> > >  
> > > +uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
> > > +{
> > > +    struct {
> > > +        struct kvm_msrs info;
> > > +        struct kvm_msr_entry entries[1];
> > > +    } msr_data;
> > > +    uint32_t ret;
> > > +
> > > +    /*Check if the requested feature MSR is supported*/
> > > +    int i;
> > > +    for (i = 0; i < kvm_feature_msrs->nmsrs; i++) {
> > > +        if (index == kvm_feature_msrs->indices[i]) {
> > > +            break;
> > > +        }
> > > +    }
> > 
> > We are duplicating the logic that's already inside KVM (checking
> > the list of supported MSRs and returning an error).  Can't we
> > make this simpler and just remove this check?  If the MSR is not
> > supported, KVM_GET_MSRS would just return 0.
> 
> Yes, seems dup. but if we remove this hunk, the
> kvm_get_supported_feature_msrs() is unnecessary at all.

So maybe kvm_get_supported_feature_msrs() really is unnecessary?

We seem to have two alternatives:
* calling KVM_GET_MSRS for all MSRs only once, at
  kvm_get_supported_feature_msrs() (as I had suggested
  previously);
* calling KVM_GET_MSRS for one MSR unconditionally here, but
  _not_ treating 0 as a fatal error.

I prefer the former, but I would be OK with both alternatives.
Note that we need to check for KVM capabilities in both cases.


> > 
> > > +    if (i >= kvm_feature_msrs->nmsrs) {
> > > +        fprintf(stderr, "Requested MSR (index= %d) is not supported.\n", index);
> > 
> > This error message is meaningless for QEMU users.  Do we really
> > need to print it?
> > 
> > > +        return 0;
> > 
> > Returning 0 for MSRs that are not supported is probably OK, but
> > we need to see this function being used, to be sure (I didn't
> > look at all the patches in this series yet).
> > 
> > > +    }
> > > +
> > > +    msr_data.info.nmsrs = 1;
> > > +    msr_data.entries[0].index = index;
> > > +
> > > +    ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data);
> > > +
> > > +    if (ret != 1) {
> > > +        fprintf(stderr, "KVM get MSR (index=0x%x) feature failed, %s\n",
> > > +            index, strerror(-ret));
> > > +        exit(1);
> > 
> > I'm not a fan of APIs that write to stdout/stderr or exit(),
> > unless they are clearly just initialization functions that should
> > never fail in normal circumstances.
> > 
> > But if you call KVM_GET_MSRS for all MSRs at
> > kvm_get_supported_feature_msrs() below, this function would never
> > need to report an error.
> > 
> > We are already going through the trouble of allocating
> > kvm_feature_msrs in kvm_get_supported_feature_msrs() anyway.
> 
> I had looked into KVM KVM_GET_MSRS handling, though less likely, still
> have changes copy_from/to_user() fail. Therefore I added the check and
> warning here, in that seldom case happens.
> 
> exit() or not, I'm also not sure. Seems not necessary, while my usual
> programming philosophy is never let wrong goes further. For in the case
> of ioctl(KVM_GET_MSRS) returns failure, something goes wrong underlying,
> I would prefer to let user know this, rather than let it pass and goes
> further.

We probably have no option other than exiting if KVM_GET_MSRS
returns an unexpected error.  It's just that I find the code
harder to review, because we need to be sure this won't terminate
QEMU under normal circumstances.

But if you demonstrate that all errors here are truly fatal and
unexpected, calling exit() may be OK.  (See the
KVM_CAP_GET_MSR_FEATURES comment below for one example where
exiting is not going to be OK.)

Proper error reporting using Error** would be even better than
exiting, but considering that none of the functions at i386/cpu.c
return errors using Error**, I won't force you to do that.


> > 
[...]
> > > +        struct kvm_msr_list msr_list;
> > > +
> > > +        kvm_supported_feature_msrs++;
> > > +
> > > +        msr_list.nmsrs = 0;
> > > +        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list);
> > > +        if (ret < 0 && ret != -E2BIG) {
> > > +            return ret;
> > 
> > What if the host KVM version doesn't support
> > KVM_GET_MSR_FEATURE_INDEX_LIST?
> 
> Going to add kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES) before
> this ioctl. if not support, return -1. (Then the kvm_init will fail and
> exit.)

We don't want QEMU to refuse to run if the kernel doesn't have
KVM_CAP_GET_MSR_FEATURES.  We can treat missing capability as
equivalent to returning an empty list of MSRs.
Robert Hoo Aug. 23, 2018, 6:28 a.m. | #4
On Sat, 2018-08-18 at 12:05 -0300, Eduardo Habkost wrote:

> > > >  
> > > >  int kvm_has_pit_state2(void)
> > > >  {
> > > > @@ -420,6 +421,41 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
> > > >      return ret;
> > > >  }
> > > >  
> > > > +uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
> > > > +{
> > > > +    struct {
> > > > +        struct kvm_msrs info;
> > > > +        struct kvm_msr_entry entries[1];
> > > > +    } msr_data;
> > > > +    uint32_t ret;
> > > > +
> > > > +    /*Check if the requested feature MSR is supported*/
> > > > +    int i;
> > > > +    for (i = 0; i < kvm_feature_msrs->nmsrs; i++) {
> > > > +        if (index == kvm_feature_msrs->indices[i]) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > 
> > > We are duplicating the logic that's already inside KVM (checking
> > > the list of supported MSRs and returning an error).  Can't we
> > > make this simpler and just remove this check?  If the MSR is not
> > > supported, KVM_GET_MSRS would just return 0.
> > 
> > Yes, seems dup. but if we remove this hunk, the
> > kvm_get_supported_feature_msrs() is unnecessary at all.
> 
> So maybe kvm_get_supported_feature_msrs() really is unnecessary?

I'll keep it, for 1) check KVM_CAP_GET_MSR_FEATURES capabilities; 2) get
kvm_feature_msrs, so later kvm_arch_get_supported_msr_feature() use it
to check if MSR features are supported.
> 
> We seem to have two alternatives:
> * calling KVM_GET_MSRS for all MSRs only once, at
>   kvm_get_supported_feature_msrs() (as I had suggested
>   previously);
> * calling KVM_GET_MSRS for one MSR unconditionally here, but
>   _not_ treating 0 as a fatal error.
> 
> I prefer the former, but I would be OK with both alternatives.
> Note that we need to check for KVM capabilities in both cases.
> 
> 
I'll choose the latter :).
> > > 
> > > > +    if (i >= kvm_feature_msrs->nmsrs) {
> > > > +        fprintf(stderr, "Requested MSR (index= %d) is not supported.\n", index);
> > > 
> > > This error message is meaningless for QEMU users.  Do we really
> > > need to print it?
> > > 
> > > > +        return 0;
> > > 
> > > Returning 0 for MSRs that are not supported is probably OK, but
> > > we need to see this function being used, to be sure (I didn't
> > > look at all the patches in this series yet).
> > > 
> > > > +    }
> > > > +
> > > > +    msr_data.info.nmsrs = 1;
> > > > +    msr_data.entries[0].index = index;
> > > > +
> > > > +    ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data);
> > > > +
> > > > +    if (ret != 1) {
> > > > +        fprintf(stderr, "KVM get MSR (index=0x%x) feature failed, %s\n",
> > > > +            index, strerror(-ret));
> > > > +        exit(1);
> > > 
> > > I'm not a fan of APIs that write to stdout/stderr or exit(),
> > > unless they are clearly just initialization functions that should
> > > never fail in normal circumstances.
> > > 
> > > But if you call KVM_GET_MSRS for all MSRs at
> > > kvm_get_supported_feature_msrs() below, this function would never
> > > need to report an error.
> > > 
> > > We are already going through the trouble of allocating
> > > kvm_feature_msrs in kvm_get_supported_feature_msrs() anyway.
> > 
> > I had looked into KVM KVM_GET_MSRS handling, though less likely, still
> > have changes copy_from/to_user() fail. Therefore I added the check and
> > warning here, in that seldom case happens.
> > 
> > exit() or not, I'm also not sure. Seems not necessary, while my usual
> > programming philosophy is never let wrong goes further. For in the case
> > of ioctl(KVM_GET_MSRS) returns failure, something goes wrong underlying,
> > I would prefer to let user know this, rather than let it pass and goes
> > further.
> 
> We probably have no option other than exiting if KVM_GET_MSRS
> returns an unexpected error.  It's just that I find the code
> harder to review, because we need to be sure this won't terminate
> QEMU under normal circumstances.
> 
> But if you demonstrate that all errors here are truly fatal and
> unexpected, calling exit() may be OK.  (See the
> KVM_CAP_GET_MSR_FEATURES comment below for one example where
> exiting is not going to be OK.)
> 
> Proper error reporting using Error** would be even better than
> exiting, but considering that none of the functions at i386/cpu.c
> return errors using Error**, I won't force you to do that.
> 
Thanks.
> 
> > > 
> [...]
> > > > +        struct kvm_msr_list msr_list;
> > > > +
> > > > +        kvm_supported_feature_msrs++;
> > > > +
> > > > +        msr_list.nmsrs = 0;
> > > > +        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list);
> > > > +        if (ret < 0 && ret != -E2BIG) {
> > > > +            return ret;
> > > 
> > > What if the host KVM version doesn't support
> > > KVM_GET_MSR_FEATURE_INDEX_LIST?
> > 
> > Going to add kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES) before
> > this ioctl. if not support, return -1. (Then the kvm_init will fail and
> > exit.)
> 
> We don't want QEMU to refuse to run if the kernel doesn't have
> KVM_CAP_GET_MSR_FEATURES.  We can treat missing capability as
> equivalent to returning an empty list of MSRs.
Yes. I'll let caller (kvm_arch_init) ignore the return value but a
simple warning.
>
Eduardo Habkost Aug. 23, 2018, 5:11 p.m. | #5
On Thu, Aug 23, 2018 at 02:28:28PM +0800, Robert Hoo wrote:
> On Sat, 2018-08-18 at 12:05 -0300, Eduardo Habkost wrote:
[...]
> > We don't want QEMU to refuse to run if the kernel doesn't have
> > KVM_CAP_GET_MSR_FEATURES.  We can treat missing capability as
> > equivalent to returning an empty list of MSRs.
> Yes. I'll let caller (kvm_arch_init) ignore the return value but a
> simple warning.

Warnings tend to be ignored and are generally a sign that QEMU
isn't doing the right thing.  Sometimes we have no choice, but I
don't think that's the case here.

As far as I can see, we have only two possibilities here:
1) The host can run a VM that behaves exactly as requested on the
   command-line (no warning required).
2) The host can't run the requested configuration (fatal error,
   not a warning).
Paolo Bonzini Aug. 23, 2018, 5:28 p.m. | #6
On 23/08/2018 19:11, Eduardo Habkost wrote:
>>> We don't want QEMU to refuse to run if the kernel doesn't have
>>> KVM_CAP_GET_MSR_FEATURES.  We can treat missing capability as
>>> equivalent to returning an empty list of MSRs.
>> Yes. I'll let caller (kvm_arch_init) ignore the return value but a
>> simple warning.
> Warnings tend to be ignored and are generally a sign that QEMU
> isn't doing the right thing.  Sometimes we have no choice, but I
> don't think that's the case here.
> 
> As far as I can see, we have only two possibilities here:
> 1) The host can run a VM that behaves exactly as requested on the
>    command-line (no warning required).
> 2) The host can't run the requested configuration (fatal error,
>    not a warning).

Right, but if KVM_CAP_GET_MSR_FEATURES is not available I guess you can
assume the MSRs to be zero for everything except "-cpu host".

Paolo
Eduardo Habkost Aug. 23, 2018, 5:36 p.m. | #7
On Thu, Aug 23, 2018 at 07:28:25PM +0200, Paolo Bonzini wrote:
> On 23/08/2018 19:11, Eduardo Habkost wrote:
> >>> We don't want QEMU to refuse to run if the kernel doesn't have
> >>> KVM_CAP_GET_MSR_FEATURES.  We can treat missing capability as
> >>> equivalent to returning an empty list of MSRs.
> >> Yes. I'll let caller (kvm_arch_init) ignore the return value but a
> >> simple warning.
> > Warnings tend to be ignored and are generally a sign that QEMU
> > isn't doing the right thing.  Sometimes we have no choice, but I
> > don't think that's the case here.
> > 
> > As far as I can see, we have only two possibilities here:
> > 1) The host can run a VM that behaves exactly as requested on the
> >    command-line (no warning required).
> > 2) The host can't run the requested configuration (fatal error,
> >    not a warning).
> 
> Right, but if KVM_CAP_GET_MSR_FEATURES is not available I guess you can
> assume the MSRs to be zero for everything except "-cpu host".

Yes, that would be simplest way to implement the above.  Then
QEMU would refuse to run if the feature was part of the requested
configuration (2), or not care at all because the feature was not
set in the configuration (1).

But I'm not sure I follow the suggestion to not consider the MSR
to be zero on "-cpu host".  If we don't need and KVM-side code to
support a given MSR feature, we can enable it on all models.  IF
we need KVM-side code, we can't enable it even on "-cpu host".
This is not different from CPUID-based features.
Paolo Bonzini Aug. 23, 2018, 8:23 p.m. | #8
On 23/08/2018 19:36, Eduardo Habkost wrote:
>> Right, but if KVM_CAP_GET_MSR_FEATURES is not available I guess you can
>> assume the MSRs to be zero for everything except "-cpu host".
> Yes, that would be simplest way to implement the above.  Then
> QEMU would refuse to run if the feature was part of the requested
> configuration (2), or not care at all because the feature was not
> set in the configuration (1).
> 
> But I'm not sure I follow the suggestion to not consider the MSR
> to be zero on "-cpu host".  If we don't need and KVM-side code to
> support a given MSR feature, we can enable it on all models.

The case I was thinking about, is where there is no KVM-side code to
support the MSR feature, but you still need the host CPU to have it.
Without KVM's feature MSR support, you cannot read the host value of the
MSR.

My idea was that feature MSRs will default to the host value returned by
KVM_GET_MSR on the /dev/kvm file descriptor, so "-cpu host" could just
ignore KVM_CAP_GET_MSR_FEATURES and use KVM's default value of the MSRs.

However, I guess we can just use the default value for _all_ CPU models,
because in practice the only effect would be on nested VMX MSRs, and
nested VMX can only be migrated on kernels that have
KVM_CAP_GET_MSR_FEATURES.  So in practice people using nested VMX are
using "-cpu host" anyway, not "-cpu SandyBridge,+vmx".

My proposalis: basically, if KVM_CAP_GET_MSR_FEATURES is unavailable:

- MSR-based features would cause an error if specified on the command line;

- kvm_arch_init_vcpu would skip the KVM_SET_MSR for feature MSRs completely.

What do you think?

Paolo
Eduardo Habkost Aug. 25, 2018, 5:27 p.m. | #9
On Thu, Aug 23, 2018 at 10:23:53PM +0200, Paolo Bonzini wrote:
> On 23/08/2018 19:36, Eduardo Habkost wrote:
> >> Right, but if KVM_CAP_GET_MSR_FEATURES is not available I guess you can
> >> assume the MSRs to be zero for everything except "-cpu host".
> > Yes, that would be simplest way to implement the above.  Then
> > QEMU would refuse to run if the feature was part of the requested
> > configuration (2), or not care at all because the feature was not
> > set in the configuration (1).
> > 
> > But I'm not sure I follow the suggestion to not consider the MSR
> > to be zero on "-cpu host".  If we don't need and KVM-side code to
> > support a given MSR feature, we can enable it on all models.
> 
> The case I was thinking about, is where there is no KVM-side code to
> support the MSR feature, but you still need the host CPU to have it.
> Without KVM's feature MSR support, you cannot read the host value of the
> MSR.

Don't we always need some KVM code to support the feature, even
if it's just to make KVM enable the feature by default?

> 
> My idea was that feature MSRs will default to the host value returned by
> KVM_GET_MSR on the /dev/kvm file descriptor, so "-cpu host" could just
> ignore KVM_CAP_GET_MSR_FEATURES and use KVM's default value of the MSRs.
> 
> However, I guess we can just use the default value for _all_ CPU models,
> because in practice the only effect would be on nested VMX MSRs, and
> nested VMX can only be migrated on kernels that have
> KVM_CAP_GET_MSR_FEATURES.  So in practice people using nested VMX are
> using "-cpu host" anyway, not "-cpu SandyBridge,+vmx".

If the default is to use the host value, I see two problems:
1) we can't use the default on any migration-safe CPU model
(i.e.  all except "host"); 2) it will be unsafe for older QEMU
versions (that don't know yet how to call KVM_SET_MSR for that
MSR).

If this is just about VMX caps, this shouldn't be a problem
because we already treat VMX as not migration-safe.  We will need
to keep that in mind if we want to make VMX migration-safe in the
future, though.

> 
> My proposalis: basically, if KVM_CAP_GET_MSR_FEATURES is unavailable:
> 
> - MSR-based features would cause an error if specified on the command line;
> 
> - kvm_arch_init_vcpu would skip the KVM_SET_MSR for feature MSRs completely.

If we already had some MSR features being enabled by default
before KVM_CAP_GET_MSR_FEATURES was introduced (I didn't know we
had such cases), this sense.  But I don't think we should
do it for new MSRs.
Robert Hoo Aug. 30, 2018, 4:22 a.m. | #10
On Thu, 2018-08-23 at 14:11 -0300, Eduardo Habkost wrote:
> On Thu, Aug 23, 2018 at 02:28:28PM +0800, Robert Hoo wrote:
> > On Sat, 2018-08-18 at 12:05 -0300, Eduardo Habkost wrote:
> [...]
> > > We don't want QEMU to refuse to run if the kernel doesn't have
> > > KVM_CAP_GET_MSR_FEATURES.  We can treat missing capability as
> > > equivalent to returning an empty list of MSRs.
> > Yes. I'll let caller (kvm_arch_init) ignore the return value but a
> > simple warning.
> 
> Warnings tend to be ignored and are generally a sign that QEMU
> isn't doing the right thing.  Sometimes we have no choice, but I
> don't think that's the case here.
> 
> As far as I can see, we have only two possibilities here:
> 1) The host can run a VM that behaves exactly as requested on the
>    command-line (no warning required).
> 2) The host can't run the requested configuration (fatal error,
>    not a warning).
> 
I mean the kvm_arch_init() --> kvm_get_supported_feature_msrs(), when
kvm_get_supported_feature_msrs() returns error, can we ignore it?
The cases of kvm_get_supported_feature_msrs() returning error:
1) underlying KVM doesn't support GET_MSR_FEATURES capability.
2) error in KVM_GET_MSR_FEATURE_INDEX_LIST.

given the principle you guided before, I think we can ignore above error
cases, and later kvm_arch_get_supported_msr_feature() would always
return 0 (of course, except those special cases like
IA32_ARCH_CAPABILITIES.RSBA)
Eduardo Habkost Aug. 30, 2018, 6:28 p.m. | #11
On Thu, Aug 30, 2018 at 12:22:10PM +0800, Robert Hoo wrote:
> On Thu, 2018-08-23 at 14:11 -0300, Eduardo Habkost wrote:
> > On Thu, Aug 23, 2018 at 02:28:28PM +0800, Robert Hoo wrote:
> > > On Sat, 2018-08-18 at 12:05 -0300, Eduardo Habkost wrote:
> > [...]
> > > > We don't want QEMU to refuse to run if the kernel doesn't have
> > > > KVM_CAP_GET_MSR_FEATURES.  We can treat missing capability as
> > > > equivalent to returning an empty list of MSRs.
> > > Yes. I'll let caller (kvm_arch_init) ignore the return value but a
> > > simple warning.
> > 
> > Warnings tend to be ignored and are generally a sign that QEMU
> > isn't doing the right thing.  Sometimes we have no choice, but I
> > don't think that's the case here.
> > 
> > As far as I can see, we have only two possibilities here:
> > 1) The host can run a VM that behaves exactly as requested on the
> >    command-line (no warning required).
> > 2) The host can't run the requested configuration (fatal error,
> >    not a warning).
> > 
> I mean the kvm_arch_init() --> kvm_get_supported_feature_msrs(), when
> kvm_get_supported_feature_msrs() returns error, can we ignore it?
> The cases of kvm_get_supported_feature_msrs() returning error:
> 1) underlying KVM doesn't support GET_MSR_FEATURES capability.
> 2) error in KVM_GET_MSR_FEATURE_INDEX_LIST.
> 
> given the principle you guided before, I think we can ignore above error
> cases, and later kvm_arch_get_supported_msr_feature() would always
> return 0 (of course, except those special cases like
> IA32_ARCH_CAPABILITIES.RSBA)

Making kvm_arch_get_supported_msr_feature() return 0 if the
capability isn't available (except for RSBA) seems to be enough
to ensure we do the right thing on both cases (1 and 2 above).

Now, if KVM_GET_MSR_FEATURE_INDEX_LIST returns an unexpected
error, it sounds like an exceptional situation that can justify a
fatal error (or at least a warning).

Patch

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 0b64b8e..97d8d9d 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -463,6 +463,8 @@  int kvm_vm_check_extension(KVMState *s, unsigned int extension);
 
 uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
                                       uint32_t index, int reg);
+uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index);
+
 
 void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
 
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 9313602..70d8606 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -107,6 +107,7 @@  static int has_pit_state2;
 static bool has_msr_mcg_ext_ctl;
 
 static struct kvm_cpuid2 *cpuid_cache;
+static struct kvm_msr_list *kvm_feature_msrs;
 
 int kvm_has_pit_state2(void)
 {
@@ -420,6 +421,41 @@  uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
     return ret;
 }
 
+uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
+{
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entries[1];
+    } msr_data;
+    uint32_t ret;
+
+    /*Check if the requested feature MSR is supported*/
+    int i;
+    for (i = 0; i < kvm_feature_msrs->nmsrs; i++) {
+        if (index == kvm_feature_msrs->indices[i]) {
+            break;
+        }
+    }
+    if (i >= kvm_feature_msrs->nmsrs) {
+        fprintf(stderr, "Requested MSR (index= %d) is not supported.\n", index);
+        return 0;
+    }
+
+    msr_data.info.nmsrs = 1;
+    msr_data.entries[0].index = index;
+
+    ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data);
+
+    if (ret != 1) {
+        fprintf(stderr, "KVM get MSR (index=0x%x) feature failed, %s\n",
+            index, strerror(-ret));
+        exit(1);
+    }
+
+    return msr_data.entries[0].data;
+}
+
+
 typedef struct HWPoisonPage {
     ram_addr_t ram_addr;
     QLIST_ENTRY(HWPoisonPage) list;
@@ -1238,7 +1274,45 @@  void kvm_arch_do_init_vcpu(X86CPU *cpu)
         env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
     }
 }
+static int kvm_get_supported_feature_msrs(KVMState *s)
+{
+    static int kvm_supported_feature_msrs;
+    int ret = 0;
+
+    if (kvm_supported_feature_msrs == 0) {
+        struct kvm_msr_list msr_list;
+
+        kvm_supported_feature_msrs++;
+
+        msr_list.nmsrs = 0;
+        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list);
+        if (ret < 0 && ret != -E2BIG) {
+            return ret;
+        }
+
+        assert(msr_list.nmsrs > 0);
+        kvm_feature_msrs = (struct kvm_msr_list *) \
+            g_malloc0(sizeof(msr_list) +
+                     msr_list.nmsrs * sizeof(msr_list.indices[0]));
+        if (kvm_feature_msrs == NULL) {
+            fprintf(stderr, "Failed to allocate space for KVM Feature MSRs"
+                "which has %d MSRs\n", msr_list.nmsrs);
+            return -1;
+        }
+
+        kvm_feature_msrs->nmsrs = msr_list.nmsrs;
+        ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs);
 
+        if (ret < 0) {  /*ioctl failure*/
+            fprintf(stderr, "Fetch KVM feature MSRs failed: %s\n",
+                strerror(-ret));
+            g_free(kvm_feature_msrs);
+            return ret;
+        }
+    }
+
+    return 0;
+}
 static int kvm_get_supported_msrs(KVMState *s)
 {
     static int kvm_supported_msrs;
@@ -1400,6 +1474,11 @@  int kvm_arch_init(MachineState *ms, KVMState *s)
         return ret;
     }
 
+    ret = kvm_get_supported_feature_msrs(s);
+    if (ret < 0) {
+        return ret;
+    }
+
     uname(&utsname);
     lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;