diff mbox series

[v6,10/19] i386: move eVMCS enablement to hyperv_init_vcpu()

Message ID 20210422161130.652779-11-vkuznets@redhat.com
State New
Headers show
Series i386: KVM: expand Hyper-V features early | expand

Commit Message

Vitaly Kuznetsov April 22, 2021, 4:11 p.m. UTC
hyperv_expand_features() will be called before we create vCPU so
evmcs enablement should go away. hyperv_init_vcpu() looks like the
right place.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 target/i386/kvm/kvm.c | 60 ++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 23 deletions(-)

Comments

Eduardo Habkost May 21, 2021, 9:20 p.m. UTC | #1
On Thu, Apr 22, 2021 at 06:11:21PM +0200, Vitaly Kuznetsov wrote:
> hyperv_expand_features() will be called before we create vCPU so
> evmcs enablement should go away. hyperv_init_vcpu() looks like the
> right place.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  target/i386/kvm/kvm.c | 60 ++++++++++++++++++++++++++-----------------
>  1 file changed, 37 insertions(+), 23 deletions(-)
> 
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 6b391db7a030..a2ef2dc154a2 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -962,6 +962,7 @@ static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
>  {
>      struct kvm_cpuid2 *cpuid;
>      int max = 7; /* 0x40000000..0x40000005, 0x4000000A */
> +    int i;
>  
>      /*
>       * When the buffer is too small, KVM_GET_SUPPORTED_HV_CPUID fails with
> @@ -971,6 +972,22 @@ static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
>      while ((cpuid = try_get_hv_cpuid(cs, max)) == NULL) {
>          max++;
>      }
> +
> +    /*
> +     * KVM_GET_SUPPORTED_HV_CPUID does not set EVMCS CPUID bit before
> +     * KVM_CAP_HYPERV_ENLIGHTENED_VMCS is enabled but we want to get the
> +     * information early, just check for the capability and set the bit
> +     * manually.
> +     */

Should we add a comment noting that this hack won't be necessary
if using the system ioctl?  I assume we still want to support
Linux < v5.11 for a while, so simply 


> +    if (kvm_check_extension(cs->kvm_state,
> +                            KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) {
> +        for (i = 0; i < cpuid->nent; i++) {
> +            if (cpuid->entries[i].function == HV_CPUID_ENLIGHTMENT_INFO) {
> +                cpuid->entries[i].eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
> +            }
> +        }
> +    }
> +
>      return cpuid;
>  }
>  
> @@ -1200,24 +1217,6 @@ static int hyperv_expand_features(CPUState *cs)
>      if (!hyperv_enabled(cpu))
>          return 0;
>  
> -    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ||
> -        cpu->hyperv_passthrough) {
> -        uint16_t evmcs_version;
> -
> -        r = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
> -                                (uintptr_t)&evmcs_version);
> -
> -        if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) && r) {
> -            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
> -                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
> -            return -ENOSYS;
> -        }
> -
> -        if (!r) {
> -            cpu->hyperv_nested[0] = evmcs_version;
> -        }
> -    }
> -
>      if (cpu->hyperv_passthrough) {
>          cpu->hyperv_vendor_id[0] =
>              hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EBX);
> @@ -1455,6 +1454,21 @@ static int hyperv_init_vcpu(X86CPU *cpu)
>          }
>      }
>  
> +    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
> +        uint16_t evmcs_version;
> +
> +        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
> +                                  (uintptr_t)&evmcs_version);
> +
> +        if (ret < 0) {
> +            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
> +                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
> +            return ret;
> +        }
> +
> +        cpu->hyperv_nested[0] = evmcs_version;

Wait, won't this break guest ABI?  Do we want to make
HYPERV_FEAT_EVMCS a migration blocker until this is fixed?


> +    }
> +
>      return 0;
>  }
>  
> @@ -1519,6 +1533,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
>      }
>  
>      if (hyperv_enabled(cpu)) {
> +        r = hyperv_init_vcpu(cpu);
> +        if (r) {
> +            return r;
> +        }
> +
>          cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
>          kvm_base = KVM_CPUID_SIGNATURE_NEXT;
>          has_msr_hv_hypercall = true;
> @@ -1868,11 +1887,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
>  
>      kvm_init_msrs(cpu);
>  
> -    r = hyperv_init_vcpu(cpu);
> -    if (r) {
> -        goto fail;
> -    }
> -
>      return 0;

I would move the two hunks above to a separate patch, but not a
big deal.  The guest ABI issue is existing, and the comment
suggestion can be done in a follow up patch, so:

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>

>  
>   fail:
> -- 
> 2.30.2
>
Vitaly Kuznetsov May 24, 2021, noon UTC | #2
Eduardo Habkost <ehabkost@redhat.com> writes:

> On Thu, Apr 22, 2021 at 06:11:21PM +0200, Vitaly Kuznetsov wrote:
>> hyperv_expand_features() will be called before we create vCPU so
>> evmcs enablement should go away. hyperv_init_vcpu() looks like the
>> right place.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>>  target/i386/kvm/kvm.c | 60 ++++++++++++++++++++++++++-----------------
>>  1 file changed, 37 insertions(+), 23 deletions(-)
>> 
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>> index 6b391db7a030..a2ef2dc154a2 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -962,6 +962,7 @@ static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
>>  {
>>      struct kvm_cpuid2 *cpuid;
>>      int max = 7; /* 0x40000000..0x40000005, 0x4000000A */
>> +    int i;
>>  
>>      /*
>>       * When the buffer is too small, KVM_GET_SUPPORTED_HV_CPUID fails with
>> @@ -971,6 +972,22 @@ static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
>>      while ((cpuid = try_get_hv_cpuid(cs, max)) == NULL) {
>>          max++;
>>      }
>> +
>> +    /*
>> +     * KVM_GET_SUPPORTED_HV_CPUID does not set EVMCS CPUID bit before
>> +     * KVM_CAP_HYPERV_ENLIGHTENED_VMCS is enabled but we want to get the
>> +     * information early, just check for the capability and set the bit
>> +     * manually.
>> +     */
>
> Should we add a comment noting that this hack won't be necessary
> if using the system ioctl?  I assume we still want to support
> Linux < v5.11 for a while, so simply 

Not exactly sure what was supposed to be here but yes, the hack is not
needed with system KVM_GET_SUPPORTED_HV_CPUID ioctl but we want to
support older kernels.

>
>
>> +    if (kvm_check_extension(cs->kvm_state,
>> +                            KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) {
>> +        for (i = 0; i < cpuid->nent; i++) {
>> +            if (cpuid->entries[i].function == HV_CPUID_ENLIGHTMENT_INFO) {
>> +                cpuid->entries[i].eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
>> +            }
>> +        }
>> +    }
>> +
>>      return cpuid;
>>  }
>>  
>> @@ -1200,24 +1217,6 @@ static int hyperv_expand_features(CPUState *cs)
>>      if (!hyperv_enabled(cpu))
>>          return 0;
>>  
>> -    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ||
>> -        cpu->hyperv_passthrough) {
>> -        uint16_t evmcs_version;
>> -
>> -        r = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
>> -                                (uintptr_t)&evmcs_version);
>> -
>> -        if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) && r) {
>> -            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
>> -                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
>> -            return -ENOSYS;
>> -        }
>> -
>> -        if (!r) {
>> -            cpu->hyperv_nested[0] = evmcs_version;
>> -        }
>> -    }
>> -
>>      if (cpu->hyperv_passthrough) {
>>          cpu->hyperv_vendor_id[0] =
>>              hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EBX);
>> @@ -1455,6 +1454,21 @@ static int hyperv_init_vcpu(X86CPU *cpu)
>>          }
>>      }
>>  
>> +    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
>> +        uint16_t evmcs_version;
>> +
>> +        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
>> +                                  (uintptr_t)&evmcs_version);
>> +
>> +        if (ret < 0) {
>> +            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
>> +                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
>> +            return ret;
>> +        }
>> +
>> +        cpu->hyperv_nested[0] = evmcs_version;
>
> Wait, won't this break guest ABI?  Do we want to make
> HYPERV_FEAT_EVMCS a migration blocker until this is fixed?
>

Could you please elaborate on the issue? I read the above is: when 
evmcs' feature was requested, make an attempt to enable
KVM_CAP_HYPERV_ENLIGHTENED_VMCS, and bail out if this fails. Propagate
the the acquired evmcs version to 'cpu->hyperv_nested[]' otherwise.

>
>> +    }
>> +
>>      return 0;
>>  }
>>  
>> @@ -1519,6 +1533,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>      }
>>  
>>      if (hyperv_enabled(cpu)) {
>> +        r = hyperv_init_vcpu(cpu);
>> +        if (r) {
>> +            return r;
>> +        }
>> +
>>          cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
>>          kvm_base = KVM_CPUID_SIGNATURE_NEXT;
>>          has_msr_hv_hypercall = true;
>> @@ -1868,11 +1887,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>  
>>      kvm_init_msrs(cpu);
>>  
>> -    r = hyperv_init_vcpu(cpu);
>> -    if (r) {
>> -        goto fail;
>> -    }
>> -
>>      return 0;
>
> I would move the two hunks above to a separate patch, but not a
> big deal.  The guest ABI issue is existing, and the comment
> suggestion can be done in a follow up patch, so:
>
> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
>

Thanks!

>>  
>>   fail:
>> -- 
>> 2.30.2
>>
Eduardo Habkost May 26, 2021, 4:35 p.m. UTC | #3
On Mon, May 24, 2021 at 02:00:37PM +0200, Vitaly Kuznetsov wrote:
[...]
> >> @@ -1455,6 +1454,21 @@ static int hyperv_init_vcpu(X86CPU *cpu)
> >>          }
> >>      }
> >>  
> >> +    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
> >> +        uint16_t evmcs_version;
> >> +
> >> +        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
> >> +                                  (uintptr_t)&evmcs_version);
> >> +
> >> +        if (ret < 0) {
> >> +            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
> >> +                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
> >> +            return ret;
> >> +        }
> >> +
> >> +        cpu->hyperv_nested[0] = evmcs_version;
> >
> > Wait, won't this break guest ABI?  Do we want to make
> > HYPERV_FEAT_EVMCS a migration blocker until this is fixed?
> >
> 
> Could you please elaborate on the issue? I read the above is: when 
> evmcs' feature was requested, make an attempt to enable
> KVM_CAP_HYPERV_ENLIGHTENED_VMCS, and bail out if this fails. Propagate
> the the acquired evmcs version to 'cpu->hyperv_nested[]' otherwise.

This will be visible to the guest at CPUID[0x4000000A].EAX,
correct?  You are initializing CPUID data with a value that
change depending on the host.

What is supposed to happen if live migrating to to a host with a
different evmcs_version?
Vitaly Kuznetsov May 27, 2021, 7:27 a.m. UTC | #4
Eduardo Habkost <ehabkost@redhat.com> writes:

> On Mon, May 24, 2021 at 02:00:37PM +0200, Vitaly Kuznetsov wrote:
> [...]
>> >> @@ -1455,6 +1454,21 @@ static int hyperv_init_vcpu(X86CPU *cpu)
>> >>          }
>> >>      }
>> >>  
>> >> +    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
>> >> +        uint16_t evmcs_version;
>> >> +
>> >> +        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
>> >> +                                  (uintptr_t)&evmcs_version);
>> >> +
>> >> +        if (ret < 0) {
>> >> +            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
>> >> +                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
>> >> +            return ret;
>> >> +        }
>> >> +
>> >> +        cpu->hyperv_nested[0] = evmcs_version;
>> >
>> > Wait, won't this break guest ABI?  Do we want to make
>> > HYPERV_FEAT_EVMCS a migration blocker until this is fixed?
>> >
>> 
>> Could you please elaborate on the issue? I read the above is: when 
>> evmcs' feature was requested, make an attempt to enable
>> KVM_CAP_HYPERV_ENLIGHTENED_VMCS, and bail out if this fails. Propagate
>> the the acquired evmcs version to 'cpu->hyperv_nested[]' otherwise.
>
> This will be visible to the guest at CPUID[0x4000000A].EAX,
> correct?  You are initializing CPUID data with a value that
> change depending on the host.
>
> What is supposed to happen if live migrating to to a host with a
> different evmcs_version?

(Note: 'evmcs_version' here is the 'maximum supported evmcs version',
not 'used evmcs version').

This is a purely theoretical question at this moment as there's only one
existing (and supported) eVMCS version: 1.

In future, when (and if) e.g. EVMCSv2 appears, we'll have to introduce a
different QEMU option for it most likely (or something like
'hv-evmcs=1', 'hv-evmcs=2' ... ) as how else would we prevent migration
to a host which doesn't support certain eVMCS version (e.g. EVMCSv2 ->
EVMCSv1)?

I'd be fine with hardcoding '1' and just checking that the returned
version is >= 1 for now. Migration blocker seems to be an overkill (as
there's no real problem, we're just trying to make the code future
proof).
Eduardo Habkost May 27, 2021, 7:16 p.m. UTC | #5
On Thu, May 27, 2021 at 09:27:01AM +0200, Vitaly Kuznetsov wrote:
> Eduardo Habkost <ehabkost@redhat.com> writes:
> 
> > On Mon, May 24, 2021 at 02:00:37PM +0200, Vitaly Kuznetsov wrote:
> > [...]
> >> >> @@ -1455,6 +1454,21 @@ static int hyperv_init_vcpu(X86CPU *cpu)
> >> >>          }
> >> >>      }
> >> >>  
> >> >> +    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
> >> >> +        uint16_t evmcs_version;
> >> >> +
> >> >> +        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
> >> >> +                                  (uintptr_t)&evmcs_version);
> >> >> +
> >> >> +        if (ret < 0) {
> >> >> +            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
> >> >> +                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
> >> >> +            return ret;
> >> >> +        }
> >> >> +
> >> >> +        cpu->hyperv_nested[0] = evmcs_version;
> >> >
> >> > Wait, won't this break guest ABI?  Do we want to make
> >> > HYPERV_FEAT_EVMCS a migration blocker until this is fixed?
> >> >
> >> 
> >> Could you please elaborate on the issue? I read the above is: when 
> >> evmcs' feature was requested, make an attempt to enable
> >> KVM_CAP_HYPERV_ENLIGHTENED_VMCS, and bail out if this fails. Propagate
> >> the the acquired evmcs version to 'cpu->hyperv_nested[]' otherwise.
> >
> > This will be visible to the guest at CPUID[0x4000000A].EAX,
> > correct?  You are initializing CPUID data with a value that
> > change depending on the host.
> >
> > What is supposed to happen if live migrating to to a host with a
> > different evmcs_version?
> 
> (Note: 'evmcs_version' here is the 'maximum supported evmcs version',
> not 'used evmcs version').
> 
> This is a purely theoretical question at this moment as there's only one
> existing (and supported) eVMCS version: 1.

Good to know.  :)

> 
> In future, when (and if) e.g. EVMCSv2 appears, we'll have to introduce a
> different QEMU option for it most likely (or something like
> 'hv-evmcs=1', 'hv-evmcs=2' ... ) as how else would we prevent migration
> to a host which doesn't support certain eVMCS version (e.g. EVMCSv2 ->
> EVMCSv1)?
> 
> I'd be fine with hardcoding '1' and just checking that the returned
> version is >= 1 for now. Migration blocker seems to be an overkill (as
> there's no real problem, we're just trying to make the code future
> proof). 

Sounds good to me.  I agree a migration blocker is not the right
solution if currently all hosts have evmcs_version==1.
diff mbox series

Patch

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 6b391db7a030..a2ef2dc154a2 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -962,6 +962,7 @@  static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
 {
     struct kvm_cpuid2 *cpuid;
     int max = 7; /* 0x40000000..0x40000005, 0x4000000A */
+    int i;
 
     /*
      * When the buffer is too small, KVM_GET_SUPPORTED_HV_CPUID fails with
@@ -971,6 +972,22 @@  static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
     while ((cpuid = try_get_hv_cpuid(cs, max)) == NULL) {
         max++;
     }
+
+    /*
+     * KVM_GET_SUPPORTED_HV_CPUID does not set EVMCS CPUID bit before
+     * KVM_CAP_HYPERV_ENLIGHTENED_VMCS is enabled but we want to get the
+     * information early, just check for the capability and set the bit
+     * manually.
+     */
+    if (kvm_check_extension(cs->kvm_state,
+                            KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) {
+        for (i = 0; i < cpuid->nent; i++) {
+            if (cpuid->entries[i].function == HV_CPUID_ENLIGHTMENT_INFO) {
+                cpuid->entries[i].eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
+            }
+        }
+    }
+
     return cpuid;
 }
 
@@ -1200,24 +1217,6 @@  static int hyperv_expand_features(CPUState *cs)
     if (!hyperv_enabled(cpu))
         return 0;
 
-    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ||
-        cpu->hyperv_passthrough) {
-        uint16_t evmcs_version;
-
-        r = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
-                                (uintptr_t)&evmcs_version);
-
-        if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) && r) {
-            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
-                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
-            return -ENOSYS;
-        }
-
-        if (!r) {
-            cpu->hyperv_nested[0] = evmcs_version;
-        }
-    }
-
     if (cpu->hyperv_passthrough) {
         cpu->hyperv_vendor_id[0] =
             hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EBX);
@@ -1455,6 +1454,21 @@  static int hyperv_init_vcpu(X86CPU *cpu)
         }
     }
 
+    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
+        uint16_t evmcs_version;
+
+        ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
+                                  (uintptr_t)&evmcs_version);
+
+        if (ret < 0) {
+            fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
+                    kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
+            return ret;
+        }
+
+        cpu->hyperv_nested[0] = evmcs_version;
+    }
+
     return 0;
 }
 
@@ -1519,6 +1533,11 @@  int kvm_arch_init_vcpu(CPUState *cs)
     }
 
     if (hyperv_enabled(cpu)) {
+        r = hyperv_init_vcpu(cpu);
+        if (r) {
+            return r;
+        }
+
         cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
         kvm_base = KVM_CPUID_SIGNATURE_NEXT;
         has_msr_hv_hypercall = true;
@@ -1868,11 +1887,6 @@  int kvm_arch_init_vcpu(CPUState *cs)
 
     kvm_init_msrs(cpu);
 
-    r = hyperv_init_vcpu(cpu);
-    if (r) {
-        goto fail;
-    }
-
     return 0;
 
  fail: