diff mbox

[v4,5/5] x86, kvm: support vcpu preempted check

Message ID 1476872416-42752-6-git-send-email-xinhui.pan@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

xinhui Oct. 19, 2016, 10:20 a.m. UTC
This is to fix some lock holder preemption issues. Some other locks
implementation do a spin loop before acquiring the lock itself.
Currently kernel has an interface of bool vcpu_is_preempted(int cpu). It
takes the cpu as parameter and return true if the cpu is preempted.  Then
kernel can break the spin loops upon on the retval of vcpu_is_preempted.

As kernel has used this interface, So lets support it.

We use one field of struct kvm_steal_time to indicate that if one vcpu
is running or not.

unix benchmark result:
host:  kernel 4.8.1, i5-4570, 4 cpus
guest: kernel 4.8.1, 8 vcpus

	test-case			after-patch	  before-patch
Execl Throughput                       |    18307.9 lps  |    11701.6 lps 
File Copy 1024 bufsize 2000 maxblocks  |  1352407.3 KBps |   790418.9 KBps
File Copy 256 bufsize 500 maxblocks    |   367555.6 KBps |   222867.7 KBps
File Copy 4096 bufsize 8000 maxblocks  |  3675649.7 KBps |  1780614.4 KBps
Pipe Throughput                        | 11872208.7 lps  | 11855628.9 lps 
Pipe-based Context Switching           |  1495126.5 lps  |  1490533.9 lps 
Process Creation                       |    29881.2 lps  |    28572.8 lps 
Shell Scripts (1 concurrent)           |    23224.3 lpm  |    22607.4 lpm 
Shell Scripts (8 concurrent)           |     3531.4 lpm  |     3211.9 lpm 
System Call Overhead                   | 10385653.0 lps  | 10419979.0 lps 

Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
 arch/x86/include/asm/paravirt_types.h |  6 ++++++
 arch/x86/include/asm/spinlock.h       |  8 ++++++++
 arch/x86/include/uapi/asm/kvm_para.h  |  3 ++-
 arch/x86/kernel/kvm.c                 | 11 +++++++++++
 arch/x86/kernel/paravirt.c            | 11 +++++++++++
 arch/x86/kvm/x86.c                    | 12 ++++++++++++
 6 files changed, 50 insertions(+), 1 deletion(-)

Comments

Radim Krčmář Oct. 19, 2016, 5:24 p.m. UTC | #1
2016-10-19 06:20-0400, Pan Xinhui:
> This is to fix some lock holder preemption issues. Some other locks
> implementation do a spin loop before acquiring the lock itself.
> Currently kernel has an interface of bool vcpu_is_preempted(int cpu). It
> takes the cpu as parameter and return true if the cpu is preempted.  Then
> kernel can break the spin loops upon on the retval of vcpu_is_preempted.
> 
> As kernel has used this interface, So lets support it.
> 
> We use one field of struct kvm_steal_time to indicate that if one vcpu
> is running or not.
> 
> unix benchmark result:
> host:  kernel 4.8.1, i5-4570, 4 cpus
> guest: kernel 4.8.1, 8 vcpus
> 
> 	test-case			after-patch	  before-patch
> Execl Throughput                       |    18307.9 lps  |    11701.6 lps 
> File Copy 1024 bufsize 2000 maxblocks  |  1352407.3 KBps |   790418.9 KBps
> File Copy 256 bufsize 500 maxblocks    |   367555.6 KBps |   222867.7 KBps
> File Copy 4096 bufsize 8000 maxblocks  |  3675649.7 KBps |  1780614.4 KBps
> Pipe Throughput                        | 11872208.7 lps  | 11855628.9 lps 
> Pipe-based Context Switching           |  1495126.5 lps  |  1490533.9 lps 
> Process Creation                       |    29881.2 lps  |    28572.8 lps 
> Shell Scripts (1 concurrent)           |    23224.3 lpm  |    22607.4 lpm 
> Shell Scripts (8 concurrent)           |     3531.4 lpm  |     3211.9 lpm 
> System Call Overhead                   | 10385653.0 lps  | 10419979.0 lps 
> 
> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
> ---
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> @@ -98,6 +98,10 @@ struct pv_time_ops {
>  	unsigned long long (*steal_clock)(int cpu);
>  };
>  
> +struct pv_vcpu_ops {
> +	bool (*vcpu_is_preempted)(int cpu);
> +};
> +

(I would put it into pv_lock_ops to save the plumbing.)

> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -45,7 +45,8 @@ struct kvm_steal_time {
>  	__u64 steal;
>  	__u32 version;
>  	__u32 flags;
> -	__u32 pad[12];
> +	__u32 preempted;

Why __u32 instead of __u8?

> +	__u32 pad[11];
>  };

Please document the change in Documentation/virtual/kvm/msr.txt, section
MSR_KVM_STEAL_TIME.

> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> @@ -415,6 +415,15 @@ void kvm_disable_steal_time(void)
> +static bool kvm_vcpu_is_preempted(int cpu)
> +{
> +	struct kvm_steal_time *src;
> +
> +	src = &per_cpu(steal_time, cpu);
> +
> +	return !!src->preempted;
> +}
> +
>  #ifdef CONFIG_SMP
>  static void __init kvm_smp_prepare_boot_cpu(void)
>  {
> @@ -488,6 +497,8 @@ void __init kvm_guest_init(void)
>  	kvm_guest_cpu_init();
>  #endif
>  
> +	pv_vcpu_ops.vcpu_is_preempted = kvm_vcpu_is_preempted;

Would be nicer to assign conditionally in the KVM_FEATURE_STEAL_TIME
block.  The steal_time structure has to be zeroed, so this code would
work, but the native function (return false) is better if we know that
the kvm_vcpu_is_preempted() would always return false anway.

Old KVMs won't have the feature, so we could also assign only when KVM
reports it, but that requires extra definitions and the performance gain
is fairly small, so I'm ok with this.

> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> @@ -2057,6 +2057,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
>  		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
>  		return;
>  
> +	vcpu->arch.st.steal.preempted = 0;
> +
>  	if (vcpu->arch.st.steal.version & 1)
>  		vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
>  
> @@ -2812,6 +2814,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  
>  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  {
> +	if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
> +		if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
> +					&vcpu->arch.st.steal,
> +					sizeof(struct kvm_steal_time)) == 0) {
> +			vcpu->arch.st.steal.preempted = 1;
> +			kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
> +					&vcpu->arch.st.steal,
> +					sizeof(struct kvm_steal_time));
> +		}

Please name this block of code.  Something like
  kvm_steal_time_set_preempted(vcpu);

Thanks.
xinhui Oct. 19, 2016, 6:45 p.m. UTC | #2
在 2016/10/20 01:24, Radim Krčmář 写道:
> 2016-10-19 06:20-0400, Pan Xinhui:
>> This is to fix some lock holder preemption issues. Some other locks
>> implementation do a spin loop before acquiring the lock itself.
>> Currently kernel has an interface of bool vcpu_is_preempted(int cpu). It
>> takes the cpu as parameter and return true if the cpu is preempted.  Then
>> kernel can break the spin loops upon on the retval of vcpu_is_preempted.
>>
>> As kernel has used this interface, So lets support it.
>>
>> We use one field of struct kvm_steal_time to indicate that if one vcpu
>> is running or not.
>>
>> unix benchmark result:
>> host:  kernel 4.8.1, i5-4570, 4 cpus
>> guest: kernel 4.8.1, 8 vcpus
>>
>> 	test-case			after-patch	  before-patch
>> Execl Throughput                       |    18307.9 lps  |    11701.6 lps
>> File Copy 1024 bufsize 2000 maxblocks  |  1352407.3 KBps |   790418.9 KBps
>> File Copy 256 bufsize 500 maxblocks    |   367555.6 KBps |   222867.7 KBps
>> File Copy 4096 bufsize 8000 maxblocks  |  3675649.7 KBps |  1780614.4 KBps
>> Pipe Throughput                        | 11872208.7 lps  | 11855628.9 lps
>> Pipe-based Context Switching           |  1495126.5 lps  |  1490533.9 lps
>> Process Creation                       |    29881.2 lps  |    28572.8 lps
>> Shell Scripts (1 concurrent)           |    23224.3 lpm  |    22607.4 lpm
>> Shell Scripts (8 concurrent)           |     3531.4 lpm  |     3211.9 lpm
>> System Call Overhead                   | 10385653.0 lps  | 10419979.0 lps
>>
>> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>> ---
>> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
>> @@ -98,6 +98,10 @@ struct pv_time_ops {
>>  	unsigned long long (*steal_clock)(int cpu);
>>  };
>>
>> +struct pv_vcpu_ops {
>> +	bool (*vcpu_is_preempted)(int cpu);
>> +};
>> +
>
> (I would put it into pv_lock_ops to save the plumbing.)
>
hi, Radim
	thanks for your reply.

yes, a new struct leads patch into unnecessary lines changed.
I do that just because I am not sure which existing xxx_ops I should place the vcpu_is_preempted in.

>> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
>> @@ -45,7 +45,8 @@ struct kvm_steal_time {
>>  	__u64 steal;
>>  	__u32 version;
>>  	__u32 flags;
>> -	__u32 pad[12];
>> +	__u32 preempted;
>
> Why __u32 instead of __u8?
>
I thought it is 32-bits aligned...
yes, u8 is good to store the preempt status.

>> +	__u32 pad[11];
>>  };
>
> Please document the change in Documentation/virtual/kvm/msr.txt, section
> MSR_KVM_STEAL_TIME.
>
okay, I totally forgot to do that. thanks!

>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> @@ -415,6 +415,15 @@ void kvm_disable_steal_time(void)
>> +static bool kvm_vcpu_is_preempted(int cpu)
>> +{
>> +	struct kvm_steal_time *src;
>> +
>> +	src = &per_cpu(steal_time, cpu);
>> +
>> +	return !!src->preempted;
>> +}
>> +
>>  #ifdef CONFIG_SMP
>>  static void __init kvm_smp_prepare_boot_cpu(void)
>>  {
>> @@ -488,6 +497,8 @@ void __init kvm_guest_init(void)
>>  	kvm_guest_cpu_init();
>>  #endif
>>
>> +	pv_vcpu_ops.vcpu_is_preempted = kvm_vcpu_is_preempted;
>
> Would be nicer to assign conditionally in the KVM_FEATURE_STEAL_TIME
> block.  The steal_time structure has to be zeroed, so this code would
> work, but the native function (return false) is better if we know that
> the kvm_vcpu_is_preempted() would always return false anway.
>
yes, agree. Will do that.

I once thought we can patch the code runtime.
we replace binary code
"call 0xXXXXXXXX #pv_vcpu_ops.vcpu_is_preempted"
with
"xor eax, eax"
however it is not worth doing that. the performace improvements might be very small.

> Old KVMs won't have the feature, so we could also assign only when KVM
> reports it, but that requires extra definitions and the performance gain
> is fairly small, so I'm ok with this.
>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> @@ -2057,6 +2057,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
>>  		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
>>  		return;
>>
>> +	vcpu->arch.st.steal.preempted = 0;
>> +
>>  	if (vcpu->arch.st.steal.version & 1)
>>  		vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
>>
>> @@ -2812,6 +2814,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>
>>  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>>  {
>> +	if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
>> +		if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>> +					&vcpu->arch.st.steal,
>> +					sizeof(struct kvm_steal_time)) == 0) {
>> +			vcpu->arch.st.steal.preempted = 1;
>> +			kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>> +					&vcpu->arch.st.steal,
>> +					sizeof(struct kvm_steal_time));
>> +		}
>
> Please name this block of code.  Something like
>   kvm_steal_time_set_preempted(vcpu);
>
yep, my code style is ugly.
will do that.

thanks
xinhui


> Thanks.
>
Paolo Bonzini Oct. 24, 2016, 2:39 p.m. UTC | #3
On 19/10/2016 19:24, Radim Krčmář wrote:
>> > +	if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
>> > +		if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>> > +					&vcpu->arch.st.steal,
>> > +					sizeof(struct kvm_steal_time)) == 0) {
>> > +			vcpu->arch.st.steal.preempted = 1;
>> > +			kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>> > +					&vcpu->arch.st.steal,
>> > +					sizeof(struct kvm_steal_time));
>> > +		}
> Please name this block of code.  Something like
>   kvm_steal_time_set_preempted(vcpu);

While at it:

1) the kvm_read_guest_cached is not necessary.  You can rig the call to
kvm_write_guest_cached so that it only writes vcpu->arch.st.steal.preempted.

2) split the patch in host and guest sides.

Paolo
Radim Krčmář Oct. 24, 2016, 3:14 p.m. UTC | #4
2016-10-24 16:39+0200, Paolo Bonzini:
> On 19/10/2016 19:24, Radim Krčmář wrote:
>>> > +	if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
>>> > +		if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>>> > +					&vcpu->arch.st.steal,
>>> > +					sizeof(struct kvm_steal_time)) == 0) {
>>> > +			vcpu->arch.st.steal.preempted = 1;
>>> > +			kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>>> > +					&vcpu->arch.st.steal,
>>> > +					sizeof(struct kvm_steal_time));
>>> > +		}
>> Please name this block of code.  Something like
>>   kvm_steal_time_set_preempted(vcpu);
> 
> While at it:
> 
> 1) the kvm_read_guest_cached is not necessary.  You can rig the call to
> kvm_write_guest_cached so that it only writes vcpu->arch.st.steal.preempted.

I agree.  kvm_write_guest_cached() always writes from offset 0, so we'd
want a new function that allows to specify a starting offset.

Using cached vcpu->arch.st.steal to avoid the read wouldn't be as good.
Paolo Bonzini Oct. 24, 2016, 3:18 p.m. UTC | #5
On 24/10/2016 17:14, Radim Krčmář wrote:
> 2016-10-24 16:39+0200, Paolo Bonzini:
>> On 19/10/2016 19:24, Radim Krčmář wrote:
>>>>> +	if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
>>>>> +		if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>>>>> +					&vcpu->arch.st.steal,
>>>>> +					sizeof(struct kvm_steal_time)) == 0) {
>>>>> +			vcpu->arch.st.steal.preempted = 1;
>>>>> +			kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>>>>> +					&vcpu->arch.st.steal,
>>>>> +					sizeof(struct kvm_steal_time));
>>>>> +		}
>>> Please name this block of code.  Something like
>>>   kvm_steal_time_set_preempted(vcpu);
>>
>> While at it:
>>
>> 1) the kvm_read_guest_cached is not necessary.  You can rig the call to
>> kvm_write_guest_cached so that it only writes vcpu->arch.st.steal.preempted.
> 
> I agree.  kvm_write_guest_cached() always writes from offset 0, so we'd
> want a new function that allows to specify a starting offset.

Yeah, let's leave it for a follow-up then!

Thanks,

Paolo

> Using cached vcpu->arch.st.steal to avoid the read wouldn't be as good.
>
xinhui Oct. 25, 2016, 1:25 a.m. UTC | #6
在 2016/10/24 23:18, Paolo Bonzini 写道:
>
>
> On 24/10/2016 17:14, Radim Krčmář wrote:
>> 2016-10-24 16:39+0200, Paolo Bonzini:
>>> On 19/10/2016 19:24, Radim Krčmář wrote:
>>>>>> +	if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
>>>>>> +		if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>>>>>> +					&vcpu->arch.st.steal,
>>>>>> +					sizeof(struct kvm_steal_time)) == 0) {
>>>>>> +			vcpu->arch.st.steal.preempted = 1;
>>>>>> +			kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
>>>>>> +					&vcpu->arch.st.steal,
>>>>>> +					sizeof(struct kvm_steal_time));
>>>>>> +		}
>>>> Please name this block of code.  Something like
>>>>   kvm_steal_time_set_preempted(vcpu);
>>>
>>> While at it:
>>>
>>> 1) the kvm_read_guest_cached is not necessary.  You can rig the call to
>>> kvm_write_guest_cached so that it only writes vcpu->arch.st.steal.preempted.
>>
>> I agree.  kvm_write_guest_cached() always writes from offset 0, so we'd
>> want a new function that allows to specify a starting offset.
>
> Yeah, let's leave it for a follow-up then!
>
I think I can make a having-offset version. :)

> Thanks,
>
> Paolo
>
>> Using cached vcpu->arch.st.steal to avoid the read wouldn't be as good.
>>
>
diff mbox

Patch

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0f400c0..b1c7937 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -98,6 +98,10 @@  struct pv_time_ops {
 	unsigned long long (*steal_clock)(int cpu);
 };
 
+struct pv_vcpu_ops {
+	bool (*vcpu_is_preempted)(int cpu);
+};
+
 struct pv_cpu_ops {
 	/* hooks for various privileged instructions */
 	unsigned long (*get_debugreg)(int regno);
@@ -318,6 +322,7 @@  struct pv_lock_ops {
 struct paravirt_patch_template {
 	struct pv_init_ops pv_init_ops;
 	struct pv_time_ops pv_time_ops;
+	struct pv_vcpu_ops pv_vcpu_ops;
 	struct pv_cpu_ops pv_cpu_ops;
 	struct pv_irq_ops pv_irq_ops;
 	struct pv_mmu_ops pv_mmu_ops;
@@ -327,6 +332,7 @@  struct paravirt_patch_template {
 extern struct pv_info pv_info;
 extern struct pv_init_ops pv_init_ops;
 extern struct pv_time_ops pv_time_ops;
+extern struct pv_vcpu_ops pv_vcpu_ops;
 extern struct pv_cpu_ops pv_cpu_ops;
 extern struct pv_irq_ops pv_irq_ops;
 extern struct pv_mmu_ops pv_mmu_ops;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 921bea7..52fd942 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -26,6 +26,14 @@ 
 extern struct static_key paravirt_ticketlocks_enabled;
 static __always_inline bool static_key_false(struct static_key *key);
 
+#ifdef CONFIG_PARAVIRT
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+	return pv_vcpu_ops.vcpu_is_preempted(cpu);
+}
+#endif
+
 #include <asm/qspinlock.h>
 
 /*
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 94dc8ca..e9c12a1 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -45,7 +45,8 @@  struct kvm_steal_time {
 	__u64 steal;
 	__u32 version;
 	__u32 flags;
-	__u32 pad[12];
+	__u32 preempted;
+	__u32 pad[11];
 };
 
 #define KVM_STEAL_ALIGNMENT_BITS 5
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index edbbfc8..0011bef 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -415,6 +415,15 @@  void kvm_disable_steal_time(void)
 	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }
 
+static bool kvm_vcpu_is_preempted(int cpu)
+{
+	struct kvm_steal_time *src;
+
+	src = &per_cpu(steal_time, cpu);
+
+	return !!src->preempted;
+}
+
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
@@ -488,6 +497,8 @@  void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+	pv_vcpu_ops.vcpu_is_preempted = kvm_vcpu_is_preempted;
+
 	/*
 	 * Hard lockup detection is enabled by default. Disable it, as guests
 	 * can get false positives too easily, for example if the host is
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bbf3d59..7adb7e9 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -122,6 +122,7 @@  static void *get_call_destination(u8 type)
 	struct paravirt_patch_template tmpl = {
 		.pv_init_ops = pv_init_ops,
 		.pv_time_ops = pv_time_ops,
+		.pv_vcpu_ops = pv_vcpu_ops,
 		.pv_cpu_ops = pv_cpu_ops,
 		.pv_irq_ops = pv_irq_ops,
 		.pv_mmu_ops = pv_mmu_ops,
@@ -203,6 +204,11 @@  static u64 native_steal_clock(int cpu)
 	return 0;
 }
 
+static bool native_vcpu_is_preempted(int cpu)
+{
+	return 0;
+}
+
 /* These are in entry.S */
 extern void native_iret(void);
 extern void native_usergs_sysret64(void);
@@ -312,6 +318,10 @@  struct pv_time_ops pv_time_ops = {
 	.steal_clock = native_steal_clock,
 };
 
+struct pv_vcpu_ops pv_vcpu_ops = {
+	.vcpu_is_preempted = native_vcpu_is_preempted,
+};
+
 __visible struct pv_irq_ops pv_irq_ops = {
 	.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
 	.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
@@ -458,6 +468,7 @@  struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 };
 
 EXPORT_SYMBOL_GPL(pv_time_ops);
+EXPORT_SYMBOL    (pv_vcpu_ops);
 EXPORT_SYMBOL    (pv_cpu_ops);
 EXPORT_SYMBOL    (pv_mmu_ops);
 EXPORT_SYMBOL_GPL(pv_info);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c633de..0ffc5aa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2057,6 +2057,8 @@  static void record_steal_time(struct kvm_vcpu *vcpu)
 		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
 		return;
 
+	vcpu->arch.st.steal.preempted = 0;
+
 	if (vcpu->arch.st.steal.version & 1)
 		vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
 
@@ -2812,6 +2814,16 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	if (vcpu->arch.st.msr_val & KVM_MSR_ENABLED)
+		if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+					&vcpu->arch.st.steal,
+					sizeof(struct kvm_steal_time)) == 0) {
+			vcpu->arch.st.steal.preempted = 1;
+			kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+					&vcpu->arch.st.steal,
+					sizeof(struct kvm_steal_time));
+		}
+
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();