Patchwork [RFC,02/10] KVM: PPC: BOOK3S: PR: Emulate virtual timebase register

login
register
mail settings
Submitter Aneesh Kumar K.V
Date Jan. 28, 2014, 4:44 p.m.
Message ID <1390927455-3312-3-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/314756/
State New
Headers show

Comments

Aneesh Kumar K.V - Jan. 28, 2014, 4:44 p.m.
virtual time base register is a per vm register and need to saved
and restored on vm exit and entry. Writing to VTB is not allowed
in the privileged mode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/include/asm/reg.h      |  7 +++++++
 arch/powerpc/include/asm/time.h     | 12 ++++++++++++
 arch/powerpc/kvm/book3s_emulate.c   |  3 +++
 arch/powerpc/kvm/book3s_pr.c        |  3 +++
 5 files changed, 26 insertions(+)
Alexander Graf - Jan. 29, 2014, 4:39 p.m.
On 01/28/2014 05:44 PM, Aneesh Kumar K.V wrote:
> virtual time base register is a per vm register and need to saved
> and restored on vm exit and entry. Writing to VTB is not allowed
> in the privileged mode.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/kvm_host.h |  1 +
>   arch/powerpc/include/asm/reg.h      |  7 +++++++
>   arch/powerpc/include/asm/time.h     | 12 ++++++++++++
>   arch/powerpc/kvm/book3s_emulate.c   |  3 +++
>   arch/powerpc/kvm/book3s_pr.c        |  3 +++
>   5 files changed, 26 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 0a3785271f34..9ebdd12e50a9 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -508,6 +508,7 @@ struct kvm_vcpu_arch {
>   #endif
>   	/* Time base value when we entered the guest */
>   	u64 entry_tb;
> +	u64 entry_vtb;
>   	u32 tcr;
>   	ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
>   	u32 ivor[64];
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index e789f76c9bc2..6c649355b1e9 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -1161,6 +1161,13 @@
>   #define mtspr(rn, v)	asm volatile("mtspr " __stringify(rn) ",%0" : \
>   				     : "r" ((unsigned long)(v)) \
>   				     : "memory")
> +#ifdef CONFIG_PPC_BOOK3S_64
> +#define mfvtb()		({unsigned long rval;				\
> +			asm volatile("mfspr %0, %1" :			\
> +				     "=r" (rval) : "i" (SPRN_VTB)); rval;})
> +#else
> +#define mfvtb() BUG()
> +#endif

static inline mfvtb(unsigned long)
{
#ifdef CONFIG_PPC_BOOK3S_64
     return mfspr(SPRN_VTB);
#else
     BUG();
#endif
}

is a lot easier to read and get right. But reg.h is Ben's call.

Also could you please give me a pointer to the specification for it? I 
tried to look up vtb in the 2.06 ISA and couldn't find it. Is it a CPU 
specific register?

>   
>   #ifdef __powerpc64__
>   #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
> diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
> index c1f267694acb..1e89dbc665d9 100644
> --- a/arch/powerpc/include/asm/time.h
> +++ b/arch/powerpc/include/asm/time.h
> @@ -101,6 +101,18 @@ static inline u64 get_rtc(void)
>   	return (u64)hi * 1000000000 + lo;
>   }
>   
> +#ifdef CONFIG_PPC_BOOK3S_64
> +static inline u64 get_vtb(void)
> +{
> +	return mfvtb();
> +}
> +#else
> +static inline u64 get_vtb(void)
> +{
> +	return 0;
> +}
> +#endif

Just put the #ifdef inside the function body.

> +
>   #ifdef CONFIG_PPC64
>   static inline u64 get_tb(void)
>   {
> diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
> index e1f1e5e16449..4b58d8a90cb5 100644
> --- a/arch/powerpc/kvm/book3s_emulate.c
> +++ b/arch/powerpc/kvm/book3s_emulate.c
> @@ -528,6 +528,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
>   		 */
>   		*spr_val = vcpu->arch.spurr;
>   		break;
> +	case SPRN_VTB:
> +		*spr_val = vcpu->arch.vtb;
> +		break;
>   	case SPRN_GQR0:
>   	case SPRN_GQR1:
>   	case SPRN_GQR2:
> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
> index 02231f5193c2..b5598e9cdd09 100644
> --- a/arch/powerpc/kvm/book3s_pr.c
> +++ b/arch/powerpc/kvm/book3s_pr.c
> @@ -120,6 +120,8 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
>   	 * to find the guest purr and spurr value.
>   	 */
>   	vcpu->arch.entry_tb = get_tb();
> +	vcpu->arch.entry_vtb = get_vtb();
> +
>   }
>   
>   /* Copy data touched by real-mode code from shadow vcpu back to vcpu */
> @@ -171,6 +173,7 @@ out:
>   	 */
>   	vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
>   	vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
> +	vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb;

I thought it's per vm? That would contradict the per-vcpu logic you're 
implementing here. This way vtb scews with world switches on SMP guests.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt - Jan. 29, 2014, 10:54 p.m.
On Wed, 2014-01-29 at 17:39 +0100, Alexander Graf wrote:
> static inline mfvtb(unsigned long)
> {
> #ifdef CONFIG_PPC_BOOK3S_64
>      return mfspr(SPRN_VTB);
> #else
>      BUG();
> #endif
> }
> 
> is a lot easier to read and get right. But reg.h is Ben's call.

Agreed.

> Also could you please give me a pointer to the specification for it? I 
> tried to look up vtb in the 2.06 ISA and couldn't find it. Is it a CPU 
> specific register?


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt - Jan. 30, 2014, 12:35 a.m.
On Thu, 2014-01-30 at 09:54 +1100, Benjamin Herrenschmidt wrote:
> On Wed, 2014-01-29 at 17:39 +0100, Alexander Graf wrote:
> > static inline mfvtb(unsigned long)
> > {
> > #ifdef CONFIG_PPC_BOOK3S_64
> >      return mfspr(SPRN_VTB);
> > #else
> >      BUG();
> > #endif
> > }
> > 
> > is a lot easier to read and get right. But reg.h is Ben's call.
> 
> Agreed.

I mean I agree with Alex, his version is nicer :-)

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras - Jan. 30, 2014, 5:49 a.m.
On Tue, Jan 28, 2014 at 10:14:07PM +0530, Aneesh Kumar K.V wrote:
> virtual time base register is a per vm register and need to saved
> and restored on vm exit and entry. Writing to VTB is not allowed
> in the privileged mode.
...

> +#ifdef CONFIG_PPC_BOOK3S_64
> +#define mfvtb()		({unsigned long rval;				\
> +			asm volatile("mfspr %0, %1" :			\
> +				     "=r" (rval) : "i" (SPRN_VTB)); rval;})

The mfspr will be a no-op on anything before POWER8, meaning the
result will be whatever value was in the destination GPR before the
mfspr.  I suppose that may not matter if the result is only ever used
when we're running on a POWER8 host, but I would feel more comfortable
if we had explicit feature tests to make sure of that, rather than
possibly doing computations with unpredictable values.

With your patch, a guest on a POWER7 or a PPC970 could do a read from
VTB and get garbage -- first, there is nothing to stop userspace from
requesting POWER8 emulation on an older machine, and secondly, even if
the virtual machine is a PPC970 (say) you don't implement
unimplemented SPR semantics for VTB (no-op if PR=0, illegal
instruction interrupt if PR=1).

On the whole I think it is reasonable to reject an attempt to set the
virtual PVR to a POWER8 PVR value if we are not running on a POWER8
host, because emulating all the new POWER8 features in software
(particularly transactional memory) would not be feasible.  Alex may
disagree. :)

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - Jan. 30, 2014, 10:04 a.m.
> Am 30.01.2014 um 06:49 schrieb Paul Mackerras <paulus@samba.org>:
> 
>> On Tue, Jan 28, 2014 at 10:14:07PM +0530, Aneesh Kumar K.V wrote:
>> virtual time base register is a per vm register and need to saved
>> and restored on vm exit and entry. Writing to VTB is not allowed
>> in the privileged mode.
> ...
> 
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +#define mfvtb()        ({unsigned long rval;                \
>> +            asm volatile("mfspr %0, %1" :            \
>> +                     "=r" (rval) : "i" (SPRN_VTB)); rval;})
> 
> The mfspr will be a no-op on anything before POWER8, meaning the
> result will be whatever value was in the destination GPR before the
> mfspr.  I suppose that may not matter if the result is only ever used
> when we're running on a POWER8 host, but I would feel more comfortable
> if we had explicit feature tests to make sure of that, rather than
> possibly doing computations with unpredictable values.
> 
> With your patch, a guest on a POWER7 or a PPC970 could do a read from
> VTB and get garbage -- first, there is nothing to stop userspace from
> requesting POWER8 emulation on an older machine, and secondly, even if
> the virtual machine is a PPC970 (say) you don't implement
> unimplemented SPR semantics for VTB (no-op if PR=0, illegal
> instruction interrupt if PR=1).
> 
> On the whole I think it is reasonable to reject an attempt to set the
> virtual PVR to a POWER8 PVR value if we are not running on a POWER8
> host, because emulating all the new POWER8 features in software
> (particularly transactional memory) would not be feasible.  Alex may
> disagree. :)

We don't have a good feature flag indicator that tells kvm what the guest cpu is capable of. So yes, I think it's reasonable to just not expose p8 registers on p8 for now.

In theory it's of course possible to emulate a lot of p8 features on pre-p8 hardware, but I'm not sure it's worth the effort. If anyone wants to spend the time to work on it I'd be happy to tale patches though ;)

Alex

> 
> Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Aneesh Kumar K.V - Jan. 31, 2014, 10:57 a.m.
Paul Mackerras <paulus@samba.org> writes:

> On Tue, Jan 28, 2014 at 10:14:07PM +0530, Aneesh Kumar K.V wrote:
>> virtual time base register is a per vm register and need to saved
>> and restored on vm exit and entry. Writing to VTB is not allowed
>> in the privileged mode.
> ...
>
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +#define mfvtb()		({unsigned long rval;				\
>> +			asm volatile("mfspr %0, %1" :			\
>> +				     "=r" (rval) : "i" (SPRN_VTB)); rval;})
>
> The mfspr will be a no-op on anything before POWER8, meaning the
> result will be whatever value was in the destination GPR before the
> mfspr.  I suppose that may not matter if the result is only ever used
> when we're running on a POWER8 host, but I would feel more comfortable
> if we had explicit feature tests to make sure of that, rather than
> possibly doing computations with unpredictable values.
>
> With your patch, a guest on a POWER7 or a PPC970 could do a read from
> VTB and get garbage -- first, there is nothing to stop userspace from
> requesting POWER8 emulation on an older machine, and secondly, even if
> the virtual machine is a PPC970 (say) you don't implement
> unimplemented SPR semantics for VTB (no-op if PR=0, illegal
> instruction interrupt if PR=1).

Ok that means we need to do something like  ?

	struct cpu_spec *s = find_cpuspec(vcpu->arch.pvr);
        if (s->cpu_features & CPU_FTR_ARCH_207S) {

        }

        
>
> On the whole I think it is reasonable to reject an attempt to set the
> virtual PVR to a POWER8 PVR value if we are not running on a POWER8
> host, because emulating all the new POWER8 features in software
> (particularly transactional memory) would not be feasible.  Alex may
> disagree. :)

That would make it much simpler.

-aneesh

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0a3785271f34..9ebdd12e50a9 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -508,6 +508,7 @@  struct kvm_vcpu_arch {
 #endif
 	/* Time base value when we entered the guest */
 	u64 entry_tb;
+	u64 entry_vtb;
 	u32 tcr;
 	ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
 	u32 ivor[64];
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e789f76c9bc2..6c649355b1e9 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1161,6 +1161,13 @@ 
 #define mtspr(rn, v)	asm volatile("mtspr " __stringify(rn) ",%0" : \
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
+#ifdef CONFIG_PPC_BOOK3S_64
+#define mfvtb()		({unsigned long rval;				\
+			asm volatile("mfspr %0, %1" :			\
+				     "=r" (rval) : "i" (SPRN_VTB)); rval;})
+#else
+#define mfvtb() BUG()
+#endif
 
 #ifdef __powerpc64__
 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index c1f267694acb..1e89dbc665d9 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -101,6 +101,18 @@  static inline u64 get_rtc(void)
 	return (u64)hi * 1000000000 + lo;
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline u64 get_vtb(void)
+{
+	return mfvtb();
+}
+#else
+static inline u64 get_vtb(void)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_PPC64
 static inline u64 get_tb(void)
 {
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index e1f1e5e16449..4b58d8a90cb5 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -528,6 +528,9 @@  int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
 		 */
 		*spr_val = vcpu->arch.spurr;
 		break;
+	case SPRN_VTB:
+		*spr_val = vcpu->arch.vtb;
+		break;
 	case SPRN_GQR0:
 	case SPRN_GQR1:
 	case SPRN_GQR2:
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 02231f5193c2..b5598e9cdd09 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -120,6 +120,8 @@  void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 	 * to find the guest purr and spurr value.
 	 */
 	vcpu->arch.entry_tb = get_tb();
+	vcpu->arch.entry_vtb = get_vtb();
+
 }
 
 /* Copy data touched by real-mode code from shadow vcpu back to vcpu */
@@ -171,6 +173,7 @@  out:
 	 */
 	vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
 	vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
+	vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb;
 }
 
 static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)