Patchwork [2/4] KVM: PPC: Book3S HV: Add a mechanism for recording modified HPTEs

login
register
mail settings
Submitter Paul Mackerras
Date Nov. 14, 2012, 4:32 a.m.
Message ID <20121114043217.GC13832@drongo>
Download mbox | patch
Permalink /patch/198827/
State New
Headers show

Comments

Paul Mackerras - Nov. 14, 2012, 4:32 a.m.
This uses a bit in our record of the guest view of the HPTE to record
when the HPTE gets modified.  We use a reserved bit for this, and ensure
that this bit is always cleared in HPTE values returned to the guest.
The recording of modified HPTEs is only done if other code indicates
its interest by setting kvm->arch.hpte_mod_interest to a non-zero value.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |    6 ++++++
 arch/powerpc/include/asm/kvm_host.h      |    1 +
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   25 ++++++++++++++++++++++---
 3 files changed, 29 insertions(+), 3 deletions(-)
Alexander Graf - Nov. 19, 2012, 12:36 p.m.
On 14.11.2012, at 05:32, Paul Mackerras wrote:

> This uses a bit in our record of the guest view of the HPTE to record
> when the HPTE gets modified.  We use a reserved bit for this, and ensure
> that this bit is always cleared in HPTE values returned to the guest.
> The recording of modified HPTEs is only done if other code indicates
> its interest by setting kvm->arch.hpte_mod_interest to a non-zero value.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/include/asm/kvm_book3s_64.h |    6 ++++++
> arch/powerpc/include/asm/kvm_host.h      |    1 +
> arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   25 ++++++++++++++++++++++---
> 3 files changed, 29 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 1472a5b..4ca4f25 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -50,6 +50,12 @@ extern int kvm_hpt_order;		/* order of preallocated HPTs */
> #define HPTE_V_HVLOCK	0x40UL
> #define HPTE_V_ABSENT	0x20UL
> 
> +/*
> + * We use this bit in the guest_rpte field of the revmap entry
> + * to indicate a modified HPTE.
> + */
> +#define HPTE_GR_MODIFIED	(1ul << 62)
> +
> static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
> {
> 	unsigned long tmp, old;
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 3093896..58c7264 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -248,6 +248,7 @@ struct kvm_arch {
> 	atomic_t vcpus_running;
> 	unsigned long hpt_npte;
> 	unsigned long hpt_mask;
> +	atomic_t hpte_mod_interest;
> 	spinlock_t slot_phys_lock;
> 	unsigned short last_vcpu[NR_CPUS];
> 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 362dffe..726231a 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -66,6 +66,18 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
> }
> EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
> 
> +/*
> + * Note modification of an HPTE; set the HPTE modified bit
> + * if it wasn't modified before and anyone is interested.
> + */
> +static inline void note_hpte_modification(struct kvm *kvm,
> +					  struct revmap_entry *rev)
> +{
> +	if (!(rev->guest_rpte & HPTE_GR_MODIFIED) &&
> +	    atomic_read(&kvm->arch.hpte_mod_interest))

Aren't atomic reads expensive? Doing one on every guest HPTE modification (when nobody is listening) sounds quite excessive. Wouldn't it be cheaper to just always update the bit?

> +		rev->guest_rpte |= HPTE_GR_MODIFIED;
> +}
> +
> /* Remove this HPTE from the chain for a real page */
> static void remove_revmap_chain(struct kvm *kvm, long pte_index,
> 				struct revmap_entry *rev,
> @@ -287,8 +299,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
> 	rev = &kvm->arch.revmap[pte_index];
> 	if (realmode)
> 		rev = real_vmalloc_addr(rev);
> -	if (rev)
> +	if (rev) {
> 		rev->guest_rpte = g_ptel;
> +		note_hpte_modification(kvm, rev);
> +	}
> 
> 	/* Link HPTE into reverse-map chain */
> 	if (pteh & HPTE_V_VALID) {
> @@ -392,7 +406,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
> 		/* Read PTE low word after tlbie to get final R/C values */
> 		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
> 	}
> -	r = rev->guest_rpte;
> +	r = rev->guest_rpte & ~HPTE_GR_MODIFIED;
> +	note_hpte_modification(kvm, rev);
> 	unlock_hpte(hpte, 0);
> 
> 	vcpu->arch.gpr[4] = v;
> @@ -466,6 +481,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
> 
> 			args[j] = ((0x80 | flags) << 56) + pte_index;
> 			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
> +			note_hpte_modification(kvm, rev);
> 
> 			if (!(hp[0] & HPTE_V_VALID)) {
> 				/* insert R and C bits from PTE */
> @@ -555,6 +571,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
> 	if (rev) {
> 		r = (rev->guest_rpte & ~mask) | bits;
> 		rev->guest_rpte = r;
> +		note_hpte_modification(kvm, rev);
> 	}
> 	r = (hpte[1] & ~mask) | bits;
> 
> @@ -606,8 +623,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
> 			v &= ~HPTE_V_ABSENT;
> 			v |= HPTE_V_VALID;
> 		}
> -		if (v & HPTE_V_VALID)
> +		if (v & HPTE_V_VALID) {
> 			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
> +			r &= ~HPTE_GR_MODIFIED;

It probably makes sense to make this a RESERVED mask, so that if we ever want to use another bit for host information, we can just add that to the respective mask. Also, you should probably clear the reserved bits (or bail out when set) on h_enter.


Alex

> +		}
> 		vcpu->arch.gpr[4 + i * 2] = v;
> 		vcpu->arch.gpr[5 + i * 2] = r;
> 	}
> -- 
> 1.7.10.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood - Nov. 19, 2012, 5:20 p.m.
On 11/19/2012 06:36:42 AM, Alexander Graf wrote:
> 
> On 14.11.2012, at 05:32, Paul Mackerras wrote:
> 
> > +/*
> > + * Note modification of an HPTE; set the HPTE modified bit
> > + * if it wasn't modified before and anyone is interested.
> > + */
> > +static inline void note_hpte_modification(struct kvm *kvm,
> > +					  struct revmap_entry *rev)
> > +{
> > +	if (!(rev->guest_rpte & HPTE_GR_MODIFIED) &&
> > +	    atomic_read(&kvm->arch.hpte_mod_interest))
> 
> Aren't atomic reads expensive?

No, it's a simple load instruction (done with inline asm so the  
compiler can't break it up into smaller loads).

-Scott
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf - Nov. 19, 2012, 5:40 p.m.
On 19.11.2012, at 18:20, Scott Wood wrote:

> On 11/19/2012 06:36:42 AM, Alexander Graf wrote:
>> On 14.11.2012, at 05:32, Paul Mackerras wrote:
>> > +/*
>> > + * Note modification of an HPTE; set the HPTE modified bit
>> > + * if it wasn't modified before and anyone is interested.
>> > + */
>> > +static inline void note_hpte_modification(struct kvm *kvm,
>> > +					  struct revmap_entry *rev)
>> > +{
>> > +	if (!(rev->guest_rpte & HPTE_GR_MODIFIED) &&
>> > +	    atomic_read(&kvm->arch.hpte_mod_interest))
>> Aren't atomic reads expensive?
> 
> No, it's a simple load instruction (done with inline asm so the compiler can't break it up into smaller loads).

Oh, you're right. Fun. I would've assumed that some cache line locking needs to be done to guarantee atomicity :). But this of course makes things even easier.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras - Nov. 19, 2012, 11:25 p.m.
On Mon, Nov 19, 2012 at 01:36:42PM +0100, Alexander Graf wrote:
> 
> Aren't atomic reads expensive? Doing one on every guest HPTE modification (when nobody is listening) sounds quite excessive. Wouldn't it be cheaper to just always update the bit?

As Scott said, it's just an ordinary load.  The reason for not setting
the bit always is to make the first pass of reading the HPT quicker.
That first pass sends all HPTEs, and clears the modified bits of any
HPTEs that have it set.  If the modified bit is set, we have to lock
the HPTE in order to clear the modified bit, even if the HPTE is
invalid, whereas we don't have to lock invalid HPTEs if they don't
have the modified bit set.

> It probably makes sense to make this a RESERVED mask, so that if we ever want to use another bit for host information, we can just add that to the respective mask. Also, you should probably clear the reserved bits (or bail out when set) on h_enter.

OK, and yes good point about clearing the reserved bits on H_ENTER.

Regards,
Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 1472a5b..4ca4f25 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -50,6 +50,12 @@  extern int kvm_hpt_order;		/* order of preallocated HPTs */
 #define HPTE_V_HVLOCK	0x40UL
 #define HPTE_V_ABSENT	0x20UL
 
+/*
+ * We use this bit in the guest_rpte field of the revmap entry
+ * to indicate a modified HPTE.
+ */
+#define HPTE_GR_MODIFIED	(1ul << 62)
+
 static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
 {
 	unsigned long tmp, old;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3093896..58c7264 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -248,6 +248,7 @@  struct kvm_arch {
 	atomic_t vcpus_running;
 	unsigned long hpt_npte;
 	unsigned long hpt_mask;
+	atomic_t hpte_mod_interest;
 	spinlock_t slot_phys_lock;
 	unsigned short last_vcpu[NR_CPUS];
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 362dffe..726231a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -66,6 +66,18 @@  void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if it wasn't modified before and anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+					  struct revmap_entry *rev)
+{
+	if (!(rev->guest_rpte & HPTE_GR_MODIFIED) &&
+	    atomic_read(&kvm->arch.hpte_mod_interest))
+		rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 				struct revmap_entry *rev,
@@ -287,8 +299,10 @@  long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	rev = &kvm->arch.revmap[pte_index];
 	if (realmode)
 		rev = real_vmalloc_addr(rev);
-	if (rev)
+	if (rev) {
 		rev->guest_rpte = g_ptel;
+		note_hpte_modification(kvm, rev);
+	}
 
 	/* Link HPTE into reverse-map chain */
 	if (pteh & HPTE_V_VALID) {
@@ -392,7 +406,8 @@  long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 		/* Read PTE low word after tlbie to get final R/C values */
 		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
 	}
-	r = rev->guest_rpte;
+	r = rev->guest_rpte & ~HPTE_GR_MODIFIED;
+	note_hpte_modification(kvm, rev);
 	unlock_hpte(hpte, 0);
 
 	vcpu->arch.gpr[4] = v;
@@ -466,6 +481,7 @@  long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 
 			args[j] = ((0x80 | flags) << 56) + pte_index;
 			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+			note_hpte_modification(kvm, rev);
 
 			if (!(hp[0] & HPTE_V_VALID)) {
 				/* insert R and C bits from PTE */
@@ -555,6 +571,7 @@  long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (rev) {
 		r = (rev->guest_rpte & ~mask) | bits;
 		rev->guest_rpte = r;
+		note_hpte_modification(kvm, rev);
 	}
 	r = (hpte[1] & ~mask) | bits;
 
@@ -606,8 +623,10 @@  long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
 		}
-		if (v & HPTE_V_VALID)
+		if (v & HPTE_V_VALID) {
 			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+			r &= ~HPTE_GR_MODIFIED;
+		}
 		vcpu->arch.gpr[4 + i * 2] = v;
 		vcpu->arch.gpr[5 + i * 2] = r;
 	}