Patchwork [RFC,3/3] KVM: Push rmap into kvm_arch_memory_slot

login
register
mail settings
Submitter Takuya Yoshikawa
Date Aug. 1, 2012, 9:03 a.m.
Message ID <20120801180328.d5b48ef3.yoshikawa.takuya@oss.ntt.co.jp>
Download mbox | patch
Permalink /patch/174406/
State New
Headers show

Comments

Takuya Yoshikawa - Aug. 1, 2012, 9:03 a.m.
Two reasons:
 - x86 can integrate rmap and rmap_pde and remove heuristics in
   __gfn_to_rmap().
 - Some architectures do not need rmap.

Since rmap is one of the most memory consuming stuff in KVM, ppc'd
better restrict the allocation to Book3S HV.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_host.h |    1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c |    6 ++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |    4 +-
 arch/powerpc/kvm/powerpc.c          |    8 +++++
 arch/x86/include/asm/kvm_host.h     |    2 +-
 arch/x86/kvm/mmu.c                  |    5 +--
 arch/x86/kvm/x86.c                  |   55 ++++++++++++++++++++--------------
 include/linux/kvm_host.h            |    1 -
 virt/kvm/kvm_main.c                 |   11 +------
 9 files changed, 49 insertions(+), 44 deletions(-)
Paul Mackerras - Aug. 2, 2012, 11:59 p.m.
On Wed, Aug 01, 2012 at 06:03:28PM +0900, Takuya Yoshikawa wrote:
> Two reasons:
>  - x86 can integrate rmap and rmap_pde and remove heuristics in
>    __gfn_to_rmap().
>  - Some architectures do not need rmap.
> 
> Since rmap is one of the most memory consuming stuff in KVM, ppc'd
> better restrict the allocation to Book3S HV.
> 
> Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
> Cc: Paul Mackerras <paulus@samba.org>

Looks like a worthwhile change.  I do have some patches in preparation
that will intersect with this, but I can manage that once these
patches go upstream.

Acked-by: Paul Mackerras <paulus@samba.org>
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 572ad01..a29e091 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -221,6 +221,7 @@  struct revmap_entry {
 #define KVMPPC_GOT_PAGE		0x80
 
 struct kvm_arch_memory_slot {
+	unsigned long *rmap;
 };
 
 struct kvm_arch {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3c635c0..d95d113 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -705,7 +705,7 @@  int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_unlock;
 	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 
-	rmap = &memslot->rmap[gfn - memslot->base_gfn];
+	rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
 	lock_rmap(rmap);
 
 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -788,7 +788,7 @@  static int kvm_handle_hva_range(struct kvm *kvm,
 		for (; gfn < gfn_end; ++gfn) {
 			gfn_t gfn_offset = gfn - memslot->base_gfn;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
+			ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
 			retval |= ret;
 		}
 	}
@@ -1036,7 +1036,7 @@  long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 	unsigned long *rmapp, *map;
 
 	preempt_disable();
-	rmapp = memslot->rmap;
+	rmapp = memslot->arch.rmap;
 	map = memslot->dirty_bitmap;
 	for (i = 0; i < memslot->npages; ++i) {
 		if (kvm_test_clear_dirty(kvm, rmapp))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5c70d19..56ac1a5 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -84,7 +84,7 @@  static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return;
 
-	rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
 	lock_rmap(rmap);
 
 	head = *rmap & KVMPPC_RMAP_INDEX;
@@ -180,7 +180,7 @@  long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (!slot_is_aligned(memslot, psize))
 		return H_PARAMETER;
 	slot_fn = gfn - memslot->base_gfn;
-	rmap = &memslot->rmap[slot_fn];
+	rmap = &memslot->arch.rmap[slot_fn];
 
 	if (!kvm->arch.using_mmu_notifiers) {
 		physp = kvm->arch.slot_phys[memslot->id];
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 87f4dc8..879b14a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -302,10 +302,18 @@  long kvm_arch_dev_ioctl(struct file *filp,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
+	if (!dont || free->arch.rmap != dont->arch.rmap) {
+		vfree(free->arch.rmap);
+		free->arch.rmap = NULL;
+	}
 }
 
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
+	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+	if (!slot->arch.rmap)
+		return -ENOMEM;
+
 	return 0;
 }
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48e7131..1309e69 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -504,7 +504,7 @@  struct kvm_lpage_info {
 };
 
 struct kvm_arch_memory_slot {
-	unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
+	unsigned long *rmap[KVM_NR_PAGE_SIZES];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ee768bb..aa9a987 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -970,11 +970,8 @@  static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 {
 	unsigned long idx;
 
-	if (likely(level == PT_PAGE_TABLE_LEVEL))
-		return &slot->rmap[gfn - slot->base_gfn];
-
 	idx = gfn_to_index(gfn, slot->base_gfn, level);
-	return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
+	return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 701dbd4..f9fc9b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6299,14 +6299,18 @@  void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 {
 	int i;
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
-			kvm_kvfree(free->arch.rmap_pde[i]);
-			free->arch.rmap_pde[i] = NULL;
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
+			kvm_kvfree(free->arch.rmap[i]);
+			free->arch.rmap[i] = NULL;
 		}
-		if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
-			kvm_kvfree(free->arch.lpage_info[i]);
-			free->arch.lpage_info[i] = NULL;
+		if (i == 0)
+			continue;
+
+		if (!dont || free->arch.lpage_info[i - 1] !=
+			     dont->arch.lpage_info[i - 1]) {
+			kvm_kvfree(free->arch.lpage_info[i - 1]);
+			free->arch.lpage_info[i - 1] = NULL;
 		}
 	}
 }
@@ -6315,28 +6319,30 @@  int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
 	int i;
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		unsigned long ugfn;
 		int lpages;
-		int level = i + 2;
+		int level = i + 1;
 
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
-		slot->arch.rmap_pde[i] =
-			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
-		if (!slot->arch.rmap_pde[i])
+		slot->arch.rmap[i] =
+			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
+		if (!slot->arch.rmap[i])
 			goto out_free;
+		if (i == 0)
+			continue;
 
-		slot->arch.lpage_info[i] =
-			kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
-		if (!slot->arch.lpage_info[i])
+		slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
+					sizeof(*slot->arch.lpage_info[i - 1]));
+		if (!slot->arch.lpage_info[i - 1])
 			goto out_free;
 
 		if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
-			slot->arch.lpage_info[i][0].write_count = 1;
+			slot->arch.lpage_info[i - 1][0].write_count = 1;
 		if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
-			slot->arch.lpage_info[i][lpages - 1].write_count = 1;
+			slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
 		ugfn = slot->userspace_addr >> PAGE_SHIFT;
 		/*
 		 * If the gfn and userspace address are not aligned wrt each
@@ -6348,18 +6354,21 @@  int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 			unsigned long j;
 
 			for (j = 0; j < lpages; ++j)
-				slot->arch.lpage_info[i][j].write_count = 1;
+				slot->arch.lpage_info[i - 1][j].write_count = 1;
 		}
 	}
 
 	return 0;
 
 out_free:
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		kvm_kvfree(slot->arch.rmap_pde[i]);
-		kvm_kvfree(slot->arch.lpage_info[i]);
-		slot->arch.rmap_pde[i] = NULL;
-		slot->arch.lpage_info[i] = NULL;
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		kvm_kvfree(slot->arch.rmap[i]);
+		slot->arch.rmap[i] = NULL;
+		if (i == 0)
+			continue;
+
+		kvm_kvfree(slot->arch.lpage_info[i - 1]);
+		slot->arch.lpage_info[i - 1] = NULL;
 	}
 	return -ENOMEM;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dbc65f9..3c16f0f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -213,7 +213,6 @@  struct kvm_memory_slot {
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long flags;
-	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
 	struct kvm_arch_memory_slot arch;
 	unsigned long userspace_addr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bcf973e..14ec567 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -550,16 +550,12 @@  static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	if (!dont || free->rmap != dont->rmap)
-		vfree(free->rmap);
-
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
 	kvm_arch_free_memslot(free, dont);
 
 	free->npages = 0;
-	free->rmap = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -768,11 +764,7 @@  int __kvm_set_memory_region(struct kvm *kvm,
 	if (npages && !old.npages) {
 		new.user_alloc = user_alloc;
 		new.userspace_addr = mem->userspace_addr;
-#ifndef CONFIG_S390
-		new.rmap = vzalloc(npages * sizeof(*new.rmap));
-		if (!new.rmap)
-			goto out_free;
-#endif /* not defined CONFIG_S390 */
+
 		if (kvm_arch_create_memslot(&new, npages))
 			goto out_free;
 	}
@@ -831,7 +823,6 @@  int __kvm_set_memory_region(struct kvm *kvm,
 
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (!npages) {
-		new.rmap = NULL;
 		new.dirty_bitmap = NULL;
 		memset(&new.arch, 0, sizeof(new.arch));
 	}