Patchwork [RFC,4/4] KVM: Decouple rmap_pde from lpage_info write_count

login
register
mail settings
Submitter Takuya Yoshikawa
Date Jan. 23, 2012, 10:45 a.m.
Message ID <20120123194538.e2e75cd3.yoshikawa.takuya@oss.ntt.co.jp>
Download mbox | patch
Permalink /patch/137342/
State New
Headers show

Comments

Takuya Yoshikawa - Jan. 23, 2012, 10:45 a.m.
Though we have one rmap array for every level, those for large pages,
called rmap_pde, are coupled with write_count information and constitute
lpage_info arrays.

To hide these implementation details, we are now using __gfn_to_rmap()
which includes likely(level == PT_PAGE_TABLE_LEVEL) heuristics; this
is not good because we know that it always fails for higher levels.

Furthermore, when we traverse rmap arrays to write protect pages during
dirty logging, the current layout reduces the locality of their elements
by placing write_count next to rmap_pde in lpage_info.

This patch mitigates this problem by decoupling rmap_pde from lpage_info
write_count and making the rmap array two-dimensional, so that it holds
the old rmap_pde elements itself.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
 arch/ia64/kvm/kvm-ia64.c            |    8 ++++----
 arch/powerpc/kvm/book3s_64_mmu_hv.c |    6 +++---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |    4 ++--
 arch/x86/kvm/mmu.c                  |    9 +++------
 arch/x86/kvm/x86.c                  |    4 ++--
 include/linux/kvm_host.h            |    3 +--
 virt/kvm/kvm_main.c                 |   25 ++++++++++++++++---------
 7 files changed, 31 insertions(+), 28 deletions(-)

Patch

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 8ca7261..b17eaa1 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1376,8 +1376,8 @@  static void kvm_release_vm_pages(struct kvm *kvm)
 	kvm_for_each_memslot(memslot, slots) {
 		base_gfn = memslot->base_gfn;
 		for (j = 0; j < memslot->npages; j++) {
-			if (memslot->rmap[j])
-				put_page((struct page *)memslot->rmap[j]);
+			if (memslot->rmap[0][j])
+				put_page((struct page *)memslot->rmap[0][j]);
 		}
 	}
 }
@@ -1591,12 +1591,12 @@  int kvm_arch_prepare_memory_region(struct kvm *kvm,
 			kvm_set_pmt_entry(kvm, base_gfn + i,
 					pfn << PAGE_SHIFT,
 				_PAGE_AR_RWX | _PAGE_MA_WB);
-			memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
+			memslot->rmap[0][i] = (unsigned long)pfn_to_page(pfn);
 		} else {
 			kvm_set_pmt_entry(kvm, base_gfn + i,
 					GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
 					_PAGE_MA_UC);
-			memslot->rmap[i] = 0;
+			memslot->rmap[0][i] = 0;
 			}
 	}
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 783cd35..81f9036 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -631,7 +631,7 @@  int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_unlock;
 	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 
-	rmap = &memslot->rmap[gfn - memslot->base_gfn];
+	rmap = &memslot->rmap[0][gfn - memslot->base_gfn];
 	lock_rmap(rmap);
 
 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -693,7 +693,7 @@  static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset],
+			ret = handler(kvm, &memslot->rmap[0][gfn_offset],
 				      memslot->base_gfn + gfn_offset);
 			retval |= ret;
 		}
@@ -928,7 +928,7 @@  long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 	unsigned long *rmapp, *map;
 
 	preempt_disable();
-	rmapp = memslot->rmap;
+	rmapp = memslot->rmap[0];
 	map = memslot->dirty_bitmap;
 	for (i = 0; i < memslot->npages; ++i) {
 		if (kvm_test_clear_dirty(kvm, rmapp))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5f3c60b..4df9b4a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -103,7 +103,7 @@  static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return;
 
-	rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+	rmap = real_vmalloc_addr(&memslot->rmap[0][gfn - memslot->base_gfn]);
 	lock_rmap(rmap);
 
 	head = *rmap & KVMPPC_RMAP_INDEX;
@@ -199,7 +199,7 @@  long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (!slot_is_aligned(memslot, psize))
 		return H_PARAMETER;
 	slot_fn = gfn - memslot->base_gfn;
-	rmap = &memslot->rmap[slot_fn];
+	rmap = &memslot->rmap[0][slot_fn];
 
 	if (!kvm->arch.using_mmu_notifiers) {
 		physp = kvm->arch.slot_phys[memslot->id];
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 12f5c99..61c66d2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -948,13 +948,10 @@  static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
 static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 				    struct kvm_memory_slot *slot)
 {
-	struct kvm_lpage_info *linfo;
-
-	if (likely(level == PT_PAGE_TABLE_LEVEL))
-		return &slot->rmap[gfn - slot->base_gfn];
+	gfn_t idx;
 
-	linfo = lpage_info_slot(gfn, slot, level);
-	return &linfo->rmap_pde;
+	idx = gfn_to_index(gfn, slot->base_gfn, level);
+	return &slot->rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9c912f0b..136d965 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6137,7 +6137,7 @@  int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 *x86 needs to hanlde !user_alloc case.
 	 */
 	if (!user_alloc) {
-		if (npages && !old.rmap) {
+		if (npages && !old.rmap[0]) {
 			unsigned long userspace_addr;
 
 			down_write(&current->mm->mmap_sem);
@@ -6167,7 +6167,7 @@  void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+	if (!user_alloc && !old.user_alloc && old.rmap[0] && !npages) {
 		int ret;
 
 		down_write(&current->mm->mmap_sem);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 06d4e41..1f9eb1d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -172,7 +172,6 @@  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
 
 struct kvm_lpage_info {
-	unsigned long rmap_pde;
 	int write_count;
 };
 
@@ -180,10 +179,10 @@  struct kvm_memory_slot {
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long flags;
-	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
 	unsigned long *dirty_bitmap_head;
 	unsigned long nr_dirty_pages;
+	unsigned long *rmap[KVM_NR_PAGE_SIZES];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 	unsigned long userspace_addr;
 	int user_alloc;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4f2574f..27e7a89 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -537,8 +537,12 @@  static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 {
 	int i;
 
-	if (!dont || free->rmap != dont->rmap)
-		vfree(free->rmap);
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		if (!dont || free->rmap[i] != dont->rmap[i]) {
+			vfree(free->rmap[i]);
+			free->rmap[i] = NULL;
+		}
+	}
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
@@ -552,7 +556,6 @@  static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 	}
 
 	free->npages = 0;
-	free->rmap = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -779,10 +782,9 @@  int __kvm_set_memory_region(struct kvm *kvm,
 
 	/* Allocate if a slot is being created */
 #ifndef CONFIG_S390
-	if (npages && !new.rmap) {
-		new.rmap = vzalloc(npages * sizeof(*new.rmap));
-
-		if (!new.rmap)
+	if (npages && !new.rmap[0]) {
+		new.rmap[0] = vzalloc(npages * sizeof(*new.rmap[0]));
+		if (!new.rmap[0])
 			goto out_free;
 
 		new.user_alloc = user_alloc;
@@ -806,10 +808,13 @@  int __kvm_set_memory_region(struct kvm *kvm,
 		lpages = gfn_to_index(base_gfn + npages - 1, base_gfn, level) + 1;
 
 		new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
-
 		if (!new.lpage_info[i])
 			goto out_free;
 
+		new.rmap[i + 1] = vzalloc(lpages * sizeof(*new.rmap[i + 1]));
+		if (!new.rmap[i + 1])
+			goto out_free;
+
 		if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
 			new.lpage_info[i][0].write_count = 1;
 		if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
@@ -886,7 +891,9 @@  skip_lpage:
 
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (!npages) {
-		new.rmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES; ++i)
+			new.rmap[i] = NULL;
+
 		new.dirty_bitmap = NULL;
 		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
 			new.lpage_info[i] = NULL;