diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 81ff39f..3629d70 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1689,6 +1689,40 @@ where the guest will clear the flag: when the soft lockup watchdog timer resets
 itself or when a soft lockup is detected.  This ioctl can be called any time
 after pausing the vcpu, but before it is resumed.
 
+4.71 KVM_PPC_ALLOCATE_HTAB
+
+Capability: KVM_CAP_PPC_ALLOC_HTAB
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to u32 containing hash table order (in/out)
+Returns: 0 on success, -1 on error
+
+This requests the host kernel to allocate an MMU hash table for a
+guest using the PAPR paravirtualization interface.  This only does
+anything if the kernel is configured to use the Book 3S HV style of
+virtualization.  Otherwise the capability doesn't exist and the ioctl
+returns an ENOTTY error.  The rest of this description assumes Book 3S
+HV.
+
+There must be no vcpus running when this ioctl is called; if there
+are, it will do nothing and return an EBUSY error.
+
+The parameter is a pointer to a 32-bit unsigned integer variable
+containing the order (log base 2) of the desired size of the hash
+table, which must be between 18 and 46.  On successful return from the
+ioctl, it will have been updated with the order of the hash table that
+was allocated.
+
+If no hash table has been allocated when any vcpu is asked to run
+(with the KVM_RUN ioctl), the host kernel will allocate a
+default-sized hash table (16 MB).
+
+If this ioctl is called when a hash table has already been allocated,
+the kernel will clear out the existing hash table (zero all HPTEs) and
+return the hash table order in the parameter.  (If the guest is using
+the virtualized real-mode area (VRMA) facility, the kernel will
+re-create the VMRA HPTEs on the next KVM_RUN of any vcpu.)
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index b0c08b1..0dd1d86 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -36,11 +36,8 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 #define SPAPR_TCE_SHIFT		12
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER	24
-#define HPT_NPTEG	(1ul << (HPT_ORDER - 7))	/* 128B per pteg */
-#define HPT_NPTE	(HPT_NPTEG << 3)		/* 8 PTEs per PTEG */
-#define HPT_HASH_MASK	(HPT_NPTEG - 1)
+#define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
+extern int kvm_hpt_order;		/* order of preallocated HPTs */
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 42a527e..78f2e27 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -237,6 +237,10 @@ struct kvm_arch {
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
 	int using_mmu_notifiers;
+	u32 hpt_order;
+	atomic_t vcpus_running;
+	unsigned long hpt_npte;
+	unsigned long hpt_mask;
 	struct list_head spapr_tce_tables;
 	spinlock_t slot_phys_lock;
 	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 7f0a3da..fee7150 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -117,7 +117,8 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
 extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
-extern long kvmppc_alloc_hpt(struct kvm *kvm);
+extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
 extern void kvmppc_free_hpt(struct kvm *kvm);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8e6401f..ac669a1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,56 +37,115 @@
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970	63
 
-long kvmppc_alloc_hpt(struct kvm *kvm)
+/* Power architecture requires HPT is at least 256kB */
+#define PPC_MIN_HPT_ORDER	18
+
+long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
-	long lpid;
 	struct revmap_entry *rev;
 	struct kvmppc_linear_info *li;
+	long order = kvm_hpt_order;
 
-	/* Allocate guest's hashed page table */
-	li = kvm_alloc_hpt();
-	if (li) {
-		/* using preallocated memory */
-		hpt = (ulong)li->base_virt;
-		kvm->arch.hpt_li = li;
-	} else {
-		/* using dynamic memory */
+	if (htab_orderp) {
+		order = *htab_orderp;
+		if (order < PPC_MIN_HPT_ORDER)
+			order = PPC_MIN_HPT_ORDER;
+	}
+
+	/*
+	 * If the user wants a different size from default,
+	 * try first to allocate it from the kernel page allocator.
+	 */
+	hpt = 0;
+	if (order != kvm_hpt_order) {
 		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-				       __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
+				       __GFP_NOWARN, order - PAGE_SHIFT);
+		if (!hpt)
+			--order;
 	}
 
+	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
-		pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
-		return -ENOMEM;
+		li = kvm_alloc_hpt();
+		if (li) {
+			hpt = (ulong)li->base_virt;
+			kvm->arch.hpt_li = li;
+			order = kvm_hpt_order;
+		}
 	}
+
+	/* Lastly try successively smaller sizes from the page allocator */
+	while (!hpt && order > PPC_MIN_HPT_ORDER) {
+		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
+				       __GFP_NOWARN, order - PAGE_SHIFT);
+		if (!hpt)
+			--order;
+	}
+
+	if (!hpt)
+		return -ENOMEM;
+
 	kvm->arch.hpt_virt = hpt;
+	kvm->arch.hpt_order = order;
+	/* HPTEs are 2**4 bytes long */
+	kvm->arch.hpt_npte = 1ul << (order - 4);
+	/* 128 (2**7) bytes in each HPTEG */
+	kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
 
 	/* Allocate reverse map array */
-	rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+	rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
 	if (!rev) {
 		pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
 		goto out_freehpt;
 	}
 	kvm->arch.revmap = rev;
+	kvm->arch.sdr1 = __pa(hpt) | (order - 18);
 
-	lpid = kvmppc_alloc_lpid();
-	if (lpid < 0)
-		goto out_freeboth;
-
-	kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
-	kvm->arch.lpid = lpid;
+	pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
+		hpt, order, kvm->arch.lpid);
 
-	pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
+	if (htab_orderp)
+		*htab_orderp = order;
 	return 0;
 
- out_freeboth:
-	vfree(rev);
  out_freehpt:
-	free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+	if (kvm->arch.hpt_li)
+		kvm_release_hpt(kvm->arch.hpt_li);
+	else
+		free_pages(hpt, order - PAGE_SHIFT);
 	return -ENOMEM;
 }
 
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+{
+	long err = -EBUSY;
+	long order;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->arch.rma_setup_done) {
+		kvm->arch.rma_setup_done = 0;
+		/* order rma_setup_done vs. vcpus_running */
+		smp_mb();
+		if (atomic_read(&kvm->arch.vcpus_running)) {
+			kvm->arch.rma_setup_done = 1;
+			goto out;
+		}
+	}
+	if (kvm->arch.hpt_virt) {
+		order = kvm->arch.hpt_order;
+		memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+		*htab_orderp = order;
+		err = 0;
+	} else {
+		err = kvmppc_alloc_hpt(kvm, htab_orderp);
+		order = *htab_orderp;
+	}
+ out:
+	mutex_unlock(&kvm->lock);
+	return err;
+}
+
 void kvmppc_free_hpt(struct kvm *kvm)
 {
 	kvmppc_free_lpid(kvm->arch.lpid);
@@ -94,7 +153,8 @@ void kvmppc_free_hpt(struct kvm *kvm)
 	if (kvm->arch.hpt_li)
 		kvm_release_hpt(kvm->arch.hpt_li);
 	else
-		free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+		free_pages(kvm->arch.hpt_virt,
+			   kvm->arch.hpt_order - PAGE_SHIFT);
 }
 
 /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
@@ -119,6 +179,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	unsigned long psize;
 	unsigned long hp0, hp1;
 	long ret;
+	struct kvm *kvm = vcpu->kvm;
 
 	psize = 1ul << porder;
 	npages = memslot->npages >> (porder - PAGE_SHIFT);
@@ -127,8 +188,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	if (npages > 1ul << (40 - porder))
 		npages = 1ul << (40 - porder);
 	/* Can't use more than 1 HPTE per HPTEG */
-	if (npages > HPT_NPTEG)
-		npages = HPT_NPTEG;
+	if (npages > kvm->arch.hpt_mask + 1)
+		npages = kvm->arch.hpt_mask + 1;
 
 	hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
 		HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@@ -138,7 +199,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	for (i = 0; i < npages; ++i) {
 		addr = i << porder;
 		/* can't use hpt_hash since va > 64 bits */
-		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
+		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
 		/*
 		 * We assume that the hash table is empty and no
 		 * vcpus are using it at this stage.  Since we create
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9079357..f8d477b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -56,7 +56,7 @@
 /* #define EXIT_DEBUG_INT */
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
@@ -1068,11 +1068,15 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		return -EINTR;
 	}
 
-	/* On the first time here, set up VRMA or RMA */
+	atomic_inc(&vcpu->kvm->arch.vcpus_running);
+	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+	smp_mb();
+
+	/* On the first time here, set up HTAB and VRMA or RMA */
 	if (!vcpu->kvm->arch.rma_setup_done) {
-		r = kvmppc_hv_setup_rma(vcpu);
+		r = kvmppc_hv_setup_htab_rma(vcpu);
 		if (r)
-			return r;
+			goto out;
 	}
 
 	flush_fp_to_thread(current);
@@ -1090,6 +1094,9 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			kvmppc_core_prepare_to_enter(vcpu);
 		}
 	} while (r == RESUME_GUEST);
+
+ out:
+	atomic_dec(&vcpu->kvm->arch.vcpus_running);
 	return r;
 }
 
@@ -1384,7 +1391,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 {
 }
 
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
@@ -1403,6 +1410,15 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
 	if (kvm->arch.rma_setup_done)
 		goto out;	/* another vcpu beat us to it */
 
+	/* Allocate hashed page table (if not done already) and reset it */
+	if (!kvm->arch.hpt_virt) {
+		err = kvmppc_alloc_hpt(kvm, NULL);
+		if (err) {
+			pr_err("KVM: Couldn't alloc HPT\n");
+			goto out;
+		}
+	}
+
 	/* Look up the memslot for guest physical address 0 */
 	memslot = gfn_to_memslot(kvm, 0);
 
@@ -1514,13 +1530,14 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
 
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
-	long r;
-	unsigned long lpcr;
+	unsigned long lpcr, lpid;
 
-	/* Allocate hashed page table */
-	r = kvmppc_alloc_hpt(kvm);
-	if (r)
-		return r;
+	/* Allocate the guest's logical partition ID */
+
+	lpid = kvmppc_alloc_lpid();
+	if (lpid < 0)
+		return -ENOMEM;
+	kvm->arch.lpid = lpid;
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
 
@@ -1530,7 +1547,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 
 	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
 		/* PPC970; HID4 is effectively the LPCR */
-		unsigned long lpid = kvm->arch.lpid;
 		kvm->arch.host_lpid = 0;
 		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
 		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index e1b60f5..99777eb 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -25,6 +25,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type);
 static struct kvmppc_linear_info *kvm_alloc_linear(int type);
 static void kvm_release_linear(struct kvmppc_linear_info *ri);
 
+int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
+
 /*************** RMA *************/
 
 /*
@@ -209,7 +211,7 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
 void __init kvm_linear_init(void)
 {
 	/* HPT */
-	kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
+	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
 
 	/* RMA */
 	/* Only do this on PPC970 in HV mode */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index def880a..4f4f24e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -237,7 +237,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 	/* Find and lock the HPTEG slot to use */
  do_insert:
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 	if (likely((flags & H_EXACT) == 0)) {
 		pte_index &= ~7UL;
@@ -352,7 +352,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned long v, r, rb;
 	struct revmap_entry *rev;
 
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
@@ -419,7 +419,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 				i = 4;
 				break;
 			}
-			if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+			if (req != 1 || flags == 3 ||
+			    pte_index >= kvm->arch.hpt_npte) {
 				/* parameter error */
 				args[j] = ((0xa0 | flags) << 56) + pte_index;
 				ret = H_PARAMETER;
@@ -520,7 +521,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct revmap_entry *rev;
 	unsigned long v, r, rb, mask, bits;
 
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 
 	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
@@ -582,7 +583,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	int i, n = 1;
 	struct revmap_entry *rev = NULL;
 
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 	if (flags & H_READ_4) {
 		pte_index &= ~3;
@@ -677,7 +678,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 		somask = (1UL << 28) - 1;
 		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
 	}
-	hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
+	hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
 	avpn = slb_v & ~(somask >> 16);	/* also includes B */
 	avpn |= (eaddr & somask) >> 16;
 
@@ -722,7 +723,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 		if (val & HPTE_V_SECONDARY)
 			break;
 		val |= HPTE_V_SECONDARY;
-		hash = hash ^ HPT_HASH_MASK;
+		hash = hash ^ kvm->arch.hpt_mask;
 	}
 	return -1;
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 58ad860..c9933fe 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -246,6 +246,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #endif
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_CAP_SPAPR_TCE:
+	case KVM_CAP_PPC_ALLOC_HTAB:
 		r = 1;
 		break;
 	case KVM_CAP_PPC_SMT:
@@ -794,6 +795,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+
+	case KVM_PPC_ALLOCATE_HTAB: {
+		struct kvm *kvm = filp->private_data;
+		u32 htab_order;
+
+		r = -EFAULT;
+		if (get_user(htab_order, (u32 __user *)argp))
+			break;
+		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+		if (r)
+			break;
+		r = -EFAULT;
+		if (put_user(htab_order, (u32 __user *)argp))
+			break;
+		r = 0;
+		break;
+	}
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 	default:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 7a9dd4b..f57cbcb 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -590,6 +590,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_SYNC_REGS 74
 #define KVM_CAP_PCI_2_3 75
 #define KVM_CAP_KVMCLOCK_CTRL 76
+#define KVM_CAP_PPC_ALLOC_HTAB 77
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -789,6 +790,8 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_PCI_2_3 */
 #define KVM_ASSIGN_SET_INTX_MASK  _IOW(KVMIO,  0xa4, \
 				       struct kvm_assigned_pci_dev)
+/* Available with KVM_CAP_PPC_ALLOC_HTAB */
+#define KVM_PPC_ALLOCATE_HTAB	  _IOWR(KVMIO, 0xa5, __u32)
 
 /*
  * ioctls for vcpu fds
