diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5..f0c51c5 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -18,11 +18,14 @@
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
+/* Use a chunk size of 256kB, since this is the smallest allowed HPT size */
+#define KVM_CHUNK_ORDER		18
+#define KVM_CHUNK_SIZE		(1ul << KVM_CHUNK_ORDER)
+
 #define KVM_LINEAR_RMA		0
 #define KVM_LINEAR_HPT		1
 
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
+static struct kvmppc_linear_info *kvm_alloc_linear(int type, int order);
 static void kvm_release_linear(struct kvmppc_linear_info *ri);
 
 int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
@@ -38,7 +41,8 @@ EXPORT_SYMBOL_GPL(kvm_hpt_order);
  * much physically contiguous memory after the system is up and running,
  * we preallocate a set of RMAs in early boot for KVM to use.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
+#define DEFAULT_RMA_SIZE	(64 << 20)	/* 64MB */
+static unsigned long kvm_rma_size = DEFAULT_RMA_SIZE;
 static unsigned long kvm_rma_count;
 
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
@@ -91,7 +95,7 @@ early_param("kvm_rma_count", early_parse_rma_count);
 
 struct kvmppc_linear_info *kvm_alloc_rma(void)
 {
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	return kvm_alloc_linear(KVM_LINEAR_RMA, __ilog2(kvm_rma_size));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
@@ -124,7 +128,7 @@ early_param("kvm_hpt_count", early_parse_hpt_count);
 
 struct kvmppc_linear_info *kvm_alloc_hpt(void)
 {
-	return kvm_alloc_linear(KVM_LINEAR_HPT);
+	return kvm_alloc_linear(KVM_LINEAR_HPT, kvm_hpt_order);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
@@ -134,73 +138,295 @@ void kvm_release_hpt(struct kvmppc_linear_info *li)
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
-/*************** generic *************/
+/* Bitmap allocator functions */
 
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
+/*
+ * Set `nbits' bits in a bitmap starting at bit `start', checking that
+ * the bits are 0 to begin with.  If they aren't, stop and return the
+ * number of bits remaining (not set to 1).
+ */
+static long set_zero_bits(unsigned long *bitmap, unsigned long start,
+			  unsigned long nbits)
+{
+	unsigned long mask;
+	unsigned long offset;
+	unsigned long *p;
+
+	offset = start & (BITS_PER_LONG - 1);
+	mask = ~0UL << offset;
+	nbits += offset;
+	p = bitmap + start / BITS_PER_LONG;
+	while (nbits >= BITS_PER_LONG) {
+		if (*p & mask)
+			return nbits;
+		*p++ |= mask;
+		mask = ~0UL;
+		nbits -= BITS_PER_LONG;
+	}
+	if (nbits) {
+		mask &= ~(~0UL << nbits);
+		if (*p & mask)
+			return nbits;
+		*p |= mask;
+	}
+	return 0;
+}
 
-static void __init kvm_linear_init_one(ulong size, int count, int type)
+/*
+ * Clear `nbits' bits in a bitmap starting at bit `start', checking that
+ * the bits are 1 to begin with.  If they aren't, stop and return the
+ * number of bits remaining (not set to 0).
+ */
+static long clear_one_bits(unsigned long *bitmap, unsigned long start,
+			   unsigned long nbits)
 {
-	unsigned long i;
-	unsigned long j, npages;
-	void *linear;
-	struct page *pg;
-	const char *typestr;
-	struct kvmppc_linear_info *linear_info;
+	unsigned long mask;
+	unsigned long offset;
+	unsigned long *p;
+
+	offset = start & (BITS_PER_LONG - 1);
+	mask = ~0UL << offset;
+	nbits += offset;
+	p = bitmap + start / BITS_PER_LONG;
+	while (nbits >= BITS_PER_LONG) {
+		if (~*p & mask)
+			return nbits;
+		*p++ &= ~mask;
+		mask = ~0UL;
+		nbits -= BITS_PER_LONG;
+	}
+	if (nbits) {
+		mask &= ~(~0UL << nbits);
+		if (~*p & mask)
+			return nbits;
+		*p &= ~mask;
+	}
+	return 0;
+}
 
-	if (!count)
-		return;
+static void bitmap_free(unsigned long *bitmap, unsigned long start,
+			unsigned long n)
+{
+	long nr;
+
+	nr = clear_one_bits(bitmap, start, n);
+	if (nr) {
+		pr_err("KVM: oops, freeing not-in-use linear memory\n");
+		if (nr < n)
+			set_zero_bits(bitmap, start, n - nr);
+	}
+}
+
+static unsigned long lsbs[6] = {
+	~0UL,			/* LSBs of aligned 1-bit fields */
+	~0UL / 3,		/* LSBs of aligned 2-bit fields */
+	~0UL / 0xf,		/* LSBs of aligned 4-bit fields */
+	~0UL / 0xff,		/* LSBs of aligned 8-bit fields */
+	~0UL / 0xffff,		/* LSBs of aligned 16-bit fields */
+	~0UL / 0xffffffff,	/* LSBs of aligned 32-bit fields */
+};
+
+static inline int find_ls_one(unsigned long n)
+{
+	return __ffs(n);
+}
+
+static inline int find_ms_one(unsigned long n)
+{
+	return __fls(n);
+}
 
-	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
-	npages = size >> PAGE_SHIFT;
-	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
-	for (i = 0; i < count; ++i) {
-		linear = alloc_bootmem_align(size, size);
-		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			 size >> 20);
-		linear_info[i].base_virt = linear;
-		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
-		linear_info[i].npages = npages;
-		linear_info[i].type = type;
-		list_add_tail(&linear_info[i].list, &free_linears);
-		atomic_set(&linear_info[i].use_count, 0);
-
-		pg = pfn_to_page(linear_info[i].base_pfn);
-		for (j = 0; j < npages; ++j) {
-			atomic_inc(&pg->_count);
-			++pg;
+/*
+ * Allocate a contiguous region of 1 << order bits in a bitmap.
+ * If aligned != 0, require it to be aligned on a multiple of its size.
+ * Returns -1 if no suitable free region could be found.
+ */
+static int bitmap_alloc(unsigned long *bitmap, unsigned long map_length,
+			int order, int aligned)
+{
+	unsigned long n = 1ul << order;
+	int aorder = order;
+	unsigned long map, map2, *p;
+	unsigned long ls, ms;
+	unsigned long i, j, t;
+	unsigned long b, nb, nw, delta;
+
+	if (!order)
+		aligned = 1;
+	else if (!aligned)
+		--aorder;
+
+	/*
+	 * If we are allocating 32 bits or fewer, we can use arithmetic
+	 * tricks to find aligned free regions 2^n bits.
+	 * If we need an aligned allocation, we just look for aligned
+	 * free regions of 2^order bits.
+	 * If we don't need an aligned allocation, we check if each word
+	 * has any aligned free regions of 2^(order - 1) bits, and if it
+	 * does, we find any free regions of n bits by smearing out the
+	 * one (in-use) bits in the word over the following n-1 bits,
+	 * and then looking for any remaining zero bits.
+	 */
+	p = bitmap;
+	if (n < BITS_PER_LONG) {
+		ls = lsbs[aorder];
+		ms = ls << ((1 << aorder) - 1);
+		j = map_length;
+		for (i = 0; i < map_length; i += BITS_PER_LONG) {
+			map = *p++;
+			t = (map - ls) & ~map & ms;
+			if (!t)
+				continue;
+			/* found aligned free space of size 1<<aorder */
+			if (aligned) {
+				j = i + (find_ls_one(t) & ~(n - 1));
+				break;
+			}
+			/* look for unaligned free space */
+			map2 = 0;
+			if (i < map_length - BITS_PER_LONG)
+				map2 = *p;
+			for (b = 1; b < n; b <<= 1) {
+				map |= map >> b;
+				map |= map2 << (BITS_PER_LONG - b);
+				map2 |= map2 >> b;
+			}
+			if (map != ~0UL) {
+				j = i + find_ls_one(~map);
+				break;
+			}
 		}
+	} else if (aligned) {
+		/*
+		 * Aligned allocations of 64 bits or more: we just
+		 * have to find `nw' consecutive 0 words in the bitmap
+		 * starting at multiple of `nw'.
+		 */
+		nw = n / BITS_PER_LONG;
+		j = 0;
+		for (i = 0; i < map_length; i += BITS_PER_LONG) {
+			map = *p++;
+			if (map) {
+				delta = (n - BITS_PER_LONG) & ~i;
+				i += delta;
+				j = i + BITS_PER_LONG;
+				p += delta / BITS_PER_LONG;
+				nw = n / BITS_PER_LONG;
+			} else if (--nw == 0)
+				break;
+		}
+		if (nw)
+			j = map_length;
+	} else {
+		/*
+		 * Unaligned allocations of 64 bits or more: we only
+		 * need to consider groups of consecutive zeros at
+		 * either end of a word, and words that are all zeros.
+		 */
+		nb = 0;
+		j = 0;
+		for (i = 0; i < map_length; i += BITS_PER_LONG) {
+			map = *p++;
+			if (map == 0) {
+				nb += BITS_PER_LONG;
+				if (nb >= n)
+					break;
+				continue;
+			}
+			nb += find_ls_one(map);
+			if (nb >= n)
+				break;
+			j = find_ms_one(map) + 1;
+			nb = BITS_PER_LONG - j;
+			j += i;
+		}
+		if (nb < n)
+			j = map_length;
 	}
+
+	if (j >= map_length)
+		return -1;
+
+	nb = set_zero_bits(bitmap, j, n);
+	if (nb) {
+		pr_err("KVM: Bug in alloc_bitmap\n");
+		if (nb < n)
+			clear_one_bits(bitmap, j, n - nb);
+		return -1;
+	}
+
+	return j;
 }
 
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
+/*************** generic *************/
+
+static unsigned long *linear_bitmap;
+static unsigned long linear_bitmap_len;
+static char *linear_mem;
+static DEFINE_SPINLOCK(linear_lock);
+
+static struct kvmppc_linear_info *kvm_alloc_linear(int type, int order)
 {
-	struct kvmppc_linear_info *ri, *ret;
+	struct kvmppc_linear_info *ri;
+	int aligned = 1;
+	int index;
+
+	/* Do we have any preallocated memory at all? */
+	if (!linear_bitmap_len)
+		return NULL;
+
+	/* Convert log2(bytes) to log2(chunks) */
+	order -= KVM_CHUNK_ORDER;
+	if (order < 0)
+		order = 0;
+
+	/*
+	 * Assume arch 2.06 means POWER7, which doesn't require
+	 * the HPT to be aligned on a multiple of its size,
+	 * but only on a 256kB boundary.
+	 */
+	if (type == KVM_LINEAR_HPT && cpu_has_feature(CPU_FTR_ARCH_206))
+		aligned = 0;
+
+	ri = kzalloc(sizeof(*ri), GFP_KERNEL);
+	if (!ri)
+		return NULL;
 
-	ret = NULL;
 	spin_lock(&linear_lock);
-	list_for_each_entry(ri, &free_linears, list) {
-		if (ri->type != type)
-			continue;
-
-		list_del(&ri->list);
-		atomic_inc(&ri->use_count);
-		memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
-		ret = ri;
-		break;
-	}
+	index = bitmap_alloc(linear_bitmap, linear_bitmap_len, order, aligned);
 	spin_unlock(&linear_lock);
-	return ret;
+	if (index < 0) {
+		kfree(ri);
+		return NULL;
+	}
+
+	ri->base_virt = linear_mem + index * KVM_CHUNK_SIZE;
+	ri->base_pfn = __pa(ri->base_virt) >> PAGE_SHIFT;
+	ri->npages = (KVM_CHUNK_SIZE >> PAGE_SHIFT) << order;
+	ri->type = type;
+	atomic_set(&ri->use_count, 1);
+
+	memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
+
+	return ri;
 }
 
 static void kvm_release_linear(struct kvmppc_linear_info *ri)
 {
-	if (atomic_dec_and_test(&ri->use_count)) {
-		spin_lock(&linear_lock);
-		list_add_tail(&ri->list, &free_linears);
-		spin_unlock(&linear_lock);
+	unsigned long index, n;
 
+	if (atomic_dec_and_test(&ri->use_count)) {
+		index = ((char *)ri->base_virt - linear_mem) / KVM_CHUNK_SIZE;
+		n = ri->npages / (KVM_CHUNK_SIZE >> PAGE_SHIFT);
+		if (index < linear_bitmap_len &&
+		    index + n < linear_bitmap_len) {
+			spin_lock(&linear_lock);
+			bitmap_free(linear_bitmap, index, n);
+			spin_unlock(&linear_lock);
+		} else {
+			pr_err("KVM: oops, corrupted linear_info %p\n", ri);
+		}
+		kfree(ri);
 	}
 }
 
@@ -211,23 +437,51 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
  */
 void __init kvm_linear_init(void)
 {
-	/* HPT */
-	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
+	unsigned long total, nchunks, align;
+	struct page *pg;
+	unsigned long j, npages;
 
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
+	/* only do this if HV KVM is possible on this cpu */
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
 		return;
 
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
+	/* HPT */
+	total = kvm_hpt_count << kvm_hpt_order;
+
+	/* On PPC970 in HV mode, allow space for RMAs */
+	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+		if (lpcr_rmls(kvm_rma_size) < 0) {
+			pr_err("RMA size of 0x%lx not supported, using 0x%x\n",
+			       kvm_rma_size, DEFAULT_RMA_SIZE);
+			kvm_rma_size = DEFAULT_RMA_SIZE;
+		}
+		total += kvm_rma_count * kvm_rma_size;
+	}
 
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+	if (!total)
 		return;
-	}
 
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
+	/* round up to multiple of amount represented by a bitmap word (16MB) */
+	total = (total + KVM_CHUNK_SIZE * BITS_PER_LONG - 1) &
+		~(KVM_CHUNK_SIZE * BITS_PER_LONG - 1);
+
+	/* allocate bitmap of in-use chunks */
+	nchunks = total / KVM_CHUNK_SIZE;
+	linear_bitmap = alloc_bootmem(nchunks / BITS_PER_BYTE);
+	memset(linear_bitmap, 0, nchunks / BITS_PER_BYTE);
+	linear_bitmap_len = nchunks;
+
+	/* ask for maximum useful alignment, i.e. max power of 2 <= total */
+	align = 1ul << __ilog2(total);
+
+	linear_mem = alloc_bootmem_align(total, align);
+	pr_info("Allocated KVM memory at %p (%ld MB)\n", linear_mem,
+		total >> 20);
+
+	npages = total >> PAGE_SHIFT;
+	pg = virt_to_page(linear_mem);
+	for (j = 0; j < npages; ++j) {
+		atomic_inc(&pg->_count);
+		++pg;
+	}
 }
