Patchwork [-V3,3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation

login
register
mail settings
Submitter Aneesh Kumar K.V
Date July 2, 2013, 5:45 a.m.
Message ID <1372743918-12293-3-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/256275/
State Not Applicable
Headers show

Comments

Aneesh Kumar K.V - July 2, 2013, 5:45 a.m.
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Older versions of the Power architecture use the Real Mode Offset register and Real Mode Limit
Selector for mapping the guest Real Mode Area. The guest RMA should be physically
contiguous since we use the range when address translation is not enabled.

This patch switches the RMA allocation code to use the contiguous memory allocator. The patch
also removes the linear allocator, which is not used any more.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   1 +
 arch/powerpc/include/asm/kvm_host.h      |  12 +--
 arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
 arch/powerpc/kernel/setup_64.c           |   2 -
 arch/powerpc/kvm/book3s_hv.c             |  27 +++--
 arch/powerpc/kvm/book3s_hv_builtin.c     | 167 ++++++++-----------------------
 6 files changed, 65 insertions(+), 152 deletions(-)
Alexander Graf - July 2, 2013, 3:17 p.m.
On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>
> Older version of power architecture use Real Mode Offset register and Real Mode Limit
> Selector for mapping guest Real Mode Area. The guest RMA should be physically
> contigous since we use the range when address translation is not enabled.
>
> This patch switch RMA allocation code to use contigous memory allocator. The patch
> also remove the the linear allocator which not used any more
>
> Acked-by: Paul Mackerras<paulus@samba.org>
> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/kvm_book3s_64.h |   1 +
>   arch/powerpc/include/asm/kvm_host.h      |  12 +--
>   arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
>   arch/powerpc/kernel/setup_64.c           |   2 -
>   arch/powerpc/kvm/book3s_hv.c             |  27 +++--
>   arch/powerpc/kvm/book3s_hv_builtin.c     | 167 ++++++++-----------------------
>   6 files changed, 65 insertions(+), 152 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index f8355a9..76ff0b5 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -37,6 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
>
>   #ifdef CONFIG_KVM_BOOK3S_64_HV
>   #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
> +extern unsigned long kvm_rma_pages;
>   #endif
>
>   #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 0097dab..3328353 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
>   	struct page *pages[0];
>   };
>
> -struct kvmppc_linear_info {
> -	void		*base_virt;
> -	unsigned long	 base_pfn;
> -	unsigned long	 npages;
> -	struct list_head list;
> -	atomic_t	 use_count;
> -	int		 type;
> +struct kvm_rma_info {
> +	atomic_t use_count;
> +	unsigned long base_pfn;
>   };
>
>   /* XICS components, defined in book3s_xics.c */
> @@ -246,7 +242,7 @@ struct kvm_arch {
>   	int tlbie_lock;
>   	unsigned long lpcr;
>   	unsigned long rmor;
> -	struct kvmppc_linear_info *rma;
> +	struct kvm_rma_info *rma;
>   	unsigned long vrma_slb_v;
>   	int rma_setup_done;
>   	int using_mmu_notifiers;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index b5ef7a3..5a26bfc 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -137,8 +137,8 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
>   			     unsigned long ioba, unsigned long tce);
>   extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
>   				struct kvm_allocate_rma *rma);
> -extern struct kvmppc_linear_info *kvm_alloc_rma(void);
> -extern void kvm_release_rma(struct kvmppc_linear_info *ri);
> +extern struct kvm_rma_info *kvm_alloc_rma(void);
> +extern void kvm_release_rma(struct kvm_rma_info *ri);
>   extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
>   extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
>   extern int kvmppc_core_init_vm(struct kvm *kvm);
> @@ -282,7 +282,6 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
>   }
>
>   extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
> -extern void kvm_linear_init(void);
>
>   #else
>   static inline void __init kvm_cma_reserve(void)
> @@ -291,9 +290,6 @@ static inline void __init kvm_cma_reserve(void)
>   static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
>   {}
>
> -static inline void kvm_linear_init(void)
> -{}
> -
>   static inline u32 kvmppc_get_xics_latch(void)
>   {
>   	return 0;
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index ee28d1f..8a022f5 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -611,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
>   	/* Initialize the MMU context management stuff */
>   	mmu_context_init();
>
> -	kvm_linear_init();
> -
>   	/* Interrupt code needs to be 64K-aligned */
>   	if ((unsigned long)_stext&  0xffff)
>   		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 550f592..55c8519 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>
>   static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>   {
> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>   	struct page *page;
> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>
> -	if (vmf->pgoff>= ri->npages)
> +	if (vmf->pgoff>= kvm_rma_pages)
>   		return VM_FAULT_SIGBUS;
>
>   	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>
>   static int kvm_rma_release(struct inode *inode, struct file *filp)
>   {
> -	struct kvmppc_linear_info *ri = filp->private_data;
> +	struct kvm_rma_info *ri = filp->private_data;
>
>   	kvm_release_rma(ri);
>   	return 0;
> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>
>   long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>   {
> -	struct kvmppc_linear_info *ri;
>   	long fd;
> +	struct kvm_rma_info *ri;
> +	/*
> +	 * Only do this on PPC970 in HV mode
> +	 */
> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
> +		return -EINVAL;

Is this really what we want? User space may want to use an RMA on POWER7 
systems, no?

> +
> +	if (!kvm_rma_pages)
> +		return -EINVAL;
>
>   	ri = kvm_alloc_rma();
>   	if (!ri)
> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>   	if (fd<  0)
>   		kvm_release_rma(ri);
>
> -	ret->rma_size = ri->npages<<  PAGE_SHIFT;
> +	ret->rma_size = kvm_rma_pages<<  PAGE_SHIFT;
>   	return fd;
>   }
>
> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>   {
>   	int err = 0;
>   	struct kvm *kvm = vcpu->kvm;
> -	struct kvmppc_linear_info *ri = NULL;
> +	struct kvm_rma_info *ri = NULL;
>   	unsigned long hva;
>   	struct kvm_memory_slot *memslot;
>   	struct vm_area_struct *vma;
> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>
>   	} else {
>   		/* Set up to use an RMO region */
> -		rma_size = ri->npages;
> +		rma_size = kvm_rma_pages;
>   		if (rma_size>  memslot->npages)
>   			rma_size = memslot->npages;
>   		rma_size<<= PAGE_SHIFT;
> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>   			/* POWER7 */
>   			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>   			lpcr |= rmls<<  LPCR_RMLS_SH;
> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<  PAGE_SHIFT;
> +			kvm->arch.rmor = ri->base_pfn<<  PAGE_SHIFT;
>   		}
>   		kvm->arch.lpcr = lpcr;
>   		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>   			ri->base_pfn<<  PAGE_SHIFT, rma_size, lpcr);
>
>   		/* Initialize phys addrs of pages in RMO */
> -		npages = ri->npages;
> +		npages = kvm_rma_pages;
>   		porder = __ilog2(npages);
>   		physp = memslot->arch.slot_phys;
>   		if (physp) {
> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
> index 4b865c5..8cd0dae 100644
> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
> @@ -21,13 +21,6 @@
>   #include<asm/kvm_book3s.h>
>
>   #include "book3s_hv_cma.h"
> -
> -#define KVM_LINEAR_RMA		0
> -#define KVM_LINEAR_HPT		1
> -
> -static void __init kvm_linear_init_one(ulong size, int count, int type);
> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>   /*
>    * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>    * should be power of 2.
> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>    * By default we reserve 5% of memory for hash pagetable allocation.
>    */
>   static unsigned long kvm_cma_resv_ratio = 5;
> -
> -/*************** RMA *************/
> -
>   /*
> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>    * Each RMA has to be physically contiguous and of a size that the
>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>    * and other larger sizes.  Since we are unlikely to be allocate that
>    * much physically contiguous memory after the system is up and running,
> - * we preallocate a set of RMAs in early boot for KVM to use.
> + * we preallocate a set of RMAs in early boot using CMA.
> + * should be power of 2.
>    */
> -static unsigned long kvm_rma_size = 64<<  20;	/* 64MB */
> -static unsigned long kvm_rma_count;
> +unsigned long kvm_rma_pages = (1<<  27)>>  PAGE_SHIFT;	/* 128MB */
> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>
>   /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>      Assumes POWER7 or PPC970. */
> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>
>   static int __init early_parse_rma_size(char *p)
>   {
> -	if (!p)
> -		return 1;
> +	unsigned long kvm_rma_size;
>
> +	pr_debug("%s(%s)\n", __func__, p);
> +	if (!p)
> +		return -EINVAL;
>   	kvm_rma_size = memparse(p,&p);
> -
> +	/*
> +	 * Check that the requested size is one supported in hardware
> +	 */
> +	if (lpcr_rmls(kvm_rma_size)<  0) {
> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
> +		return -EINVAL;
> +	}
> +	kvm_rma_pages = kvm_rma_size>>  PAGE_SHIFT;
>   	return 0;
>   }
>   early_param("kvm_rma_size", early_parse_rma_size);
>
> -static int __init early_parse_rma_count(char *p)
> +struct kvm_rma_info *kvm_alloc_rma()
>   {
> -	if (!p)
> -		return 1;
> -
> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
> -
> -	return 0;
> -}
> -early_param("kvm_rma_count", early_parse_rma_count);
> -
> -struct kvmppc_linear_info *kvm_alloc_rma(void)
> -{
> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
> +	struct page *page;
> +	struct kvm_rma_info *ri;
> +
> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
> +	if (!ri)
> +		return NULL;
> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
> +	if (!page)
> +		goto err_out;
> +	atomic_set(&ri->use_count, 1);
> +	ri->base_pfn = page_to_pfn(page);
> +	return ri;
> +err_out:
> +	kfree(ri);
> +	return NULL;
>   }
>   EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>
> -void kvm_release_rma(struct kvmppc_linear_info *ri)
> +void kvm_release_rma(struct kvm_rma_info *ri)
>   {
> -	kvm_release_linear(ri);
> +	if (atomic_dec_and_test(&ri->use_count)) {
> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
> +		kfree(ri);
> +	}
>   }
>   EXPORT_SYMBOL_GPL(kvm_release_rma);
>
> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>   }
>   EXPORT_SYMBOL_GPL(kvm_release_hpt);
>
> -/*************** generic *************/
> -
> -static LIST_HEAD(free_linears);
> -static DEFINE_SPINLOCK(linear_lock);
> -
> -static void __init kvm_linear_init_one(ulong size, int count, int type)

Please split the linear removal bits out into a separate patch :).


Alex

> -{
> -	unsigned long i;
> -	unsigned long j, npages;
> -	void *linear;
> -	struct page *pg;
> -	const char *typestr;
> -	struct kvmppc_linear_info *linear_info;
> -
> -	if (!count)
> -		return;
> -
> -	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
> -
> -	npages = size>>  PAGE_SHIFT;
> -	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
> -	for (i = 0; i<  count; ++i) {
> -		linear = alloc_bootmem_align(size, size);
> -		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
> -			 size>>  20);
> -		linear_info[i].base_virt = linear;
> -		linear_info[i].base_pfn = __pa(linear)>>  PAGE_SHIFT;
> -		linear_info[i].npages = npages;
> -		linear_info[i].type = type;
> -		list_add_tail(&linear_info[i].list,&free_linears);
> -		atomic_set(&linear_info[i].use_count, 0);
> -
> -		pg = pfn_to_page(linear_info[i].base_pfn);
> -		for (j = 0; j<  npages; ++j) {
> -			atomic_inc(&pg->_count);
> -			++pg;
> -		}
> -	}
> -}
> -
> -static struct kvmppc_linear_info *kvm_alloc_linear(int type)
> -{
> -	struct kvmppc_linear_info *ri, *ret;
> -
> -	ret = NULL;
> -	spin_lock(&linear_lock);
> -	list_for_each_entry(ri,&free_linears, list) {
> -		if (ri->type != type)
> -			continue;
> -
> -		list_del(&ri->list);
> -		atomic_inc(&ri->use_count);
> -		memset(ri->base_virt, 0, ri->npages<<  PAGE_SHIFT);
> -		ret = ri;
> -		break;
> -	}
> -	spin_unlock(&linear_lock);
> -	return ret;
> -}
> -
> -static void kvm_release_linear(struct kvmppc_linear_info *ri)
> -{
> -	if (atomic_dec_and_test(&ri->use_count)) {
> -		spin_lock(&linear_lock);
> -		list_add_tail(&ri->list,&free_linears);
> -		spin_unlock(&linear_lock);
> -
> -	}
> -}
> -
> -/*
> - * Called at boot time while the bootmem allocator is active,
> - * to allocate contiguous physical memory for the hash page
> - * tables for guests.
> - */
> -void __init kvm_linear_init(void)
> -{
> -	/* RMA */
> -	/* Only do this on PPC970 in HV mode */
> -	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
> -	    !cpu_has_feature(CPU_FTR_ARCH_201))
> -		return;
> -
> -	if (!kvm_rma_size || !kvm_rma_count)
> -		return;
> -
> -	/* Check that the requested size is one supported in hardware */
> -	if (lpcr_rmls(kvm_rma_size)<  0) {
> -		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
> -		return;
> -	}
> -
> -	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
> -}
> -
>   /**
>    * kvm_cma_reserve() - reserve area for kvm hash pagetable
>    *
> @@ -265,6 +176,8 @@ void __init kvm_cma_reserve(void)
>   			align_size = __rounddown_pow_of_two(selected_size);
>   		else
>   			align_size = HPT_ALIGN_PAGES<<  PAGE_SHIFT;
> +
> +		align_size = max(kvm_rma_pages<<  PAGE_SHIFT, align_size);
>   		kvm_cma_declare_contiguous(selected_size, align_size);
>   	}
>   }
Aneesh Kumar K.V - July 2, 2013, 3:29 p.m.
Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>
>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>> contigous since we use the range when address translation is not enabled.
>>
>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>> also remove the the linear allocator which not used any more
>>
>> Acked-by: Paul Mackerras<paulus@samba.org>
>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>> ---

.... snip ....

>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 550f592..55c8519 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>
>>   static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>   {
>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>   	struct page *page;
>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>
>> -	if (vmf->pgoff>= ri->npages)
>> +	if (vmf->pgoff>= kvm_rma_pages)
>>   		return VM_FAULT_SIGBUS;
>>
>>   	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>
>>   static int kvm_rma_release(struct inode *inode, struct file *filp)
>>   {
>> -	struct kvmppc_linear_info *ri = filp->private_data;
>> +	struct kvm_rma_info *ri = filp->private_data;
>>
>>   	kvm_release_rma(ri);
>>   	return 0;
>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>
>>   long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>   {
>> -	struct kvmppc_linear_info *ri;
>>   	long fd;
>> +	struct kvm_rma_info *ri;
>> +	/*
>> +	 * Only do this on PPC970 in HV mode
>> +	 */
>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>> +		return -EINVAL;
>
> Is this really what we want? User space may want to use an RMA on POWER7 
> systems, no?

IIUC they will use virtual real mode area (VRMA) and not RMA

>
>> +
>> +	if (!kvm_rma_pages)
>> +		return -EINVAL;
>>
>>   	ri = kvm_alloc_rma();
>>   	if (!ri)
>> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>   	if (fd<  0)
>>   		kvm_release_rma(ri);
>>
>> -	ret->rma_size = ri->npages<<  PAGE_SHIFT;
>> +	ret->rma_size = kvm_rma_pages<<  PAGE_SHIFT;
>>   	return fd;
>>   }
>>
>> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>   {
>>   	int err = 0;
>>   	struct kvm *kvm = vcpu->kvm;
>> -	struct kvmppc_linear_info *ri = NULL;
>> +	struct kvm_rma_info *ri = NULL;
>>   	unsigned long hva;
>>   	struct kvm_memory_slot *memslot;
>>   	struct vm_area_struct *vma;
>> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>
>>   	} else {
>>   		/* Set up to use an RMO region */
>> -		rma_size = ri->npages;
>> +		rma_size = kvm_rma_pages;
>>   		if (rma_size>  memslot->npages)
>>   			rma_size = memslot->npages;
>>   		rma_size<<= PAGE_SHIFT;
>> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>   			/* POWER7 */
>>   			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>>   			lpcr |= rmls<<  LPCR_RMLS_SH;
>> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<  PAGE_SHIFT;
>> +			kvm->arch.rmor = ri->base_pfn<<  PAGE_SHIFT;
>>   		}
>>   		kvm->arch.lpcr = lpcr;
>>   		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>>   			ri->base_pfn<<  PAGE_SHIFT, rma_size, lpcr);
>>
>>   		/* Initialize phys addrs of pages in RMO */
>> -		npages = ri->npages;
>> +		npages = kvm_rma_pages;
>>   		porder = __ilog2(npages);
>>   		physp = memslot->arch.slot_phys;
>>   		if (physp) {
>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
>> index 4b865c5..8cd0dae 100644
>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>> @@ -21,13 +21,6 @@
>>   #include<asm/kvm_book3s.h>
>>
>>   #include "book3s_hv_cma.h"
>> -
>> -#define KVM_LINEAR_RMA		0
>> -#define KVM_LINEAR_HPT		1
>> -
>> -static void __init kvm_linear_init_one(ulong size, int count, int type);
>> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
>> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>   /*
>>    * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>>    * should be power of 2.
>> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>    * By default we reserve 5% of memory for hash pagetable allocation.
>>    */
>>   static unsigned long kvm_cma_resv_ratio = 5;
>> -
>> -/*************** RMA *************/
>> -
>>   /*
>> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
>> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>>    * Each RMA has to be physically contiguous and of a size that the
>>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>>    * and other larger sizes.  Since we are unlikely to be allocate that
>>    * much physically contiguous memory after the system is up and running,
>> - * we preallocate a set of RMAs in early boot for KVM to use.
>> + * we preallocate a set of RMAs in early boot using CMA.
>> + * should be power of 2.
>>    */
>> -static unsigned long kvm_rma_size = 64<<  20;	/* 64MB */
>> -static unsigned long kvm_rma_count;
>> +unsigned long kvm_rma_pages = (1<<  27)>>  PAGE_SHIFT;	/* 128MB */
>> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>>
>>   /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>>      Assumes POWER7 or PPC970. */
>> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>
>>   static int __init early_parse_rma_size(char *p)
>>   {
>> -	if (!p)
>> -		return 1;
>> +	unsigned long kvm_rma_size;
>>
>> +	pr_debug("%s(%s)\n", __func__, p);
>> +	if (!p)
>> +		return -EINVAL;
>>   	kvm_rma_size = memparse(p,&p);
>> -
>> +	/*
>> +	 * Check that the requested size is one supported in hardware
>> +	 */
>> +	if (lpcr_rmls(kvm_rma_size)<  0) {
>> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
>> +		return -EINVAL;
>> +	}
>> +	kvm_rma_pages = kvm_rma_size>>  PAGE_SHIFT;
>>   	return 0;
>>   }
>>   early_param("kvm_rma_size", early_parse_rma_size);
>>
>> -static int __init early_parse_rma_count(char *p)
>> +struct kvm_rma_info *kvm_alloc_rma()
>>   {
>> -	if (!p)
>> -		return 1;
>> -
>> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
>> -
>> -	return 0;
>> -}
>> -early_param("kvm_rma_count", early_parse_rma_count);
>> -
>> -struct kvmppc_linear_info *kvm_alloc_rma(void)
>> -{
>> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
>> +	struct page *page;
>> +	struct kvm_rma_info *ri;
>> +
>> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
>> +	if (!ri)
>> +		return NULL;
>> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
>> +	if (!page)
>> +		goto err_out;
>> +	atomic_set(&ri->use_count, 1);
>> +	ri->base_pfn = page_to_pfn(page);
>> +	return ri;
>> +err_out:
>> +	kfree(ri);
>> +	return NULL;
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>>
>> -void kvm_release_rma(struct kvmppc_linear_info *ri)
>> +void kvm_release_rma(struct kvm_rma_info *ri)
>>   {
>> -	kvm_release_linear(ri);
>> +	if (atomic_dec_and_test(&ri->use_count)) {
>> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
>> +		kfree(ri);
>> +	}
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_release_rma);
>>
>> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_release_hpt);
>>
>> -/*************** generic *************/
>> -
>> -static LIST_HEAD(free_linears);
>> -static DEFINE_SPINLOCK(linear_lock);
>> -
>> -static void __init kvm_linear_init_one(ulong size, int count, int type)
>
> Please split the linar removal bits out into a separate patch :).
>
>

That was the way I had it in the earlier patchset. That will cause a bisect
build break, because we treat warnings as errors and we hit an
unused-function warning.

I also realized that the linear alloc functions are nearby and mostly fall
in the same hunk. Hence I folded it back.

-aneesh
Alexander Graf - July 2, 2013, 3:32 p.m.
On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>
>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>> contigous since we use the range when address translation is not enabled.
>>>
>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>> also remove the the linear allocator which not used any more
>>>
>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>> ---
> .... snip ....
>
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index 550f592..55c8519 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>    {
>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>    	struct page *page;
>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>
>>> -	if (vmf->pgoff>= ri->npages)
>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>    		return VM_FAULT_SIGBUS;
>>>
>>>    	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>
>>>    static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>    {
>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>
>>>    	kvm_release_rma(ri);
>>>    	return 0;
>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>
>>>    long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    {
>>> -	struct kvmppc_linear_info *ri;
>>>    	long fd;
>>> +	struct kvm_rma_info *ri;
>>> +	/*
>>> +	 * Only do this on PPC970 in HV mode
>>> +	 */
>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>> +		return -EINVAL;
>> Is this really what we want? User space may want to use an RMA on POWER7
>> systems, no?
> IIUC they will use virtual real mode area (VRMA) and not RMA

Then I suppose we should at least update the comment a bit further down 
the patch that indicates that on POWER7 systems we do support a real 
RMA. I can't really think of any reason why user space would want to use 
RMA over VRMA.

>
>>> +
>>> +	if (!kvm_rma_pages)
>>> +		return -EINVAL;
>>>
>>>    	ri = kvm_alloc_rma();
>>>    	if (!ri)
>>> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    	if (fd<   0)
>>>    		kvm_release_rma(ri);
>>>
>>> -	ret->rma_size = ri->npages<<   PAGE_SHIFT;
>>> +	ret->rma_size = kvm_rma_pages<<   PAGE_SHIFT;
>>>    	return fd;
>>>    }
>>>
>>> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    {
>>>    	int err = 0;
>>>    	struct kvm *kvm = vcpu->kvm;
>>> -	struct kvmppc_linear_info *ri = NULL;
>>> +	struct kvm_rma_info *ri = NULL;
>>>    	unsigned long hva;
>>>    	struct kvm_memory_slot *memslot;
>>>    	struct vm_area_struct *vma;
>>> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>
>>>    	} else {
>>>    		/* Set up to use an RMO region */
>>> -		rma_size = ri->npages;
>>> +		rma_size = kvm_rma_pages;
>>>    		if (rma_size>   memslot->npages)
>>>    			rma_size = memslot->npages;
>>>    		rma_size<<= PAGE_SHIFT;
>>> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    			/* POWER7 */
>>>    			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>>>    			lpcr |= rmls<<   LPCR_RMLS_SH;
>>> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<   PAGE_SHIFT;
>>> +			kvm->arch.rmor = ri->base_pfn<<   PAGE_SHIFT;
>>>    		}
>>>    		kvm->arch.lpcr = lpcr;
>>>    		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>>>    			ri->base_pfn<<   PAGE_SHIFT, rma_size, lpcr);
>>>
>>>    		/* Initialize phys addrs of pages in RMO */
>>> -		npages = ri->npages;
>>> +		npages = kvm_rma_pages;
>>>    		porder = __ilog2(npages);
>>>    		physp = memslot->arch.slot_phys;
>>>    		if (physp) {
>>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> index 4b865c5..8cd0dae 100644
>>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> @@ -21,13 +21,6 @@
>>>    #include<asm/kvm_book3s.h>
>>>
>>>    #include "book3s_hv_cma.h"
>>> -
>>> -#define KVM_LINEAR_RMA		0
>>> -#define KVM_LINEAR_HPT		1
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type);
>>> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
>>> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>    /*
>>>     * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>>>     * should be power of 2.
>>> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>     * By default we reserve 5% of memory for hash pagetable allocation.
>>>     */
>>>    static unsigned long kvm_cma_resv_ratio = 5;
>>> -
>>> -/*************** RMA *************/
>>> -
>>>    /*
>>> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
>>> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>>>     * Each RMA has to be physically contiguous and of a size that the
>>>     * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>>>     * and other larger sizes.  Since we are unlikely to be allocate that
>>>     * much physically contiguous memory after the system is up and running,
>>> - * we preallocate a set of RMAs in early boot for KVM to use.
>>> + * we preallocate a set of RMAs in early boot using CMA.
>>> + * should be power of 2.
>>>     */
>>> -static unsigned long kvm_rma_size = 64<<   20;	/* 64MB */
>>> -static unsigned long kvm_rma_count;
>>> +unsigned long kvm_rma_pages = (1<<   27)>>   PAGE_SHIFT;	/* 128MB */
>>> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>>>
>>>    /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>>>       Assumes POWER7 or PPC970. */
>>> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int __init early_parse_rma_size(char *p)
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> +	unsigned long kvm_rma_size;
>>>
>>> +	pr_debug("%s(%s)\n", __func__, p);
>>> +	if (!p)
>>> +		return -EINVAL;
>>>    	kvm_rma_size = memparse(p,&p);
>>> -
>>> +	/*
>>> +	 * Check that the requested size is one supported in hardware
>>> +	 */
>>> +	if (lpcr_rmls(kvm_rma_size)<   0) {
>>> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
>>> +		return -EINVAL;
>>> +	}
>>> +	kvm_rma_pages = kvm_rma_size>>   PAGE_SHIFT;
>>>    	return 0;
>>>    }
>>>    early_param("kvm_rma_size", early_parse_rma_size);
>>>
>>> -static int __init early_parse_rma_count(char *p)
>>> +struct kvm_rma_info *kvm_alloc_rma()
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> -
>>> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
>>> -
>>> -	return 0;
>>> -}
>>> -early_param("kvm_rma_count", early_parse_rma_count);
>>> -
>>> -struct kvmppc_linear_info *kvm_alloc_rma(void)
>>> -{
>>> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
>>> +	struct page *page;
>>> +	struct kvm_rma_info *ri;
>>> +
>>> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
>>> +	if (!ri)
>>> +		return NULL;
>>> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
>>> +	if (!page)
>>> +		goto err_out;
>>> +	atomic_set(&ri->use_count, 1);
>>> +	ri->base_pfn = page_to_pfn(page);
>>> +	return ri;
>>> +err_out:
>>> +	kfree(ri);
>>> +	return NULL;
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>>>
>>> -void kvm_release_rma(struct kvmppc_linear_info *ri)
>>> +void kvm_release_rma(struct kvm_rma_info *ri)
>>>    {
>>> -	kvm_release_linear(ri);
>>> +	if (atomic_dec_and_test(&ri->use_count)) {
>>> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
>>> +		kfree(ri);
>>> +	}
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_rma);
>>>
>>> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_hpt);
>>>
>>> -/*************** generic *************/
>>> -
>>> -static LIST_HEAD(free_linears);
>>> -static DEFINE_SPINLOCK(linear_lock);
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type)
>> Please split the linear removal bits out into a separate patch :).
>>
>>
> That was how I had it in the earlier patchset. That causes a bisect
> build break, because we treat warnings as errors and we hit an
> unused-function warning.
>
> I also realized that linear alloc functions are nearby and mostly fall
> in the same hunk. Hence folded it back.

Fair enough :)


Alex
Aneesh Kumar K.V - July 2, 2013, 4:28 p.m.
Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
>> Alexander Graf<agraf@suse.de>  writes:
>>
>>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>>
>>>> Older versions of the Power architecture use the Real Mode Offset register and Real Mode Limit
>>>> Selector for mapping the guest Real Mode Area. The guest RMA should be physically
>>>> contiguous since we use the range when address translation is not enabled.
>>>>
>>>> This patch switches the RMA allocation code to use the contiguous memory allocator. The patch
>>>> also removes the linear allocator, which is not used any more.
>>>>
>>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>>> ---
>> .... snip ....
>>
>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>> index 550f592..55c8519 100644
>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>>
>>>>    static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>>    	struct page *page;
>>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>>
>>>> -	if (vmf->pgoff>= ri->npages)
>>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>>    		return VM_FAULT_SIGBUS;
>>>>
>>>>    	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>>
>>>>    static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>>
>>>>    	kvm_release_rma(ri);
>>>>    	return 0;
>>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>>
>>>>    long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri;
>>>>    	long fd;
>>>> +	struct kvm_rma_info *ri;
>>>> +	/*
>>>> +	 * Only do this on PPC970 in HV mode
>>>> +	 */
>>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>>> +		return -EINVAL;
>>> Is this really what we want? User space may want to use an RMA on POWER7
>>> systems, no?
>> IIUC they will use virtual real mode area (VRMA) and not RMA
>
> Then I suppose we should at least update the comment a bit further down 
> the patch that indicates that on POWER7 systems we do support a real 
> RMA. I can't really think of any reason why user space would want to use 
> RMA over VRMA.
>

Where? We have comments like

/* On POWER7, use VRMA; on PPC970, give up */

-aneesh
Alexander Graf - July 2, 2013, 4:36 p.m.
On 07/02/2013 06:28 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
>>> Alexander Graf<agraf@suse.de>   writes:
>>>
>>>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>>>
>>>>> Older versions of the Power architecture use the Real Mode Offset register and Real Mode Limit
>>>>> Selector for mapping the guest Real Mode Area. The guest RMA should be physically
>>>>> contiguous since we use the range when address translation is not enabled.
>>>>>
>>>>> This patch switches the RMA allocation code to use the contiguous memory allocator. The patch
>>>>> also removes the linear allocator, which is not used any more.
>>>>>
>>>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>>>> ---
>>> .... snip ....
>>>
>>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>>> index 550f592..55c8519 100644
>>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>>>
>>>>>     static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>>>     	struct page *page;
>>>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>>>
>>>>> -	if (vmf->pgoff>= ri->npages)
>>>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>>>     		return VM_FAULT_SIGBUS;
>>>>>
>>>>>     	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>>>
>>>>>     static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>>>
>>>>>     	kvm_release_rma(ri);
>>>>>     	return 0;
>>>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>>>
>>>>>     long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri;
>>>>>     	long fd;
>>>>> +	struct kvm_rma_info *ri;
>>>>> +	/*
>>>>> +	 * Only do this on PPC970 in HV mode
>>>>> +	 */
>>>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>>>> +		return -EINVAL;
>>>> Is this really what we want? User space may want to use an RMA on POWER7
>>>> systems, no?
>>> IIUC they will use virtual real mode area (VRMA) and not RMA
>> Then I suppose we should at least update the comment a bit further down
>> the patch that indicates that on POWER7 systems we do support a real
>> RMA. I can't really think of any reason why user space would want to use
>> RMA over VRMA.
>>
> Where? We have comments like
>
> /* On POWER7, use VRMA; on PPC970, give up */

>   /*
> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>    * Each RMA has to be physically contiguous and of a size that the
>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>    * and other larger sizes.  Since we are unlikely to be allocate that
>    * much physically contiguous memory after the system is up and running,
> - * we preallocate a set of RMAs in early boot for KVM to use.
> + * we preallocate a set of RMAs in early boot using CMA.
> + * should be power of 2.
>    */

This could be falsely interpreted as "POWER7 can use an RMA".


Alex

Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index f8355a9..76ff0b5 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,6 +37,7 @@  static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
+extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0097dab..3328353 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -183,13 +183,9 @@  struct kvmppc_spapr_tce_table {
 	struct page *pages[0];
 };
 
-struct kvmppc_linear_info {
-	void		*base_virt;
-	unsigned long	 base_pfn;
-	unsigned long	 npages;
-	struct list_head list;
-	atomic_t	 use_count;
-	int		 type;
+struct kvm_rma_info {
+	atomic_t use_count;
+	unsigned long base_pfn;
 };
 
 /* XICS components, defined in book3s_xics.c */
@@ -246,7 +242,7 @@  struct kvm_arch {
 	int tlbie_lock;
 	unsigned long lpcr;
 	unsigned long rmor;
-	struct kvmppc_linear_info *rma;
+	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
 	int using_mmu_notifiers;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b5ef7a3..5a26bfc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -137,8 +137,8 @@  extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba, unsigned long tce);
 extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
 				struct kvm_allocate_rma *rma);
-extern struct kvmppc_linear_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvmppc_linear_info *ri);
+extern struct kvm_rma_info *kvm_alloc_rma(void);
+extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
@@ -282,7 +282,6 @@  static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 }
 
 extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
-extern void kvm_linear_init(void);
 
 #else
 static inline void __init kvm_cma_reserve(void)
@@ -291,9 +290,6 @@  static inline void __init kvm_cma_reserve(void)
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
-static inline void kvm_linear_init(void)
-{}
-
 static inline u32 kvmppc_get_xics_latch(void)
 {
 	return 0;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ee28d1f..8a022f5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -611,8 +611,6 @@  void __init setup_arch(char **cmdline_p)
 	/* Initialize the MMU context management stuff */
 	mmu_context_init();
 
-	kvm_linear_init();
-
 	/* Interrupt code needs to be 64K-aligned */
 	if ((unsigned long)_stext & 0xffff)
 		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f592..55c8519 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1511,10 +1511,10 @@  static inline int lpcr_rmls(unsigned long rma_size)
 
 static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
 	struct page *page;
+	struct kvm_rma_info *ri = vma->vm_file->private_data;
 
-	if (vmf->pgoff >= ri->npages)
+	if (vmf->pgoff >= kvm_rma_pages)
 		return VM_FAULT_SIGBUS;
 
 	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
@@ -1536,7 +1536,7 @@  static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int kvm_rma_release(struct inode *inode, struct file *filp)
 {
-	struct kvmppc_linear_info *ri = filp->private_data;
+	struct kvm_rma_info *ri = filp->private_data;
 
 	kvm_release_rma(ri);
 	return 0;
@@ -1549,8 +1549,17 @@  static const struct file_operations kvm_rma_fops = {
 
 long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 {
-	struct kvmppc_linear_info *ri;
 	long fd;
+	struct kvm_rma_info *ri;
+	/*
+	 * Only do this on PPC970 in HV mode
+	 */
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_201))
+		return -EINVAL;
+
+	if (!kvm_rma_pages)
+		return -EINVAL;
 
 	ri = kvm_alloc_rma();
 	if (!ri)
@@ -1560,7 +1569,7 @@  long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 	if (fd < 0)
 		kvm_release_rma(ri);
 
-	ret->rma_size = ri->npages << PAGE_SHIFT;
+	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
 	return fd;
 }
 
@@ -1725,7 +1734,7 @@  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvmppc_linear_info *ri = NULL;
+	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
@@ -1803,7 +1812,7 @@  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
 	} else {
 		/* Set up to use an RMO region */
-		rma_size = ri->npages;
+		rma_size = kvm_rma_pages;
 		if (rma_size > memslot->npages)
 			rma_size = memslot->npages;
 		rma_size <<= PAGE_SHIFT;
@@ -1831,14 +1840,14 @@  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 			/* POWER7 */
 			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
 			lpcr |= rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
 		}
 		kvm->arch.lpcr = lpcr;
 		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 
 		/* Initialize phys addrs of pages in RMO */
-		npages = ri->npages;
+		npages = kvm_rma_pages;
 		porder = __ilog2(npages);
 		physp = memslot->arch.slot_phys;
 		if (physp) {
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 4b865c5..8cd0dae 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -21,13 +21,6 @@ 
 #include <asm/kvm_book3s.h>
 
 #include "book3s_hv_cma.h"
-
-#define KVM_LINEAR_RMA		0
-#define KVM_LINEAR_HPT		1
-
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
-static void kvm_release_linear(struct kvmppc_linear_info *ri);
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -37,19 +30,17 @@  static void kvm_release_linear(struct kvmppc_linear_info *ri);
  * By default we reserve 5% of memory for hash pagetable allocation.
  */
 static unsigned long kvm_cma_resv_ratio = 5;
-
-/*************** RMA *************/
-
 /*
- * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
  * Each RMA has to be physically contiguous and of a size that the
  * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
  * and other larger sizes.  Since we are unlikely to be allocate that
  * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot for KVM to use.
+ * we preallocate a set of RMAs in early boot using CMA.
+ * should be power of 2.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
-static unsigned long kvm_rma_count;
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
    Assumes POWER7 or PPC970. */
@@ -79,35 +70,50 @@  static inline int lpcr_rmls(unsigned long rma_size)
 
 static int __init early_parse_rma_size(char *p)
 {
-	if (!p)
-		return 1;
+	unsigned long kvm_rma_size;
 
+	pr_debug("%s(%s)\n", __func__, p);
+	if (!p)
+		return -EINVAL;
 	kvm_rma_size = memparse(p, &p);
-
+	/*
+	 * Check that the requested size is one supported in hardware
+	 */
+	if (lpcr_rmls(kvm_rma_size) < 0) {
+		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+		return -EINVAL;
+	}
+	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
 	return 0;
 }
 early_param("kvm_rma_size", early_parse_rma_size);
 
-static int __init early_parse_rma_count(char *p)
+struct kvm_rma_info *kvm_alloc_rma()
 {
-	if (!p)
-		return 1;
-
-	kvm_rma_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-struct kvmppc_linear_info *kvm_alloc_rma(void)
-{
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	struct page *page;
+	struct kvm_rma_info *ri;
+
+	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+	if (!ri)
+		return NULL;
+	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+	if (!page)
+		goto err_out;
+	atomic_set(&ri->use_count, 1);
+	ri->base_pfn = page_to_pfn(page);
+	return ri;
+err_out:
+	kfree(ri);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
-void kvm_release_rma(struct kvmppc_linear_info *ri)
+void kvm_release_rma(struct kvm_rma_info *ri)
 {
-	kvm_release_linear(ri);
+	if (atomic_dec_and_test(&ri->use_count)) {
+		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+		kfree(ri);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
@@ -137,101 +143,6 @@  void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
-/*************** generic *************/
-
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
-
-static void __init kvm_linear_init_one(ulong size, int count, int type)
-{
-	unsigned long i;
-	unsigned long j, npages;
-	void *linear;
-	struct page *pg;
-	const char *typestr;
-	struct kvmppc_linear_info *linear_info;
-
-	if (!count)
-		return;
-
-	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
-	npages = size >> PAGE_SHIFT;
-	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
-	for (i = 0; i < count; ++i) {
-		linear = alloc_bootmem_align(size, size);
-		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			 size >> 20);
-		linear_info[i].base_virt = linear;
-		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
-		linear_info[i].npages = npages;
-		linear_info[i].type = type;
-		list_add_tail(&linear_info[i].list, &free_linears);
-		atomic_set(&linear_info[i].use_count, 0);
-
-		pg = pfn_to_page(linear_info[i].base_pfn);
-		for (j = 0; j < npages; ++j) {
-			atomic_inc(&pg->_count);
-			++pg;
-		}
-	}
-}
-
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
-{
-	struct kvmppc_linear_info *ri, *ret;
-
-	ret = NULL;
-	spin_lock(&linear_lock);
-	list_for_each_entry(ri, &free_linears, list) {
-		if (ri->type != type)
-			continue;
-
-		list_del(&ri->list);
-		atomic_inc(&ri->use_count);
-		memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
-		ret = ri;
-		break;
-	}
-	spin_unlock(&linear_lock);
-	return ret;
-}
-
-static void kvm_release_linear(struct kvmppc_linear_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		spin_lock(&linear_lock);
-		list_add_tail(&ri->list, &free_linears);
-		spin_unlock(&linear_lock);
-
-	}
-}
-
-/*
- * Called at boot time while the bootmem allocator is active,
- * to allocate contiguous physical memory for the hash page
- * tables for guests.
- */
-void __init kvm_linear_init(void)
-{
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return;
-
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
-
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return;
-	}
-
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
-}
-
 /**
  * kvm_cma_reserve() - reserve area for kvm hash pagetable
  *
@@ -265,6 +176,8 @@  void __init kvm_cma_reserve(void)
 			align_size = __rounddown_pow_of_two(selected_size);
 		else
 			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
 		kvm_cma_declare_contiguous(selected_size, align_size);
 	}
 }