
[v3,1/5] KVM: PPC: Book3S HV: Disable page merging in H_SVM_INIT_START

Message ID 1594458827-31866-2-git-send-email-linuxram@us.ibm.com
State Superseded
Series Migrate non-migrated pages of a SVM.

Commit Message

Ram Pai July 11, 2020, 9:13 a.m. UTC
Merging of pages associated with each memslot of a SVM is currently
disabled when the page is migrated in the H_SVM_PAGE_IN handler.

This operation should be done much earlier, the moment the VM is
initiated for secure-transition. Delaying it increases the probability
of those pages acquiring new references, making it impossible to
migrate them in the H_SVM_PAGE_IN handler.

Disable page-merging in H_SVM_INIT_START handling.

Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c | 96 +++++++++++++++++++++++++++++---------
 1 file changed, 74 insertions(+), 22 deletions(-)

Comments

Bharata B Rao July 13, 2020, 5:29 a.m. UTC | #1
On Sat, Jul 11, 2020 at 02:13:43AM -0700, Ram Pai wrote:
> Merging of pages associated with each memslot of a SVM is currently
> disabled when the page is migrated in the H_SVM_PAGE_IN handler.
> 
> This operation should be done much earlier, the moment the VM is
> initiated for secure-transition. Delaying it increases the probability
> of those pages acquiring new references, making it impossible to
> migrate them in the H_SVM_PAGE_IN handler.
> 
> Disable page-merging in H_SVM_INIT_START handling.

While it is a good idea to disable KSM merging for all VMAs during
H_SVM_INIT_START, I am curious if you did observe an actual case of
ksm_madvise() failing which resulted in subsequent H_SVM_PAGE_IN
failing to migrate?

> 
> Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> ---
>  arch/powerpc/kvm/book3s_hv_uvmem.c | 96 +++++++++++++++++++++++++++++---------
>  1 file changed, 74 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 3d987b1..bfc3841 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -211,6 +211,65 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
>  	return false;
>  }
>  
> +static int kvmppc_memslot_page_merge(struct kvm *kvm,
> +		struct kvm_memory_slot *memslot, bool merge)
> +{
> +	unsigned long gfn = memslot->base_gfn;
> +	unsigned long end, start = gfn_to_hva(kvm, gfn);
> +	int ret = 0;
> +	struct vm_area_struct *vma;
> +	int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
> +
> +	if (kvm_is_error_hva(start))
> +		return H_STATE;

This and other cases below seem to be a new return value from
H_SVM_INIT_START. Maybe update the documentation too along with
this patch?
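
[Illustration: such a documentation update might look roughly like the lines
below; the file path and exact wording are assumptions, not part of the
posted series:]

    Documentation/powerpc/ultravisor.rst, H_SVM_INIT_START return values:

	* H_SUCCESS	on success.
	* H_STATE	if the hypervisor could not prepare the VM's memory
			(e.g. the VMA lookup or ksm_madvise() failed).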

> +
> +	end = start + (memslot->npages << PAGE_SHIFT);
> +
> +	down_write(&kvm->mm->mmap_sem);

When you rebase the patches against latest upstream you may want to
replace the above and other instances by mmap_write/read_lock().
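
[Illustration of that conversion, assuming the mmap locking API that landed
in v5.8; a sketch only, not part of the posted series:]

	mmap_write_lock(kvm->mm);	/* was: down_write(&kvm->mm->mmap_sem); */
	/* ... VMA walk and ksm_madvise() calls unchanged ... */
	mmap_write_unlock(kvm->mm);	/* was: up_write(&kvm->mm->mmap_sem); */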

> +	do {
> +		vma = find_vma_intersection(kvm->mm, start, end);
> +		if (!vma) {
> +			ret = H_STATE;
> +			break;
> +		}
> +		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> +			  merge_flag, &vma->vm_flags);
> +		if (ret) {
> +			ret = H_STATE;
> +			break;
> +		}
> +		start = vma->vm_end + 1;
> +	} while (end > vma->vm_end);
> +
> +	up_write(&kvm->mm->mmap_sem);
> +	return ret;
> +}
> +
> +static int __kvmppc_page_merge(struct kvm *kvm, bool merge)
> +{
> +	struct kvm_memslots *slots;
> +	struct kvm_memory_slot *memslot;
> +	int ret = 0;
> +
> +	slots = kvm_memslots(kvm);
> +	kvm_for_each_memslot(memslot, slots) {
> +		ret = kvmppc_memslot_page_merge(kvm, memslot, merge);
> +		if (ret)
> +			break;
> +	}
> +	return ret;
> +}
> +
> +static inline int kvmppc_disable_page_merge(struct kvm *kvm)
> +{
> +	return __kvmppc_page_merge(kvm, false);
> +}
> +
> +static inline int kvmppc_enable_page_merge(struct kvm *kvm)
> +{
> +	return __kvmppc_page_merge(kvm, true);
> +}
> +
>  unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
>  {
>  	struct kvm_memslots *slots;
> @@ -232,11 +291,18 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
>  		return H_AUTHORITY;
>  
>  	srcu_idx = srcu_read_lock(&kvm->srcu);
> +
> +	/* disable page-merging for all memslot */
> +	ret = kvmppc_disable_page_merge(kvm);
> +	if (ret)
> +		goto out;
> +
> +	/* register the memslot */
>  	slots = kvm_memslots(kvm);
>  	kvm_for_each_memslot(memslot, slots) {
>  		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
>  			ret = H_PARAMETER;
> -			goto out;
> +			break;
>  		}
>  		ret = uv_register_mem_slot(kvm->arch.lpid,
>  					   memslot->base_gfn << PAGE_SHIFT,
> @@ -245,9 +311,12 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
>  		if (ret < 0) {
>  			kvmppc_uvmem_slot_free(kvm, memslot);
>  			ret = H_PARAMETER;
> -			goto out;
> +			break;
>  		}
>  	}
> +
> +	if (ret)
> +		kvmppc_enable_page_merge(kvm);

Is there any use of enabling KSM merging in the failure path here?
Won't UV terminate the VM if H_SVM_INIT_START fails? If there is no need,
you can do away with some extra routines above.
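
[If the failure-path re-enable were dropped as suggested, the wrapper
routines could go away and kvmppc_h_svm_init_start() could call the helper
directly; a sketch only, assuming UV terminates the VM on failure:]

	slots = kvm_memslots(kvm);
	/* disable page-merging for all memslots; no re-enable on failure */
	kvm_for_each_memslot(memslot, slots) {
		ret = kvmppc_memslot_page_merge(kvm, memslot, false);
		if (ret)
			goto out;
	}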

>  out:
>  	srcu_read_unlock(&kvm->srcu, srcu_idx);
>  	return ret;
> @@ -384,7 +453,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
>   */
>  static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>  		   unsigned long end, unsigned long gpa, struct kvm *kvm,
> -		   unsigned long page_shift, bool *downgrade)
> +		   unsigned long page_shift)
>  {
>  	unsigned long src_pfn, dst_pfn = 0;
>  	struct migrate_vma mig;
> @@ -400,18 +469,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
>  	mig.src = &src_pfn;
>  	mig.dst = &dst_pfn;
>  
> -	/*
> -	 * We come here with mmap_sem write lock held just for
> -	 * ksm_madvise(), otherwise we only need read mmap_sem.
> -	 * Hence downgrade to read lock once ksm_madvise() is done.
> -	 */
> -	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> -			  MADV_UNMERGEABLE, &vma->vm_flags);

I haven't seen the subsequent patches yet, but I guess you are
taking care of disabling KSM merging for hot-plugged memory too.

Regards,
Bharata.
Ram Pai July 15, 2020, 5:16 a.m. UTC | #2
On Mon, Jul 13, 2020 at 10:59:41AM +0530, Bharata B Rao wrote:
> On Sat, Jul 11, 2020 at 02:13:43AM -0700, Ram Pai wrote:
> > Merging of pages associated with each memslot of a SVM is currently
> > disabled when the page is migrated in the H_SVM_PAGE_IN handler.
> > 
> > This operation should be done much earlier, the moment the VM is
> > initiated for secure-transition. Delaying it increases the probability
> > of those pages acquiring new references, making it impossible to
> > migrate them in the H_SVM_PAGE_IN handler.
> > 
> > Disable page-merging in H_SVM_INIT_START handling.
> 
> While it is a good idea to disable KSM merging for all VMAs during
> H_SVM_INIT_START, I am curious if you did observe an actual case of
> ksm_madvise() failing which resulted in subsequent H_SVM_PAGE_IN
> failing to migrate?

No, I did not find any case of ksm_madvise() failing. But it did not make
sense to call ksm_madvise() every time a page-in was requested. Hence I
proposed this patch. H_SVM_INIT_START is the right place for ksm_madvise().

> 
> > 
> > Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> > ---
> >  arch/powerpc/kvm/book3s_hv_uvmem.c | 96 +++++++++++++++++++++++++++++---------
> >  1 file changed, 74 insertions(+), 22 deletions(-)
> > 
> > diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > index 3d987b1..bfc3841 100644
> > --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> > +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > @@ -211,6 +211,65 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
> >  	return false;
> >  }
> >  
> > +static int kvmppc_memslot_page_merge(struct kvm *kvm,
> > +		struct kvm_memory_slot *memslot, bool merge)
> > +{
> > +	unsigned long gfn = memslot->base_gfn;
> > +	unsigned long end, start = gfn_to_hva(kvm, gfn);
> > +	int ret = 0;
> > +	struct vm_area_struct *vma;
> > +	int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
> > +
> > +	if (kvm_is_error_hva(start))
> > +		return H_STATE;
> 
> This and other cases below seem to be a new return value from
> H_SVM_INIT_START. Maybe update the documentation too along with
> this patch?

ok.

> 
> > +
> > +	end = start + (memslot->npages << PAGE_SHIFT);
> > +
> > +	down_write(&kvm->mm->mmap_sem);
> 
> When you rebase the patches against latest upstream you may want to
> replace the above and other instances by mmap_write/read_lock().

ok.

> 
> > +	do {
> > +		vma = find_vma_intersection(kvm->mm, start, end);
> > +		if (!vma) {
> > +			ret = H_STATE;
> > +			break;
> > +		}
> > +		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> > +			  merge_flag, &vma->vm_flags);
> > +		if (ret) {
> > +			ret = H_STATE;
> > +			break;
> > +		}
> > +		start = vma->vm_end + 1;
> > +	} while (end > vma->vm_end);
> > +
> > +	up_write(&kvm->mm->mmap_sem);
> > +	return ret;
> > +}
> > +
> > +static int __kvmppc_page_merge(struct kvm *kvm, bool merge)
> > +{
> > +	struct kvm_memslots *slots;
> > +	struct kvm_memory_slot *memslot;
> > +	int ret = 0;
> > +
> > +	slots = kvm_memslots(kvm);
> > +	kvm_for_each_memslot(memslot, slots) {
> > +		ret = kvmppc_memslot_page_merge(kvm, memslot, merge);
> > +		if (ret)
> > +			break;
> > +	}
> > +	return ret;
> > +}
> > +
> > +static inline int kvmppc_disable_page_merge(struct kvm *kvm)
> > +{
> > +	return __kvmppc_page_merge(kvm, false);
> > +}
> > +
> > +static inline int kvmppc_enable_page_merge(struct kvm *kvm)
> > +{
> > +	return __kvmppc_page_merge(kvm, true);
> > +}
> > +
> >  unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> >  {
> >  	struct kvm_memslots *slots;
> > @@ -232,11 +291,18 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> >  		return H_AUTHORITY;
> >  
> >  	srcu_idx = srcu_read_lock(&kvm->srcu);
> > +
> > +	/* disable page-merging for all memslot */
> > +	ret = kvmppc_disable_page_merge(kvm);
> > +	if (ret)
> > +		goto out;
> > +
> > +	/* register the memslot */
> >  	slots = kvm_memslots(kvm);
> >  	kvm_for_each_memslot(memslot, slots) {
> >  		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
> >  			ret = H_PARAMETER;
> > -			goto out;
> > +			break;
> >  		}
> >  		ret = uv_register_mem_slot(kvm->arch.lpid,
> >  					   memslot->base_gfn << PAGE_SHIFT,
> > @@ -245,9 +311,12 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> >  		if (ret < 0) {
> >  			kvmppc_uvmem_slot_free(kvm, memslot);
> >  			ret = H_PARAMETER;
> > -			goto out;
> > +			break;
> >  		}
> >  	}
> > +
> > +	if (ret)
> > +		kvmppc_enable_page_merge(kvm);
> 
> Is there any use of enabling KSM merging in the failure path here?
> Won't UV terminate the VM if H_SVM_INIT_START fails? If there is no need,
> you can do away with some extra routines above.

UV will terminate it. But I did not want to tie that assumption into
this function.

> 
> >  out:
> >  	srcu_read_unlock(&kvm->srcu, srcu_idx);
> >  	return ret;
> > @@ -384,7 +453,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
> >   */
> >  static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
> >  		   unsigned long end, unsigned long gpa, struct kvm *kvm,
> > -		   unsigned long page_shift, bool *downgrade)
> > +		   unsigned long page_shift)
> >  {
> >  	unsigned long src_pfn, dst_pfn = 0;
> >  	struct migrate_vma mig;
> > @@ -400,18 +469,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
> >  	mig.src = &src_pfn;
> >  	mig.dst = &dst_pfn;
> >  
> > -	/*
> > -	 * We come here with mmap_sem write lock held just for
> > -	 * ksm_madvise(), otherwise we only need read mmap_sem.
> > -	 * Hence downgrade to read lock once ksm_madvise() is done.
> > -	 */
> > -	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> > -			  MADV_UNMERGEABLE, &vma->vm_flags);
> 
> I haven't seen the subsequent patches yet, but I guess you are
> taking care of disabling KSM merging for hot-plugged memory too.

No. This is a good catch. The hot-plugged memory patch needs to disable
KSM merging as well.
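
[A rough sketch of how the hot-plug path could reuse the helper added in this
patch; the hook name and placement below are hypothetical, not from the
series:]

	/* hypothetical hook: a memslot is hot-added to an already-secure VM */
	static int kvmppc_uvmem_memslot_create(struct kvm *kvm,
					       struct kvm_memory_slot *new)
	{
		/* disable KSM merging on the new slot before UV registration */
		return kvmppc_memslot_page_merge(kvm, new, false);
	}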

RP
Bharata B Rao July 15, 2020, 7:36 a.m. UTC | #3
On Tue, Jul 14, 2020 at 10:16:14PM -0700, Ram Pai wrote:
> On Mon, Jul 13, 2020 at 10:59:41AM +0530, Bharata B Rao wrote:
> > On Sat, Jul 11, 2020 at 02:13:43AM -0700, Ram Pai wrote:
> > > Merging of pages associated with each memslot of a SVM is currently
> > > disabled when the page is migrated in the H_SVM_PAGE_IN handler.
> > > 
> > > This operation should be done much earlier, the moment the VM is
> > > initiated for secure-transition. Delaying it increases the probability
> > > of those pages acquiring new references, making it impossible to
> > > migrate them in the H_SVM_PAGE_IN handler.
> > > 
> > > Disable page-merging in H_SVM_INIT_START handling.
> > 
> > While it is a good idea to disable KSM merging for all VMAs during
> > H_SVM_INIT_START, I am curious if you did observe an actual case of
> > ksm_madvise() failing which resulted in subsequent H_SVM_PAGE_IN
> > failing to migrate?
> 
> No, I did not find any case of ksm_madvise() failing. But it did not make
> sense to call ksm_madvise() every time a page-in was requested. Hence I
> proposed this patch. H_SVM_INIT_START is the right place for ksm_madvise().

Indeed yes. Then you may want to update the description, which currently
seems to imply that this change is being done to avoid issues arising
from delayed KSM unmerging advice.

> 
> > 
> > > 
> > > Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> > > ---
> > >  arch/powerpc/kvm/book3s_hv_uvmem.c | 96 +++++++++++++++++++++++++++++---------
> > >  1 file changed, 74 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > > index 3d987b1..bfc3841 100644
> > > --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> > > +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > > @@ -211,6 +211,65 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
> > >  	return false;
> > >  }
> > >  
> > > +static int kvmppc_memslot_page_merge(struct kvm *kvm,
> > > +		struct kvm_memory_slot *memslot, bool merge)
> > > +{
> > > +	unsigned long gfn = memslot->base_gfn;
> > > +	unsigned long end, start = gfn_to_hva(kvm, gfn);
> > > +	int ret = 0;
> > > +	struct vm_area_struct *vma;
> > > +	int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
> > > +
> > > +	if (kvm_is_error_hva(start))
> > > +		return H_STATE;
> > 
> > This and other cases below seem to be a new return value from
> > H_SVM_INIT_START. Maybe update the documentation too along with
> > this patch?
> 
> ok.
> 
> > 
> > > +
> > > +	end = start + (memslot->npages << PAGE_SHIFT);
> > > +
> > > +	down_write(&kvm->mm->mmap_sem);
> > 
> > When you rebase the patches against latest upstream you may want to
> > replace the above and other instances by mmap_write/read_lock().
> 
> ok.
> 
> > 
> > > +	do {
> > > +		vma = find_vma_intersection(kvm->mm, start, end);
> > > +		if (!vma) {
> > > +			ret = H_STATE;
> > > +			break;
> > > +		}
> > > +		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> > > +			  merge_flag, &vma->vm_flags);
> > > +		if (ret) {
> > > +			ret = H_STATE;
> > > +			break;
> > > +		}
> > > +		start = vma->vm_end + 1;
> > > +	} while (end > vma->vm_end);
> > > +
> > > +	up_write(&kvm->mm->mmap_sem);
> > > +	return ret;
> > > +}
> > > +
> > > +static int __kvmppc_page_merge(struct kvm *kvm, bool merge)
> > > +{
> > > +	struct kvm_memslots *slots;
> > > +	struct kvm_memory_slot *memslot;
> > > +	int ret = 0;
> > > +
> > > +	slots = kvm_memslots(kvm);
> > > +	kvm_for_each_memslot(memslot, slots) {
> > > +		ret = kvmppc_memslot_page_merge(kvm, memslot, merge);
> > > +		if (ret)
> > > +			break;
> > > +	}
> > > +	return ret;
> > > +}
> > > +
> > > +static inline int kvmppc_disable_page_merge(struct kvm *kvm)
> > > +{
> > > +	return __kvmppc_page_merge(kvm, false);
> > > +}
> > > +
> > > +static inline int kvmppc_enable_page_merge(struct kvm *kvm)
> > > +{
> > > +	return __kvmppc_page_merge(kvm, true);
> > > +}
> > > +
> > >  unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> > >  {
> > >  	struct kvm_memslots *slots;
> > > @@ -232,11 +291,18 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> > >  		return H_AUTHORITY;
> > >  
> > >  	srcu_idx = srcu_read_lock(&kvm->srcu);
> > > +
> > > +	/* disable page-merging for all memslot */
> > > +	ret = kvmppc_disable_page_merge(kvm);
> > > +	if (ret)
> > > +		goto out;
> > > +
> > > +	/* register the memslot */
> > >  	slots = kvm_memslots(kvm);
> > >  	kvm_for_each_memslot(memslot, slots) {
> > >  		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
> > >  			ret = H_PARAMETER;
> > > -			goto out;
> > > +			break;
> > >  		}
> > >  		ret = uv_register_mem_slot(kvm->arch.lpid,
> > >  					   memslot->base_gfn << PAGE_SHIFT,
> > > @@ -245,9 +311,12 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> > >  		if (ret < 0) {
> > >  			kvmppc_uvmem_slot_free(kvm, memslot);
> > >  			ret = H_PARAMETER;
> > > -			goto out;
> > > +			break;
> > >  		}
> > >  	}
> > > +
> > > +	if (ret)
> > > +		kvmppc_enable_page_merge(kvm);
> > 
> > Is there any use of enabling KSM merging in the failure path here?
> > Won't UV terminate the VM if H_SVM_INIT_START fails? If there is no need,
> > you can do away with some extra routines above.
> 
> UV will terminate it. But I did not want to tie that assumption into
> this function.

Hmm ok, but having code around which isn't expected to be executed at all
was my concern.

Regards,
Bharata.

Patch

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 3d987b1..bfc3841 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -211,6 +211,65 @@  static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 	return false;
 }
 
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+		struct kvm_memory_slot *memslot, bool merge)
+{
+	unsigned long gfn = memslot->base_gfn;
+	unsigned long end, start = gfn_to_hva(kvm, gfn);
+	int ret = 0;
+	struct vm_area_struct *vma;
+	int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+
+	if (kvm_is_error_hva(start))
+		return H_STATE;
+
+	end = start + (memslot->npages << PAGE_SHIFT);
+
+	down_write(&kvm->mm->mmap_sem);
+	do {
+		vma = find_vma_intersection(kvm->mm, start, end);
+		if (!vma) {
+			ret = H_STATE;
+			break;
+		}
+		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+			  merge_flag, &vma->vm_flags);
+		if (ret) {
+			ret = H_STATE;
+			break;
+		}
+		start = vma->vm_end + 1;
+	} while (end > vma->vm_end);
+
+	up_write(&kvm->mm->mmap_sem);
+	return ret;
+}
+
+static int __kvmppc_page_merge(struct kvm *kvm, bool merge)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int ret = 0;
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		ret = kvmppc_memslot_page_merge(kvm, memslot, merge);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static inline int kvmppc_disable_page_merge(struct kvm *kvm)
+{
+	return __kvmppc_page_merge(kvm, false);
+}
+
+static inline int kvmppc_enable_page_merge(struct kvm *kvm)
+{
+	return __kvmppc_page_merge(kvm, true);
+}
+
 unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
@@ -232,11 +291,18 @@  unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 		return H_AUTHORITY;
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	/* disable page-merging for all memslot */
+	ret = kvmppc_disable_page_merge(kvm);
+	if (ret)
+		goto out;
+
+	/* register the memslot */
 	slots = kvm_memslots(kvm);
 	kvm_for_each_memslot(memslot, slots) {
 		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
 			ret = H_PARAMETER;
-			goto out;
+			break;
 		}
 		ret = uv_register_mem_slot(kvm->arch.lpid,
 					   memslot->base_gfn << PAGE_SHIFT,
@@ -245,9 +311,12 @@  unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 		if (ret < 0) {
 			kvmppc_uvmem_slot_free(kvm, memslot);
 			ret = H_PARAMETER;
-			goto out;
+			break;
 		}
 	}
+
+	if (ret)
+		kvmppc_enable_page_merge(kvm);
 out:
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	return ret;
@@ -384,7 +453,7 @@  static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  */
 static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
 		   unsigned long end, unsigned long gpa, struct kvm *kvm,
-		   unsigned long page_shift, bool *downgrade)
+		   unsigned long page_shift)
 {
 	unsigned long src_pfn, dst_pfn = 0;
 	struct migrate_vma mig;
@@ -400,18 +469,6 @@  static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
 	mig.src = &src_pfn;
 	mig.dst = &dst_pfn;
 
-	/*
-	 * We come here with mmap_sem write lock held just for
-	 * ksm_madvise(), otherwise we only need read mmap_sem.
-	 * Hence downgrade to read lock once ksm_madvise() is done.
-	 */
-	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
-			  MADV_UNMERGEABLE, &vma->vm_flags);
-	downgrade_write(&kvm->mm->mmap_sem);
-	*downgrade = true;
-	if (ret)
-		return ret;
-
 	ret = migrate_vma_setup(&mig);
 	if (ret)
 		return ret;
@@ -503,7 +560,6 @@  unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 		unsigned long flags,
 		unsigned long page_shift)
 {
-	bool downgrade = false;
 	unsigned long start, end;
 	struct vm_area_struct *vma;
 	int srcu_idx;
@@ -540,16 +596,12 @@  unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 	if (!vma || vma->vm_start > start || vma->vm_end < end)
 		goto out_unlock;
 
-	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
-				&downgrade))
+	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift))
 		ret = H_SUCCESS;
 out_unlock:
 	mutex_unlock(&kvm->arch.uvmem_lock);
 out:
-	if (downgrade)
-		up_read(&kvm->mm->mmap_sem);
-	else
-		up_write(&kvm->mm->mmap_sem);
+	up_write(&kvm->mm->mmap_sem);
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	return ret;
 }