diff mbox series

[v2] KVM: PPC: Book3S HV: Make HPT resizing work on POWER9

Message ID 20180207091135.GE19335@fergus.ozlabs.ibm.com
State Accepted
Headers show
Series [v2] KVM: PPC: Book3S HV: Make HPT resizing work on POWER9 | expand

Commit Message

Paul Mackerras Feb. 7, 2018, 9:11 a.m. UTC
From: David Gibson <david@gibson.dropbear.id.au>

This adds code to enable the HPT resizing code to work on POWER9,
which uses a slightly modified HPT entry format compared to POWER8.
On POWER9, we convert HPTEs read from the HPT from the new format to
the old format so that the rest of the HPT resizing code can work as
before.  HPTEs written to the new HPT are converted to the new format
as the last step before writing them into the new HPT.

This takes out the checks added by commit bcd3bb63dbc8 ("KVM: PPC:
Book3S HV: Disable HPT resizing on POWER9 for now", 2017-02-18),
now that HPT resizing works on POWER9.

On POWER9, when we pivot to the new HPT, we now call
kvmppc_setup_partition_table() to update the partition table in order
to make the hardware use the new HPT.

[paulus@ozlabs.org - added kvmppc_setup_partition_table() call,
 wrote commit message.]

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
v2: Include change to powerpc.c, which somehow got missed in v1.

 arch/powerpc/kvm/book3s_64_mmu_hv.c | 30 +++++++++++++++++++++++-------
 arch/powerpc/kvm/powerpc.c          |  3 +--
 2 files changed, 24 insertions(+), 9 deletions(-)

Comments

Laurent Vivier Feb. 8, 2018, 6:11 p.m. UTC | #1
On 07/02/2018 10:11, Paul Mackerras wrote:
> From: David Gibson <david@gibson.dropbear.id.au>
> 
> This adds code to enable the HPT resizing code to work on POWER9,
> which uses a slightly modified HPT entry format compared to POWER8.
> On POWER9, we convert HPTEs read from the HPT from the new format to
> the old format so that the rest of the HPT resizing code can work as
> before.  HPTEs written to the new HPT are converted to the new format
> as the last step before writing them into the new HPT.
> 
> This takes out the checks added by commit bcd3bb63dbc8 ("KVM: PPC:
> Book3S HV: Disable HPT resizing on POWER9 for now", 2017-02-18),
> now that HPT resizing works on POWER9.
> 
> On POWER9, when we pivot to the new HPT, we now call
> kvmppc_setup_partition_table() to update the partition table in order
> to make the hardware use the new HPT.
> 
> [paulus@ozlabs.org - added kvmppc_setup_partition_table() call,
>  wrote commit message.]
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> ---
> v2: Include change to powerpc.c, which somehow got missed in v1.
> 
>  arch/powerpc/kvm/book3s_64_mmu_hv.c | 30 +++++++++++++++++++++++-------
>  arch/powerpc/kvm/powerpc.c          |  3 +--
>  2 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index d196499..cb34be7 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -1261,6 +1261,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		/* Nothing to do */
>  		goto out;
>  
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		rpte = be64_to_cpu(hptep[1]);
> +		vpte = hpte_new_to_old_v(vpte, rpte);
> +	}
> +
>  	/* Unmap */
>  	rev = &old->rev[idx];
>  	guest_rpte = rev->guest_rpte;
> @@ -1290,7 +1295,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  
>  	/* Reload PTE after unmap */
>  	vpte = be64_to_cpu(hptep[0]);
> -
>  	BUG_ON(vpte & HPTE_V_VALID);
>  	BUG_ON(!(vpte & HPTE_V_ABSENT));
>  
> @@ -1299,6 +1303,12 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		goto out;
>  
>  	rpte = be64_to_cpu(hptep[1]);
> +
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		vpte = hpte_new_to_old_v(vpte, rpte);
> +		rpte = hpte_new_to_old_r(rpte);
> +	}
> +
>  	pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
>  	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
>  	pteg = idx / HPTES_PER_GROUP;
> @@ -1336,6 +1346,10 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  	new_hptep = (__be64 *)(new->virt + (new_idx << 4));
>  
>  	replace_vpte = be64_to_cpu(new_hptep[0]);
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
> +		replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
> +	}
>  
>  	if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
>  		BUG_ON(new->order >= old->order);
> @@ -1351,6 +1365,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		/* Discard the previous HPTE */
>  	}
>  
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		rpte = hpte_old_to_new_r(vpte, rpte);
> +		vpte = hpte_old_to_new_v(vpte);
> +	}
> +
>  	new_hptep[1] = cpu_to_be64(rpte);
>  	new->rev[new_idx].guest_rpte = guest_rpte;
>  	/* No need for a barrier, since new HPT isn't active */
> @@ -1368,12 +1387,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
>  	unsigned  long i;
>  	int rc;
>  
> -	/*
> -	 * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
> -	 * that POWER9 uses, and could well hit a BUG_ON on POWER9.
> -	 */
> -	if (cpu_has_feature(CPU_FTR_ARCH_300))
> -		return -EIO;
>  	for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
>  		rc = resize_hpt_rehash_hpte(resize, i);
>  		if (rc != 0)
> @@ -1404,6 +1417,9 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
>  
>  	synchronize_srcu_expedited(&kvm->srcu);
>  
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		kvmppc_setup_partition_table(kvm);
> +
>  	resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
>  }
>  
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index b4010b8..47c7a30 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -633,8 +633,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  		r = 1;
>  		break;
>  	case KVM_CAP_SPAPR_RESIZE_HPT:
> -		/* Disable this on POWER9 until code handles new HPTE format */
> -		r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
> +		r = !!hv_enabled;
>  		break;
>  #endif
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> 

I've tested this series with a RHEL 4.14 kernel on a POWER9 host and a
RHEL 3.10 kernel in the guest (max-cpu-compat=power8 and
"-m 80G,slots=256,maxmem=257G"), and everything works well:

[root@localhost ~]# cat /sys/kernel/debug/powerpc/hpt_order

30

[root@localhost ~]# echo 31 > /sys/kernel/debug/powerpc/hpt_order

[   65.888226] lpar: Attempting to resize HPT to shift 31

[   66.634834] lpar: HPT resize to shift 31 complete (210 ms / 535 ms)

[root@localhost ~]# cat /sys/kernel/debug/powerpc/hpt_order

31

[root@localhost ~]# echo 29 > /sys/kernel/debug/powerpc/hpt_order

[   77.023857] lpar: Attempting to resize HPT to shift 29

[   77.739819] lpar: HPT resize to shift 29 complete (105 ms / 610 ms)

[root@localhost ~]# cat /sys/kernel/debug/powerpc/hpt_order

29

Previously, it was failing with:

-bash: echo: write error: No such device

Tested-by: Laurent Vivier <lvivier@redhat.com>

Thanks,
Laurent
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Gibson Feb. 8, 2018, 11:40 p.m. UTC | #2
On Wed, Feb 07, 2018 at 08:11:35PM +1100, Paul Mackerras wrote:
> From: David Gibson <david@gibson.dropbear.id.au>
> 
> This adds code to enable the HPT resizing code to work on POWER9,
> which uses a slightly modified HPT entry format compared to POWER8.
> On POWER9, we convert HPTEs read from the HPT from the new format to
> the old format so that the rest of the HPT resizing code can work as
> before.  HPTEs written to the new HPT are converted to the new format
> as the last step before writing them into the new HPT.
> 
> This takes out the checks added by commit bcd3bb63dbc8 ("KVM: PPC:
> Book3S HV: Disable HPT resizing on POWER9 for now", 2017-02-18),
> now that HPT resizing works on POWER9.
> 
> On POWER9, when we pivot to the new HPT, we now call
> kvmppc_setup_partition_table() to update the partition table in order
> to make the hardware use the new HPT.
> 
> [paulus@ozlabs.org - added kvmppc_setup_partition_table() call,
>  wrote commit message.]
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
> v2: Include change to powerpc.c, which somehow got missed in v1.
> 
>  arch/powerpc/kvm/book3s_64_mmu_hv.c | 30 +++++++++++++++++++++++-------
>  arch/powerpc/kvm/powerpc.c          |  3 +--
>  2 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index d196499..cb34be7 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -1261,6 +1261,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		/* Nothing to do */
>  		goto out;
>  
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		rpte = be64_to_cpu(hptep[1]);
> +		vpte = hpte_new_to_old_v(vpte, rpte);
> +	}
> +
>  	/* Unmap */
>  	rev = &old->rev[idx];
>  	guest_rpte = rev->guest_rpte;
> @@ -1290,7 +1295,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  
>  	/* Reload PTE after unmap */
>  	vpte = be64_to_cpu(hptep[0]);
> -
>  	BUG_ON(vpte & HPTE_V_VALID);
>  	BUG_ON(!(vpte & HPTE_V_ABSENT));
>  
> @@ -1299,6 +1303,12 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		goto out;
>  
>  	rpte = be64_to_cpu(hptep[1]);
> +
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		vpte = hpte_new_to_old_v(vpte, rpte);
> +		rpte = hpte_new_to_old_r(rpte);
> +	}
> +
>  	pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
>  	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
>  	pteg = idx / HPTES_PER_GROUP;
> @@ -1336,6 +1346,10 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  	new_hptep = (__be64 *)(new->virt + (new_idx << 4));
>  
>  	replace_vpte = be64_to_cpu(new_hptep[0]);
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
> +		replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
> +	}
>  
>  	if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
>  		BUG_ON(new->order >= old->order);
> @@ -1351,6 +1365,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		/* Discard the previous HPTE */
>  	}
>  
> +	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> +		rpte = hpte_old_to_new_r(vpte, rpte);
> +		vpte = hpte_old_to_new_v(vpte);
> +	}
> +
>  	new_hptep[1] = cpu_to_be64(rpte);
>  	new->rev[new_idx].guest_rpte = guest_rpte;
>  	/* No need for a barrier, since new HPT isn't active */
> @@ -1368,12 +1387,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
>  	unsigned  long i;
>  	int rc;
>  
> -	/*
> -	 * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
> -	 * that POWER9 uses, and could well hit a BUG_ON on POWER9.
> -	 */
> -	if (cpu_has_feature(CPU_FTR_ARCH_300))
> -		return -EIO;
>  	for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
>  		rc = resize_hpt_rehash_hpte(resize, i);
>  		if (rc != 0)
> @@ -1404,6 +1417,9 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
>  
>  	synchronize_srcu_expedited(&kvm->srcu);
>  
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		kvmppc_setup_partition_table(kvm);
> +
>  	resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
>  }
>  
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index b4010b8..47c7a30 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -633,8 +633,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  		r = 1;
>  		break;
>  	case KVM_CAP_SPAPR_RESIZE_HPT:
> -		/* Disable this on POWER9 until code handles new HPTE format */
> -		r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
> +		r = !!hv_enabled;
>  		break;
>  #endif
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff mbox series

Patch

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d196499..cb34be7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1261,6 +1261,11 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 		/* Nothing to do */
 		goto out;
 
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		rpte = be64_to_cpu(hptep[1]);
+		vpte = hpte_new_to_old_v(vpte, rpte);
+	}
+
 	/* Unmap */
 	rev = &old->rev[idx];
 	guest_rpte = rev->guest_rpte;
@@ -1290,7 +1295,6 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 
 	/* Reload PTE after unmap */
 	vpte = be64_to_cpu(hptep[0]);
-
 	BUG_ON(vpte & HPTE_V_VALID);
 	BUG_ON(!(vpte & HPTE_V_ABSENT));
 
@@ -1299,6 +1303,12 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 		goto out;
 
 	rpte = be64_to_cpu(hptep[1]);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		vpte = hpte_new_to_old_v(vpte, rpte);
+		rpte = hpte_new_to_old_r(rpte);
+	}
+
 	pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
 	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
 	pteg = idx / HPTES_PER_GROUP;
@@ -1336,6 +1346,10 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 	new_hptep = (__be64 *)(new->virt + (new_idx << 4));
 
 	replace_vpte = be64_to_cpu(new_hptep[0]);
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
+		replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
+	}
 
 	if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
 		BUG_ON(new->order >= old->order);
@@ -1351,6 +1365,11 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 		/* Discard the previous HPTE */
 	}
 
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		rpte = hpte_old_to_new_r(vpte, rpte);
+		vpte = hpte_old_to_new_v(vpte);
+	}
+
 	new_hptep[1] = cpu_to_be64(rpte);
 	new->rev[new_idx].guest_rpte = guest_rpte;
 	/* No need for a barrier, since new HPT isn't active */
@@ -1368,12 +1387,6 @@  static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
 	unsigned  long i;
 	int rc;
 
-	/*
-	 * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
-	 * that POWER9 uses, and could well hit a BUG_ON on POWER9.
-	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		return -EIO;
 	for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
 		rc = resize_hpt_rehash_hpte(resize, i);
 		if (rc != 0)
@@ -1404,6 +1417,9 @@  static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
 
 	synchronize_srcu_expedited(&kvm->srcu);
 
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		kvmppc_setup_partition_table(kvm);
+
 	resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
 }
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b4010b8..47c7a30 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -633,8 +633,7 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = 1;
 		break;
 	case KVM_CAP_SPAPR_RESIZE_HPT:
-		/* Disable this on POWER9 until code handles new HPTE format */
-		r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
+		r = !!hv_enabled;
 		break;
 #endif
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE