[RFC,35/43] KVM: PPC: Book3S HV P9: Demand fault TM facility registers

Message ID: 20210622105736.633352-36-npiggin@gmail.com
State: Not Applicable
Series: KVM: PPC: Book3S HV P9: entry/exit optimisations round 1

Commit Message

Nicholas Piggin June 22, 2021, 10:57 a.m. UTC
Use HFSCR facility disabling to implement demand faulting for TM, with
a hysteresis counter similar to the load_fp etc counters in context
switching that implement the equivalent demand faulting for userspace
facilities.

This speeds up guest entry/exit by avoiding the register save/restore
when a guest is not frequently using them. When a guest does use them
often, there will be some additional demand fault overhead, but these
are not commonly used facilities.

-304 cycles (6681) POWER9 virt-mode NULL hcall with the previous patch

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/kvm_host.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c          | 21 +++++++++++++++++----
 arch/powerpc/kvm/book3s_hv_nested.c   |  2 +-
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 18 ++++++++++++------
 4 files changed, 31 insertions(+), 11 deletions(-)
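
For readers unfamiliar with the load_fp-style hysteresis trick the commit message refers to, the sketch below illustrates the general pattern the patch applies to HFSCR_TM: keep the facility disabled until the guest actually uses it, re-enable it from the facility-unavailable handler, and let a small wrapping counter in the exit path periodically disable it again so an idle facility stops costing save/restore. This is a standalone toy with made-up names (toy_vcpu, FACILITY_TM, toy_* functions), not kernel code.

#include <stdint.h>

#define FACILITY_TM	(1u << 0)	/* stands in for HFSCR_TM */

struct toy_vcpu {
	uint32_t hfscr;		/* facilities currently granted to the guest */
	uint8_t load_tm;	/* wrapping hysteresis counter, like load_fp */
};

/* Facility-unavailable interrupt: the guest touched TM, so grant it and retry. */
void toy_tm_unavailable(struct toy_vcpu *vcpu)
{
	vcpu->hfscr |= FACILITY_TM;
}

/* Guest exit path: TM state only needs saving while the facility is granted. */
void toy_store_vcpu_state(struct toy_vcpu *vcpu)
{
	if (vcpu->hfscr & FACILITY_TM) {
		/* ... save TFHAR/TFIAR/TEXASR here ... */
		vcpu->load_tm++;
		if (!vcpu->load_tm)	/* wrapped after 256 exits: re-test demand */
			vcpu->hfscr &= ~FACILITY_TM;
	}
}

While the guest keeps using TM the facility stays enabled and only the counter increment is paid on exit; once TM goes unused, the next counter wrap leaves the facility disabled and the save/restore is skipped until the next facility-unavailable interrupt.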

Comments

Fabiano Rosas July 8, 2021, 5:46 p.m. UTC | #1
Nicholas Piggin <npiggin@gmail.com> writes:

> Use HFSCR facility disabling to implement demand faulting for TM, with
> a hysteresis counter similar to the load_fp etc counters in context
> switching that implement the equivalent demand faulting for userspace
> facilities.
>
> This speeds up guest entry/exit by avoiding the register save/restore
> when a guest is not frequently using them. When a guest does use them
> often, there will be some additional demand fault overhead, but these
> are not commonly used facilities.
>
> -304 cycles (6681) POWER9 virt-mode NULL hcall with the previous patch
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>


Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bee95106c1f2..d79f0b1b1578 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -586,6 +586,7 @@ struct kvm_vcpu_arch {
 	ulong ppr;
 	u32 pspb;
 	u8 load_ebb;
+	u8 load_tm;
 	ulong fscr;
 	ulong shadow_fscr;
 	ulong ebbhr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 99e9da078e7d..2430725f29f7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1373,6 +1373,13 @@ static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hfscr |= HFSCR_TM;
+
+	return RESUME_GUEST;
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 				 struct task_struct *tsk)
 {
@@ -1654,6 +1661,8 @@ XXX benchmark guest exits
 				r = kvmppc_pmu_unavailable(vcpu);
 			if (cause == FSCR_EBB_LG)
 				r = kvmppc_ebb_unavailable(vcpu);
+			if (cause == FSCR_TM_LG)
+				r = kvmppc_tm_unavailable(vcpu);
 		}
 		if (r == EMULATE_FAIL) {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
@@ -1775,6 +1784,8 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 			r = kvmppc_pmu_unavailable(vcpu);
 		if (cause == FSCR_EBB_LG && (vcpu->arch.nested_hfscr & HFSCR_EBB))
 			r = kvmppc_ebb_unavailable(vcpu);
+		if (cause == FSCR_TM_LG && (vcpu->arch.nested_hfscr & HFSCR_TM))
+			r = kvmppc_tm_unavailable(vcpu);
 
 		if (r == EMULATE_FAIL)
 			r = RESUME_HOST;
@@ -3737,8 +3748,9 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
 		msr |= MSR_VEC;
 	if (cpu_has_feature(CPU_FTR_VSX))
 		msr |= MSR_VSX;
-	if (cpu_has_feature(CPU_FTR_TM) ||
-	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+	if ((cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+			(vcpu->arch.hfscr & HFSCR_TM))
 		msr |= MSR_TM;
 	msr = msr_check_and_set(msr);
 
@@ -4453,8 +4465,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 		msr |= MSR_VEC;
 	if (cpu_has_feature(CPU_FTR_VSX))
 		msr |= MSR_VSX;
-	if (cpu_has_feature(CPU_FTR_TM) ||
-	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+	if ((cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+			(vcpu->arch.hfscr & HFSCR_TM))
 		msr |= MSR_TM;
 	msr = msr_check_and_set(msr);
 
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index ee8668f056f9..5a534f7924f2 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -168,7 +168,7 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	 * but preserve the interrupt cause field and facilities that might
 	 * be disabled for demand faulting in the L1.
 	 */
-	hr->hfscr &= (HFSCR_INTR_CAUSE | HFSCR_PM | HFSCR_EBB |
+	hr->hfscr &= (HFSCR_INTR_CAUSE | HFSCR_PM | HFSCR_TM | HFSCR_EBB |
 			vcpu->arch.hfscr);
 
 	/* Don't let data address watchpoint match in hypervisor state */
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index cf41261daa97..653f2765a399 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -284,8 +284,9 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
 void load_vcpu_state(struct kvm_vcpu *vcpu,
 			   struct p9_host_os_sprs *host_os_sprs)
 {
-	if (cpu_has_feature(CPU_FTR_TM) ||
-	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+	if ((cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+		       (vcpu->arch.hfscr & HFSCR_TM)) {
 		unsigned long msr = vcpu->arch.shregs.msr;
 		if (MSR_TM_ACTIVE(msr)) {
 			kvmppc_restore_tm_hv(vcpu, msr, true);
@@ -316,8 +317,9 @@ void store_vcpu_state(struct kvm_vcpu *vcpu)
 #endif
 	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
 
-	if (cpu_has_feature(CPU_FTR_TM) ||
-	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+	if ((cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+		       (vcpu->arch.hfscr & HFSCR_TM)) {
 		unsigned long msr = vcpu->arch.shregs.msr;
 		if (MSR_TM_ACTIVE(msr)) {
 			kvmppc_save_tm_hv(vcpu, msr, true);
@@ -326,6 +328,9 @@ void store_vcpu_state(struct kvm_vcpu *vcpu)
 			vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
 			vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
 		}
+		vcpu->arch.load_tm++; /* see load_ebb comment for details */
+		if (!vcpu->arch.load_tm)
+			vcpu->arch.hfscr &= ~HFSCR_TM;
 	}
 }
 EXPORT_SYMBOL_GPL(store_vcpu_state);
@@ -615,8 +620,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 		msr |= MSR_VEC;
 	if (cpu_has_feature(CPU_FTR_VSX))
 		msr |= MSR_VSX;
-	if (cpu_has_feature(CPU_FTR_TM) ||
-	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+	if ((cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+			(vcpu->arch.hfscr & HFSCR_TM))
 		msr |= MSR_TM;
 	msr = msr_check_and_set(msr);
 	/* Save MSR for restore. This is after hard disable, so EE is clear. */