
[RFC,10/43] powerpc/64s: Always set PMU control registers to frozen/disabled when not in use

Message ID 20210622105736.633352-11-npiggin@gmail.com
State New
Series KVM: PPC: Book3S HV P9: entry/exit optimisations round 1

Commit Message

Nicholas Piggin June 22, 2021, 10:57 a.m. UTC
KVM PMU management code looks for particular frozen/disabled bits in
the PMU registers so it knows whether it must clear them when coming
out of a guest or not. Setting this up helps KVM make these optimisations
without getting confused. Longer term the better approach might be to
move guest/host PMU switching to the perf subsystem.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
 arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
 arch/powerpc/kvm/book3s_hv.c          | 5 +++++
 arch/powerpc/perf/core-book3s.c       | 7 +++++++
 4 files changed, 17 insertions(+), 5 deletions(-)

Comments

Madhavan Srinivasan July 1, 2021, 1:17 p.m. UTC | #1
On 6/22/21 4:27 PM, Nicholas Piggin wrote:
> KVM PMU management code looks for particular frozen/disabled bits in
> the PMU registers so it knows whether it must clear them when coming
> out of a guest or not. Setting this up helps KVM make these optimisations
> without getting confused. Longer term the better approach might be to
> move guest/host PMU switching to the perf subsystem.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
>   arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
>   arch/powerpc/kvm/book3s_hv.c          | 5 +++++
>   arch/powerpc/perf/core-book3s.c       | 7 +++++++
>   4 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
> index a29dc8326622..3dc61e203f37 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.c
> +++ b/arch/powerpc/kernel/cpu_setup_power.c
> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
>   static void init_PMU(void)
>   {
>   	mtspr(SPRN_MMCRA, 0);
> -	mtspr(SPRN_MMCR0, 0);
> +	mtspr(SPRN_MMCR0, MMCR0_FC);

Sticky point here is, currently if not frozen, pmc5/6 will
keep counting. And not freezing them at boot is quite useful
sometimes, like, say, when running in a simulation where we can
calculate approximate CPIs for micro-benchmarks without the perf
subsystem.
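
(To illustrate with a rough sketch, not from this patch: while unfrozen,
PMC5 counts instructions completed and PMC6 counts run cycles, so the
sim-side estimate is just two mfsprs.)

	unsigned long insns  = mfspr(SPRN_PMC5);	/* instructions completed */
	unsigned long cycles = mfspr(SPRN_PMC6);	/* run cycles */
	/* approx CPI over the measured window ~= cycles / insns */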

>   	mtspr(SPRN_MMCR1, 0);
>   	mtspr(SPRN_MMCR2, 0);
>   }
> @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
>   {
>   	mtspr(SPRN_MMCR3, 0);
>   	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>   }
>
>   /*
> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
> index 0a6b36b4bda8..06a089fbeaa7 100644
> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> @@ -353,7 +353,7 @@ static void init_pmu_power8(void)
>   	}
>
>   	mtspr(SPRN_MMCRA, 0);
> -	mtspr(SPRN_MMCR0, 0);
> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>   	mtspr(SPRN_MMCR1, 0);
>   	mtspr(SPRN_MMCR2, 0);
>   	mtspr(SPRN_MMCRS, 0);
> @@ -392,7 +392,7 @@ static void init_pmu_power9(void)
>   		mtspr(SPRN_MMCRC, 0);
>
>   	mtspr(SPRN_MMCRA, 0);
> -	mtspr(SPRN_MMCR0, 0);
> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>   	mtspr(SPRN_MMCR1, 0);
>   	mtspr(SPRN_MMCR2, 0);
>   }
> @@ -428,7 +428,7 @@ static void init_pmu_power10(void)
>
>   	mtspr(SPRN_MMCR3, 0);
>   	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>   }
>
>   static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1f30f98b09d1..f7349d150828 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2593,6 +2593,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
>   #endif
>   #endif
>   	vcpu->arch.mmcr[0] = MMCR0_FC;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
> +		vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
> +	}
> +
>   	vcpu->arch.ctrl = CTRL_RUNLATCH;
>   	/* default to host PVR, since we can't spoof it */
>   	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 51622411a7cc..e33b29ec1a65 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -1361,6 +1361,13 @@ static void power_pmu_enable(struct pmu *pmu)
>   		goto out;
>
>   	if (cpuhw->n_events == 0) {
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
> +			mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
> +		} else {
> +			mtspr(SPRN_MMCRA, 0);
> +			mtspr(SPRN_MMCR0, MMCR0_FC);
> +		}
>   		ppc_set_pmu_inuse(0);
>   		goto out;
>   	}
Nicholas Piggin July 2, 2021, 12:27 a.m. UTC | #2
Excerpts from Madhavan Srinivasan's message of July 1, 2021 11:17 pm:
> 
> On 6/22/21 4:27 PM, Nicholas Piggin wrote:
>> KVM PMU management code looks for particular frozen/disabled bits in
>> the PMU registers so it knows whether it must clear them when coming
>> out of a guest or not. Setting this up helps KVM make these optimisations
>> without getting confused. Longer term the better approach might be to
>> move guest/host PMU switching to the perf subsystem.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
>>   arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
>>   arch/powerpc/kvm/book3s_hv.c          | 5 +++++
>>   arch/powerpc/perf/core-book3s.c       | 7 +++++++
>>   4 files changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
>> index a29dc8326622..3dc61e203f37 100644
>> --- a/arch/powerpc/kernel/cpu_setup_power.c
>> +++ b/arch/powerpc/kernel/cpu_setup_power.c
>> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
>>   static void init_PMU(void)
>>   {
>>   	mtspr(SPRN_MMCRA, 0);
>> -	mtspr(SPRN_MMCR0, 0);
>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
> 
> Sticky point here is, currently if not frozen, pmc5/6 will
> keep counting. And not freezing them at boot is quite useful
> sometimes, like, say, when running in a simulation where we can
> calculate approximate CPIs for micro-benchmarks without the perf
> subsystem.

Can't you even use the sysfs files in this sim environment? If not,
what if we added a boot option that could set some things up? That way
you could possibly gather some more types of events too.

Thanks,
Nick
Nicholas Piggin July 8, 2021, 12:45 p.m. UTC | #3
Excerpts from Nicholas Piggin's message of July 2, 2021 10:27 am:
> Excerpts from Madhavan Srinivasan's message of July 1, 2021 11:17 pm:
>> 
>> On 6/22/21 4:27 PM, Nicholas Piggin wrote:
>>> KVM PMU management code looks for particular frozen/disabled bits in
>>> the PMU registers so it knows whether it must clear them when coming
>>> out of a guest or not. Setting this up helps KVM make these optimisations
>>> without getting confused. Longer term the better approach might be to
>>> move guest/host PMU switching to the perf subsystem.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>   arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
>>>   arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
>>>   arch/powerpc/kvm/book3s_hv.c          | 5 +++++
>>>   arch/powerpc/perf/core-book3s.c       | 7 +++++++
>>>   4 files changed, 17 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
>>> index a29dc8326622..3dc61e203f37 100644
>>> --- a/arch/powerpc/kernel/cpu_setup_power.c
>>> +++ b/arch/powerpc/kernel/cpu_setup_power.c
>>> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
>>>   static void init_PMU(void)
>>>   {
>>>   	mtspr(SPRN_MMCRA, 0);
>>> -	mtspr(SPRN_MMCR0, 0);
>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>> 
>> Sticky point here is, currently if not frozen, pmc5/6 will
>> keep counting. And not freezing them at boot is quite useful
>> sometimes, like, say, when running in a simulation where we can
>> calculate approximate CPIs for micro-benchmarks without the perf
>> subsystem.
> 
> Can't you even use the sysfs files in this sim environment? If not,
> what if we added a boot option that could set some things up? That way
> you could possibly gather some more types of events too.

What if we added this to allow sim environments to run PMC5/6 and 
additionally specify MMCR1 without userspace involvement?

Thanks,
Nick

---
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index af8a4981c6f6..454771243529 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2425,8 +2425,24 @@ int register_power_pmu(struct power_pmu *pmu)
 }
 
 #ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+	ppc_set_pmu_inuse(1);
+	if (pmu_override_val)
+		mtspr(SPRN_MMCR1, pmu_override_val);
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
 static int __init init_ppc64_pmu(void)
 {
+	if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+		printk(KERN_WARNING "perf: disabling perf due to pmu= command line option.\n");
+		on_each_cpu(do_pmu_override, NULL, 1);
+		return 0;
+	}
+
 	/* run through all the pmu drivers one at a time */
 	if (!init_power5_pmu())
 		return 0;
@@ -2448,4 +2464,23 @@ static int __init init_ppc64_pmu(void)
 		return init_generic_compat_pmu();
 }
 early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+	unsigned long val;
+
+	if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+		return 0;
+
+	pmu_override = true;
+
+	if (kstrtoul(str, 0, &val))
+		val = 0;
+
+	pmu_override_val = val;
+
+	return 1;
+}
+__setup("pmu=", pmu_setup);
+
 #endif
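
(Usage, per the sketch above: booting with pmu=0 would just unfreeze
PMC5/6, and pmu=<MMCR1 value> would also program the event selects;
perf PMU driver registration is then skipped entirely.)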
Athira Rajeev July 10, 2021, 2:50 a.m. UTC | #4
> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> KVM PMU management code looks for particular frozen/disabled bits in
> the PMU registers so it knows whether it must clear them when coming
> out of a guest or not. Setting this up helps KVM make these optimisations
> without getting confused. Longer term the better approach might be to
> move guest/host PMU switching to the perf subsystem.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
> arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
> arch/powerpc/kvm/book3s_hv.c          | 5 +++++
> arch/powerpc/perf/core-book3s.c       | 7 +++++++
> 4 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
> index a29dc8326622..3dc61e203f37 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.c
> +++ b/arch/powerpc/kernel/cpu_setup_power.c
> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
> static void init_PMU(void)
> {
> 	mtspr(SPRN_MMCRA, 0);
> -	mtspr(SPRN_MMCR0, 0);
> +	mtspr(SPRN_MMCR0, MMCR0_FC);
> 	mtspr(SPRN_MMCR1, 0);
> 	mtspr(SPRN_MMCR2, 0);
> }
> @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
> {
> 	mtspr(SPRN_MMCR3, 0);
> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
> }
> 
> /*
> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
> index 0a6b36b4bda8..06a089fbeaa7 100644
> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> @@ -353,7 +353,7 @@ static void init_pmu_power8(void)
> 	}
> 
> 	mtspr(SPRN_MMCRA, 0);
> -	mtspr(SPRN_MMCR0, 0);
> +	mtspr(SPRN_MMCR0, MMCR0_FC);
> 	mtspr(SPRN_MMCR1, 0);
> 	mtspr(SPRN_MMCR2, 0);
> 	mtspr(SPRN_MMCRS, 0);
> @@ -392,7 +392,7 @@ static void init_pmu_power9(void)
> 		mtspr(SPRN_MMCRC, 0);
> 
> 	mtspr(SPRN_MMCRA, 0);
> -	mtspr(SPRN_MMCR0, 0);
> +	mtspr(SPRN_MMCR0, MMCR0_FC);
> 	mtspr(SPRN_MMCR1, 0);
> 	mtspr(SPRN_MMCR2, 0);
> }
> @@ -428,7 +428,7 @@ static void init_pmu_power10(void)
> 
> 	mtspr(SPRN_MMCR3, 0);
> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
> }
> 
> static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1f30f98b09d1..f7349d150828 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2593,6 +2593,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
> #endif
> #endif
> 	vcpu->arch.mmcr[0] = MMCR0_FC;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
> +		vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
> +	}
> +
> 	vcpu->arch.ctrl = CTRL_RUNLATCH;
> 	/* default to host PVR, since we can't spoof it */
> 	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 51622411a7cc..e33b29ec1a65 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -1361,6 +1361,13 @@ static void power_pmu_enable(struct pmu *pmu)
> 		goto out;
> 
> 	if (cpuhw->n_events == 0) {
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
> +			mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
> +		} else {
> +			mtspr(SPRN_MMCRA, 0);
> +			mtspr(SPRN_MMCR0, MMCR0_FC);
> +		}


Hi Nick,

We are setting these bits in the “power_pmu_disable” function, and
disable will be called before any event gets deleted/stopped. Can you
please help me understand why this is needed in the power_pmu_enable
path also?

Thanks
Athira

> 		ppc_set_pmu_inuse(0);
> 		goto out;
> 	}
> -- 
> 2.23.0
>
Nicholas Piggin July 12, 2021, 2:41 a.m. UTC | #5
Excerpts from Athira Rajeev's message of July 10, 2021 12:50 pm:
> 
> 
>> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
>> 
>> KVM PMU management code looks for particular frozen/disabled bits in
>> the PMU registers so it knows whether it must clear them when coming
>> out of a guest or not. Setting this up helps KVM make these optimisations
>> without getting confused. Longer term the better approach might be to
>> move guest/host PMU switching to the perf subsystem.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
>> arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
>> arch/powerpc/kvm/book3s_hv.c          | 5 +++++
>> arch/powerpc/perf/core-book3s.c       | 7 +++++++
>> 4 files changed, 17 insertions(+), 5 deletions(-)
>> 
>> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
>> index a29dc8326622..3dc61e203f37 100644
>> --- a/arch/powerpc/kernel/cpu_setup_power.c
>> +++ b/arch/powerpc/kernel/cpu_setup_power.c
>> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
>> static void init_PMU(void)
>> {
>> 	mtspr(SPRN_MMCRA, 0);
>> -	mtspr(SPRN_MMCR0, 0);
>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>> 	mtspr(SPRN_MMCR1, 0);
>> 	mtspr(SPRN_MMCR2, 0);
>> }
>> @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
>> {
>> 	mtspr(SPRN_MMCR3, 0);
>> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
>> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>> }
>> 
>> /*
>> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
>> index 0a6b36b4bda8..06a089fbeaa7 100644
>> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
>> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
>> @@ -353,7 +353,7 @@ static void init_pmu_power8(void)
>> 	}
>> 
>> 	mtspr(SPRN_MMCRA, 0);
>> -	mtspr(SPRN_MMCR0, 0);
>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>> 	mtspr(SPRN_MMCR1, 0);
>> 	mtspr(SPRN_MMCR2, 0);
>> 	mtspr(SPRN_MMCRS, 0);
>> @@ -392,7 +392,7 @@ static void init_pmu_power9(void)
>> 		mtspr(SPRN_MMCRC, 0);
>> 
>> 	mtspr(SPRN_MMCRA, 0);
>> -	mtspr(SPRN_MMCR0, 0);
>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>> 	mtspr(SPRN_MMCR1, 0);
>> 	mtspr(SPRN_MMCR2, 0);
>> }
>> @@ -428,7 +428,7 @@ static void init_pmu_power10(void)
>> 
>> 	mtspr(SPRN_MMCR3, 0);
>> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
>> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>> }
>> 
>> static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 1f30f98b09d1..f7349d150828 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -2593,6 +2593,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
>> #endif
>> #endif
>> 	vcpu->arch.mmcr[0] = MMCR0_FC;
>> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>> +		vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
>> +		vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
>> +	}
>> +
>> 	vcpu->arch.ctrl = CTRL_RUNLATCH;
>> 	/* default to host PVR, since we can't spoof it */
>> 	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index 51622411a7cc..e33b29ec1a65 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -1361,6 +1361,13 @@ static void power_pmu_enable(struct pmu *pmu)
>> 		goto out;
>> 
>> 	if (cpuhw->n_events == 0) {
>> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>> +			mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>> +			mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>> +		} else {
>> +			mtspr(SPRN_MMCRA, 0);
>> +			mtspr(SPRN_MMCR0, MMCR0_FC);
>> +		}
> 
> 
> Hi Nick,
> 
> We are setting these bits in the “power_pmu_disable” function, and
> disable will be called before any event gets deleted/stopped. Can you
> please help me understand why this is needed in the power_pmu_enable
> path also?

I'll have to go back and check what I needed it for.

Basically what I'm trying to achieve is that when the guest and host 
are not running perf, then the registers should match. This way the host 
can test them with mfspr but does not need to fix them with mtspr.
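
Something like this on the exit side (sketch only; the helper name is
made up, not code from this series):

	/* Host leaves MMCR0_FC set whenever perf has nothing scheduled */
	if (!(mfspr(SPRN_MMCR0) & MMCR0_FC))
		switch_pmu_to_host(vcpu);	/* hypothetical slow path */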

It's not too important for performance: after the TM facility demand
faulting and the nested guest PMU fix, you can usually just skip that
entirely. But I think it's cleaner. I'll double-check my perf/ changes;
it's possible that part should be split out or is unnecessary.

Thanks,
Nick
Madhavan Srinivasan July 12, 2021, 3:42 a.m. UTC | #6
On 7/2/21 5:57 AM, Nicholas Piggin wrote:
> Excerpts from Madhavan Srinivasan's message of July 1, 2021 11:17 pm:
>> On 6/22/21 4:27 PM, Nicholas Piggin wrote:
>>> KVM PMU management code looks for particular frozen/disabled bits in
>>> the PMU registers so it knows whether it must clear them when coming
>>> out of a guest or not. Setting this up helps KVM make these optimisations
>>> without getting confused. Longer term the better approach might be to
>>> move guest/host PMU switching to the perf subsystem.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>    arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
>>>    arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
>>>    arch/powerpc/kvm/book3s_hv.c          | 5 +++++
>>>    arch/powerpc/perf/core-book3s.c       | 7 +++++++
>>>    4 files changed, 17 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
>>> index a29dc8326622..3dc61e203f37 100644
>>> --- a/arch/powerpc/kernel/cpu_setup_power.c
>>> +++ b/arch/powerpc/kernel/cpu_setup_power.c
>>> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
>>>    static void init_PMU(void)
>>>    {
>>>    	mtspr(SPRN_MMCRA, 0);
>>> -	mtspr(SPRN_MMCR0, 0);
>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>> Sticky point here is, currently if not frozen, pmc5/6 will
>> keep counting. And not freezing them at boot is quite useful
>> sometimes, like, say, when running in a simulation where we can
>> calculate approximate CPIs for micro-benchmarks without the perf
>> subsystem.

Sorry for the delayed response

> Can't you even use the sysfs files in this sim environment? If not,

Yes, it is possible. It's just that we would need an additional patch
to maintain for the sim.
> what if we added a boot option that could set some things up? That way
> you could possibly gather some more types of events too.

Having a boot option would be overkill :). I guess we could have
this under a config option?

>
> Thanks,
> Nick
Nicholas Piggin July 14, 2021, 12:39 p.m. UTC | #7
Excerpts from Nicholas Piggin's message of July 12, 2021 12:41 pm:
> Excerpts from Athira Rajeev's message of July 10, 2021 12:50 pm:
>> 
>> 
>>> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
>>> 
>>> KVM PMU management code looks for particular frozen/disabled bits in
>>> the PMU registers so it knows whether it must clear them when coming
>>> out of a guest or not. Setting this up helps KVM make these optimisations
>>> without getting confused. Longer term the better approach might be to
>>> move guest/host PMU switching to the perf subsystem.
>>> 
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
>>> arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
>>> arch/powerpc/kvm/book3s_hv.c          | 5 +++++
>>> arch/powerpc/perf/core-book3s.c       | 7 +++++++
>>> 4 files changed, 17 insertions(+), 5 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
>>> index a29dc8326622..3dc61e203f37 100644
>>> --- a/arch/powerpc/kernel/cpu_setup_power.c
>>> +++ b/arch/powerpc/kernel/cpu_setup_power.c
>>> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
>>> static void init_PMU(void)
>>> {
>>> 	mtspr(SPRN_MMCRA, 0);
>>> -	mtspr(SPRN_MMCR0, 0);
>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>>> 	mtspr(SPRN_MMCR1, 0);
>>> 	mtspr(SPRN_MMCR2, 0);
>>> }
>>> @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
>>> {
>>> 	mtspr(SPRN_MMCR3, 0);
>>> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>>> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
>>> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>>> }
>>> 
>>> /*
>>> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
>>> index 0a6b36b4bda8..06a089fbeaa7 100644
>>> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
>>> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
>>> @@ -353,7 +353,7 @@ static void init_pmu_power8(void)
>>> 	}
>>> 
>>> 	mtspr(SPRN_MMCRA, 0);
>>> -	mtspr(SPRN_MMCR0, 0);
>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>>> 	mtspr(SPRN_MMCR1, 0);
>>> 	mtspr(SPRN_MMCR2, 0);
>>> 	mtspr(SPRN_MMCRS, 0);
>>> @@ -392,7 +392,7 @@ static void init_pmu_power9(void)
>>> 		mtspr(SPRN_MMCRC, 0);
>>> 
>>> 	mtspr(SPRN_MMCRA, 0);
>>> -	mtspr(SPRN_MMCR0, 0);
>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>>> 	mtspr(SPRN_MMCR1, 0);
>>> 	mtspr(SPRN_MMCR2, 0);
>>> }
>>> @@ -428,7 +428,7 @@ static void init_pmu_power10(void)
>>> 
>>> 	mtspr(SPRN_MMCR3, 0);
>>> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>>> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
>>> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>>> }
>>> 
>>> static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index 1f30f98b09d1..f7349d150828 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -2593,6 +2593,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
>>> #endif
>>> #endif
>>> 	vcpu->arch.mmcr[0] = MMCR0_FC;
>>> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>>> +		vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
>>> +		vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
>>> +	}
>>> +
>>> 	vcpu->arch.ctrl = CTRL_RUNLATCH;
>>> 	/* default to host PVR, since we can't spoof it */
>>> 	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
>>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>>> index 51622411a7cc..e33b29ec1a65 100644
>>> --- a/arch/powerpc/perf/core-book3s.c
>>> +++ b/arch/powerpc/perf/core-book3s.c
>>> @@ -1361,6 +1361,13 @@ static void power_pmu_enable(struct pmu *pmu)
>>> 		goto out;
>>> 
>>> 	if (cpuhw->n_events == 0) {
>>> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>>> +			mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>>> +			mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>>> +		} else {
>>> +			mtspr(SPRN_MMCRA, 0);
>>> +			mtspr(SPRN_MMCR0, MMCR0_FC);
>>> +		}
>> 
>> 
>> Hi Nick,
>> 
>> We are setting these bits in the “power_pmu_disable” function, and
>> disable will be called before any event gets deleted/stopped. Can you
>> please help me understand why this is needed in the power_pmu_enable
>> path also?
> 
> I'll have to go back and check what I needed it for.

Okay, MMCRA is getting MMCRA_SDAR_MODE_DCACHE set on POWER9, by the looks.

That's not necessarily a problem, but KVM sets MMCRA to 0 to disable
SDAR updates, so KVM and perf don't agree on what the "correct" value
for disabled is. That could be a problem with POWER10 not setting BHRB
disable before my series.

I'll get rid of this hunk for now; I expect things won't be exactly
clean or consistent until the KVM host PMU code is moved into perf/,
though.

Thanks,
Nick
Madhavan Srinivasan July 16, 2021, 3:43 a.m. UTC | #8
On 7/14/21 6:09 PM, Nicholas Piggin wrote:
> Excerpts from Nicholas Piggin's message of July 12, 2021 12:41 pm:
>> Excerpts from Athira Rajeev's message of July 10, 2021 12:50 pm:
>>>
>>>> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
>>>>
>>>> KVM PMU management code looks for particular frozen/disabled bits in
>>>> the PMU registers so it knows whether it must clear them when coming
>>>> out of a guest or not. Setting this up helps KVM make these optimisations
>>>> without getting confused. Longer term the better approach might be to
>>>> move guest/host PMU switching to the perf subsystem.
>>>>
>>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>>> ---
>>>> arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
>>>> arch/powerpc/kernel/dt_cpu_ftrs.c     | 6 +++---
>>>> arch/powerpc/kvm/book3s_hv.c          | 5 +++++
>>>> arch/powerpc/perf/core-book3s.c       | 7 +++++++
>>>> 4 files changed, 17 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
>>>> index a29dc8326622..3dc61e203f37 100644
>>>> --- a/arch/powerpc/kernel/cpu_setup_power.c
>>>> +++ b/arch/powerpc/kernel/cpu_setup_power.c
>>>> @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
>>>> static void init_PMU(void)
>>>> {
>>>> 	mtspr(SPRN_MMCRA, 0);
>>>> -	mtspr(SPRN_MMCR0, 0);
>>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>>>> 	mtspr(SPRN_MMCR1, 0);
>>>> 	mtspr(SPRN_MMCR2, 0);
>>>> }
>>>> @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
>>>> {
>>>> 	mtspr(SPRN_MMCR3, 0);
>>>> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>>>> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
>>>> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>>>> }
>>>>
>>>> /*
>>>> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
>>>> index 0a6b36b4bda8..06a089fbeaa7 100644
>>>> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
>>>> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
>>>> @@ -353,7 +353,7 @@ static void init_pmu_power8(void)
>>>> 	}
>>>>
>>>> 	mtspr(SPRN_MMCRA, 0);
>>>> -	mtspr(SPRN_MMCR0, 0);
>>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>>>> 	mtspr(SPRN_MMCR1, 0);
>>>> 	mtspr(SPRN_MMCR2, 0);
>>>> 	mtspr(SPRN_MMCRS, 0);
>>>> @@ -392,7 +392,7 @@ static void init_pmu_power9(void)
>>>> 		mtspr(SPRN_MMCRC, 0);
>>>>
>>>> 	mtspr(SPRN_MMCRA, 0);
>>>> -	mtspr(SPRN_MMCR0, 0);
>>>> +	mtspr(SPRN_MMCR0, MMCR0_FC);
>>>> 	mtspr(SPRN_MMCR1, 0);
>>>> 	mtspr(SPRN_MMCR2, 0);
>>>> }
>>>> @@ -428,7 +428,7 @@ static void init_pmu_power10(void)
>>>>
>>>> 	mtspr(SPRN_MMCR3, 0);
>>>> 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>>>> -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
>>>> +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>>>> }
>>>>
>>>> static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>> index 1f30f98b09d1..f7349d150828 100644
>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>> @@ -2593,6 +2593,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
>>>> #endif
>>>> #endif
>>>> 	vcpu->arch.mmcr[0] = MMCR0_FC;
>>>> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>>>> +		vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
>>>> +		vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
>>>> +	}
>>>> +
>>>> 	vcpu->arch.ctrl = CTRL_RUNLATCH;
>>>> 	/* default to host PVR, since we can't spoof it */
>>>> 	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
>>>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>>>> index 51622411a7cc..e33b29ec1a65 100644
>>>> --- a/arch/powerpc/perf/core-book3s.c
>>>> +++ b/arch/powerpc/perf/core-book3s.c
>>>> @@ -1361,6 +1361,13 @@ static void power_pmu_enable(struct pmu *pmu)
>>>> 		goto out;
>>>>
>>>> 	if (cpuhw->n_events == 0) {
>>>> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
>>>> +			mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
>>>> +			mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
>>>> +		} else {
>>>> +			mtspr(SPRN_MMCRA, 0);
>>>> +			mtspr(SPRN_MMCR0, MMCR0_FC);
>>>> +		}
>>>
>>> Hi Nick,
>>>
>>> We are setting these bits in the “power_pmu_disable” function, and
>>> disable will be called before any event gets deleted/stopped. Can you
>>> please help me understand why this is needed in the power_pmu_enable
>>> path also?
>> I'll have to go back and check what I needed it for.
> Okay, MMCRA is getting MMCRA_SDAR_MODE_DCACHE set on POWER9, by the looks.
>
> That's not necessarily a problem, but KVM sets MMCRA to 0 to disable
> SDAR updates, so KVM and perf don't agree on what the "correct" value
> for disabled is. That could be a problem with POWER10 not setting BHRB
> disable before my series.

Nick,

BHRB disable is only known to a P10 guest kernel; a P9 guest kernel
does not know about it. And for the P10 host and guest, I guess we set
BHRB_DISABLE in the cpu_setup code and also in power_pmu_disable().
So are you seeing the P10 guest having MMCRA as zero during context
switch?

Maddy
>
> I'll get rid of this hunk for now; I expect things won't be exactly
> clean or consistent until the KVM host PMU code is moved into perf/,
> though.
>
> Thanks,
> Nick

Patch

diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
index a29dc8326622..3dc61e203f37 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -109,7 +109,7 @@  static void init_PMU_HV_ISA207(void)
 static void init_PMU(void)
 {
 	mtspr(SPRN_MMCRA, 0);
-	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
 	mtspr(SPRN_MMCR1, 0);
 	mtspr(SPRN_MMCR2, 0);
 }
@@ -123,7 +123,7 @@  static void init_PMU_ISA31(void)
 {
 	mtspr(SPRN_MMCR3, 0);
 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 /*
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 0a6b36b4bda8..06a089fbeaa7 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -353,7 +353,7 @@  static void init_pmu_power8(void)
 	}
 
 	mtspr(SPRN_MMCRA, 0);
-	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
 	mtspr(SPRN_MMCR1, 0);
 	mtspr(SPRN_MMCR2, 0);
 	mtspr(SPRN_MMCRS, 0);
@@ -392,7 +392,7 @@  static void init_pmu_power9(void)
 		mtspr(SPRN_MMCRC, 0);
 
 	mtspr(SPRN_MMCRA, 0);
-	mtspr(SPRN_MMCR0, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
 	mtspr(SPRN_MMCR1, 0);
 	mtspr(SPRN_MMCR2, 0);
 }
@@ -428,7 +428,7 @@  static void init_pmu_power10(void)
 
 	mtspr(SPRN_MMCR3, 0);
 	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1f30f98b09d1..f7349d150828 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2593,6 +2593,11 @@  static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 #endif
 #endif
 	vcpu->arch.mmcr[0] = MMCR0_FC;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
+		vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
+	}
+
 	vcpu->arch.ctrl = CTRL_RUNLATCH;
 	/* default to host PVR, since we can't spoof it */
 	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 51622411a7cc..e33b29ec1a65 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1361,6 +1361,13 @@  static void power_pmu_enable(struct pmu *pmu)
 		goto out;
 
 	if (cpuhw->n_events == 0) {
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+			mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
+		} else {
+			mtspr(SPRN_MMCRA, 0);
+			mtspr(SPRN_MMCR0, MMCR0_FC);
+		}
 		ppc_set_pmu_inuse(0);
 		goto out;
 	}