[v2,2/2] powerpc/pseries: new lparcfg key/value pair: partition_affinity_score

Message ID 20200727184605.2945095-2-cheloha@linux.ibm.com
State Accepted
Commit 5d1bc776428f34941a6237afb9454061b5b5e1e1
Series [v2,1/2] powerpc/perf: consolidate GPCI hcall structs into asm/hvcall.h

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (3f68564f1f5aca55654fda237fc01495bf050ce9)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e warning Upstream build failed, couldn't test patch
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 1 checks, 47 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Scott Cheloha July 27, 2020, 6:46 p.m. UTC
The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall,
Affinity_Domain_Info_By_Partition, which returns, among other things,
a "partition affinity score" for a given LPAR.  This score, a value on
[0-100], represents the processor-memory affinity for the LPAR in
question.  A score of 0 indicates the worst possible affinity while a
score of 100 indicates perfect affinity.  The score can be used to
reason about performance.

This patch adds the score for the local LPAR to the lparcfg procfile
under a new 'partition_affinity_score' key.
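
With the patch applied, the score can be read from userspace as a line
of the form 'partition_affinity_score=<score>' in /proc/powerpc/lparcfg
(e.g. 'partition_affinity_score=98'; the value here is illustrative).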

Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/lparcfg.c | 35 ++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

Comments

Tyrel Datwyler Aug. 5, 2020, 10:42 p.m. UTC | #1
On 7/27/20 11:46 AM, Scott Cheloha wrote:
> The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall,
> Affinity_Domain_Info_By_Partition, which returns, among other things,
> a "partition affinity score" for a given LPAR.  This score, a value on
> [0-100], represents the processor-memory affinity for the LPAR in
> question.  A score of 0 indicates the worst possible affinity while a
> score of 100 indicates perfect affinity.  The score can be used to
> reason about performance.
> 
> This patch adds the score for the local LPAR to the lparcfg procfile
> under a new 'partition_affinity_score' key.
> 
> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>

I was hoping Michael would chime in the first time around on this patch series
about adding another key/value pair to lparcfg. So, barring a NACK from mpe:

Reviewed-by: Tyrel Datwyler <tyreld@linux.ibm.com>

> [... quoted diff trimmed; the full patch appears at the end of this page ...]
Michael Ellerman Aug. 6, 2020, 12:44 p.m. UTC | #2
Tyrel Datwyler <tyreld@linux.ibm.com> writes:
> On 7/27/20 11:46 AM, Scott Cheloha wrote:
>> The H_GetPerformanceCounterInfo (GPCI) PHYP hypercall has a subcall,
>> Affinity_Domain_Info_By_Partition, which returns, among other things,
>> a "partition affinity score" for a given LPAR.  This score, a value on
>> [0-100], represents the processor-memory affinity for the LPAR in
>> question.  A score of 0 indicates the worst possible affinity while a
>> score of 100 indicates perfect affinity.  The score can be used to
>> reason about performance.
>> 
>> This patch adds the score for the local LPAR to the lparcfg procfile
>> under a new 'partition_affinity_score' key.
>> 
>> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
>
> I was hoping Michael would chime in the first time around on this patch series
> about adding another key/value pair to lparcfg.

That guy is so unreliable.

I don't love adding new stuff in lparcfg, but given the file already
exists and there's no prospect of removing it, it's probably not worth
the effort to put the new field anywhere else.

My other query with this was how on earth anyone is meant to interpret
the metric, i.e. if my metric is 50, what does that mean? If it's 90,
should I worry?

Which makes me realise we have no documentation for lparcfg in the
kernel at all.

So it would be nice to have it mentioned somewhere in Documentation,
even if it just points to the manpage in powerpc-ibm-utils.
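
A minimal stub along these lines (hypothetical wording, just a sketch)
would probably be enough:

Documentation/powerpc/lparcfg.rst:

	=======
	lparcfg
	=======

	/proc/powerpc/lparcfg exposes partition configuration and
	performance information on pseries systems, mostly as one
	key=value pair per line. The individual fields are described
	in the lparcfg manpage shipped with powerpc-ibm-utils.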

cheers
Nathan Lynch Aug. 6, 2020, 3:17 p.m. UTC | #3
Michael Ellerman <mpe@ellerman.id.au> writes:
> Tyrel Datwyler <tyreld@linux.ibm.com> writes:
>> On 7/27/20 11:46 AM, Scott Cheloha wrote:
>>> [... commit message trimmed ...]
>>
>> I was hoping Michael would chime in the first time around on this patch series
>> about adding another key/value pair to lparcfg.
>
> That guy is so unreliable.
>
> I don't love adding new stuff in lparcfg, but given the file already
> exists and there's no prospect of removing it, it's probably not worth
> the effort to put the new field anywhere else.
>
> My other query with this was how on earth anyone is meant to interpret
> the metric, i.e. if my metric is 50, what does that mean? If it's 90,
> should I worry?

Here's some more background.

This interface is just passing up what the platform provides, and it's
identical to the partition affinity score described in the documentation
for the management console's lsmemopt command:

https://www.ibm.com/support/knowledgecenter/POWER9/p9edm/lsmemopt.html

The score is 0-100, higher values are better. To illustrate: I believe a
partition's score will be 100 (or very close to it) if all of its CPUs
and memory reside within one node. It will be lower than that when a
partition has some memory without local CPUs, and lower still when there
is no CPU-memory affinity within the partition. Beyond that I don't have
more specific information and the algorithm and scale are set by the
platform.

The intent is for this to be a metric to gather during problem
determination, e.g. via sosreport or similar, but as far as Linux is
concerned the score should be treated as an opaque value.
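
To make the problem-determination use concrete, here is a minimal
userspace sketch (hypothetical, not part of this series) that scrapes
the score out of /proc/powerpc/lparcfg:

#include <stdio.h>

/*
 * Hypothetical helper: return the partition affinity score (0-100)
 * parsed from /proc/powerpc/lparcfg, or -1 if the file cannot be
 * opened or the key is absent (e.g. the hcall failed, so the kernel
 * never emitted the line).
 */
static int read_affinity_score(void)
{
	FILE *f = fopen("/proc/powerpc/lparcfg", "r");
	char line[128];
	int score = -1;

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "partition_affinity_score=%d", &score) == 1)
			break;
	}
	fclose(f);
	return score;
}

int main(void)
{
	int score = read_affinity_score();

	if (score < 0) {
		fprintf(stderr, "partition_affinity_score not available\n");
		return 1;
	}
	printf("partition_affinity_score=%d\n", score);
	return 0;
}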
Nathan Lynch Aug. 6, 2020, 3:18 p.m. UTC | #4
Scott Cheloha <cheloha@linux.ibm.com> writes:
> [... commit message and patch trimmed; full patch below ...]
Acked-by: Nathan Lynch <nathanl@linux.ibm.com>

Patch

diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index b8d28ab88178..e278390ab28d 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -136,6 +136,39 @@  static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
 	return rc;
 }
 
+static void show_gpci_data(struct seq_file *m)
+{
+	struct hv_gpci_request_buffer *buf;
+	unsigned int affinity_score;
+	long ret;
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (buf == NULL)
+		return;
+
+	/*
+	 * Show the local LPAR's affinity score.
+	 *
+	 * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall.
+	 * The score is at byte 0xB in the output buffer.
+	 */
+	memset(&buf->params, 0, sizeof(buf->params));
+	buf->params.counter_request = cpu_to_be32(0xB1);
+	buf->params.starting_index = cpu_to_be32(-1);	/* local LPAR */
+	buf->params.counter_info_version_in = 0x5;	/* v5+ for score */
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf),
+				 sizeof(*buf));
+	if (ret != H_SUCCESS) {
+		pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n",
+			 ret, be32_to_cpu(buf->params.detail_rc));
+		goto out;
+	}
+	affinity_score = buf->bytes[0xB];
+	seq_printf(m, "partition_affinity_score=%u\n", affinity_score);
+out:
+	kfree(buf);
+}
+
 static unsigned h_pic(unsigned long *pool_idle_time,
 		      unsigned long *num_procs)
 {
@@ -487,6 +520,8 @@  static int pseries_lparcfg_data(struct seq_file *m, void *v)
 			   partition_active_processors * 100);
 	}
 
+	show_gpci_data(m);
+
 	seq_printf(m, "partition_active_processors=%d\n",
 		   partition_active_processors);