diff mbox series

[2/3] powerpc/sysfs: Show idle_purr and idle_spurr for every CPU

Message ID 1574856072-30972-3-git-send-email-ego@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show
Series pseries: Track and expose idle PURR and SPURR ticks | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (2ec2260ce7bce5eb6a8ced0bb78d75c1b3eca306)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 44 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Gautham R Shenoy Nov. 27, 2019, 12:01 p.m. UTC
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

On Pseries LPARs, to calculate utilization, we need to know the
[S]PURR ticks when the CPUs were busy or idle.

The total PURR and SPURR ticks are already exposed via the per-cpu
sysfs files /sys/devices/system/cpu/cpuX/purr and
/sys/devices/system/cpu/cpuX/spurr.

This patch adds support for exposing the idle PURR and SPURR ticks via
/sys/devices/system/cpu/cpuX/idle_purr and
/sys/devices/system/cpu/cpuX/idle_spurr.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/sysfs.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

Comments

Kamalesh Babulal Dec. 3, 2019, 1:37 p.m. UTC | #1
On 11/27/19 5:31 PM, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> On Pseries LPARs, to calculate utilization, we need to know the
> [S]PURR ticks when the CPUs were busy or idle.
> 
> The total PURR and SPURR ticks are already exposed via the per-cpu
> sysfs files /sys/devices/system/cpu/cpuX/purr and
> /sys/devices/system/cpu/cpuX/spurr.
> 
> This patch adds support for exposing the idle PURR and SPURR ticks via
> /sys/devices/system/cpu/cpuX/idle_purr and
> /sys/devices/system/cpu/cpuX/idle_spurr.

The patch looks good to me, with a minor file mode nit pick mentioned below.

> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/sysfs.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 80a676d..42ade55 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -1044,6 +1044,36 @@ static ssize_t show_physical_id(struct device *dev,
>  }
>  static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
> 
> +static ssize_t idle_purr_show(struct device *dev,
> +			      struct device_attribute *attr, char *buf)
> +{
> +	struct cpu *cpu = container_of(dev, struct cpu, dev);
> +	unsigned int cpuid = cpu->dev.id;
> +	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
> +	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
> +
> +	return sprintf(buf, "%llx\n", idle_purr_cycles);
> +}
> +static DEVICE_ATTR_RO(idle_purr);

per cpu purr/spurr sysfs file is created with file mode 0400. Using
DEVICE_ATTR_RO for their idle_* variants will create sysfs files with 0444 as
their file mode, you should probably use DEVICE_ATTR() with file mode 0400 to
have consist permission for both variants.
kernel test robot Dec. 3, 2019, 9:02 p.m. UTC | #2
Hi "Gautham,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.4 next-20191203]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Gautham-R-Shenoy/powerpc-pseries-Account-for-SPURR-ticks-on-idle-CPUs/20191127-234537
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allnoconfig (attached as .config)
compiler: powerpc-linux-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

Note: the linux-review/Gautham-R-Shenoy/powerpc-pseries-Account-for-SPURR-ticks-on-idle-CPUs/20191127-234537 HEAD 54932d09dae77a0b15c47c7e51f0fb6e7d34900f builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   arch/powerpc/kernel/sysfs.c: In function 'idle_purr_show':
>> arch/powerpc/kernel/sysfs.c:1052:34: error: 'paca_ptrs' undeclared (first use in this function); did you mean 'hash_ptr'?
     struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
                                     ^~~~~~~~~
                                     hash_ptr
   arch/powerpc/kernel/sysfs.c:1052:34: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/byteorder/big_endian.h:5:0,
                    from arch/powerpc/include/uapi/asm/byteorder.h:14,
                    from include/asm-generic/bitops/le.h:6,
                    from arch/powerpc/include/asm/bitops.h:243,
                    from include/linux/bitops.h:26,
                    from include/linux/kernel.h:12,
                    from include/linux/list.h:9,
                    from include/linux/kobject.h:19,
                    from include/linux/device.h:16,
                    from arch/powerpc/kernel/sysfs.c:2:
>> arch/powerpc/kernel/sysfs.c:1053:51: error: dereferencing pointer to incomplete type 'struct lppaca'
     u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
                                                      ^
   include/uapi/linux/byteorder/big_endian.h:38:51: note: in definition of macro '__be64_to_cpu'
    #define __be64_to_cpu(x) ((__force __u64)(__be64)(x))
                                                      ^
   arch/powerpc/kernel/sysfs.c:1053:25: note: in expansion of macro 'be64_to_cpu'
     u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
                            ^~~~~~~~~~~

vim +1052 arch/powerpc/kernel/sysfs.c

  1046	
  1047	static ssize_t idle_purr_show(struct device *dev,
  1048				      struct device_attribute *attr, char *buf)
  1049	{
  1050		struct cpu *cpu = container_of(dev, struct cpu, dev);
  1051		unsigned int cpuid = cpu->dev.id;
> 1052		struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
> 1053		u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
  1054	
  1055		return sprintf(buf, "%llx\n", idle_purr_cycles);
  1056	}
  1057	static DEVICE_ATTR_RO(idle_purr);
  1058	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
Gautham R Shenoy Dec. 4, 2019, 12:37 p.m. UTC | #3
Hi Kamalesh,

On Tue, Dec 03, 2019 at 07:07:53PM +0530, Kamalesh Babulal wrote:
> On 11/27/19 5:31 PM, Gautham R. Shenoy wrote:
> > From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> > 
> > On Pseries LPARs, to calculate utilization, we need to know the
> > [S]PURR ticks when the CPUs were busy or idle.
> > 
> > The total PURR and SPURR ticks are already exposed via the per-cpu
> > sysfs files /sys/devices/system/cpu/cpuX/purr and
> > /sys/devices/system/cpu/cpuX/spurr.
> > 
> > This patch adds support for exposing the idle PURR and SPURR ticks via
> > /sys/devices/system/cpu/cpuX/idle_purr and
> > /sys/devices/system/cpu/cpuX/idle_spurr.
> 
> The patch looks good to me, with a minor file mode nit pick mentioned below.
> 
> > 
> > Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > ---
> >  arch/powerpc/kernel/sysfs.c | 32 ++++++++++++++++++++++++++++++++
> >  1 file changed, 32 insertions(+)
> > 
> > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > index 80a676d..42ade55 100644
> > --- a/arch/powerpc/kernel/sysfs.c
> > +++ b/arch/powerpc/kernel/sysfs.c
> > @@ -1044,6 +1044,36 @@ static ssize_t show_physical_id(struct device *dev,
> >  }
> >  static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
> > 
> > +static ssize_t idle_purr_show(struct device *dev,
> > +			      struct device_attribute *attr, char *buf)
> > +{
> > +	struct cpu *cpu = container_of(dev, struct cpu, dev);
> > +	unsigned int cpuid = cpu->dev.id;
> > +	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
> > +	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
> > +
> > +	return sprintf(buf, "%llx\n", idle_purr_cycles);
> > +}
> > +static DEVICE_ATTR_RO(idle_purr);
> 
> per cpu purr/spurr sysfs file is created with file mode 0400. Using
> DEVICE_ATTR_RO for their idle_* variants will create sysfs files with 0444 as
> their file mode, you should probably use DEVICE_ATTR() with file mode 0400 to
> have consist permission for both variants.

Thanks for catching this. I missed checking the permissions of purr
and spurr. Will send another version.


> 
> -- 
> Kamalesh
Nathan Lynch Dec. 4, 2019, 10:24 p.m. UTC | #4
"Gautham R. Shenoy" <ego@linux.vnet.ibm.com> writes:
> @@ -1067,6 +1097,8 @@ static int __init topology_init(void)
>  			register_cpu(c, cpu);
>  
>  			device_create_file(&c->dev, &dev_attr_physical_id);
> +			if (firmware_has_feature(FW_FEATURE_SPLPAR))
> +				create_idle_purr_spurr_sysfs_entry(&c->dev);

Architecturally speaking PURR/SPURR aren't strongly linked to the PAPR
SPLPAR option, are they? I'm not sure it's right for these attributes to
be absent if the platform does not support shared processor mode.
Naveen N. Rao Dec. 5, 2019, 4:53 p.m. UTC | #5
Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> On Pseries LPARs, to calculate utilization, we need to know the
> [S]PURR ticks when the CPUs were busy or idle.
> 
> The total PURR and SPURR ticks are already exposed via the per-cpu
> sysfs files /sys/devices/system/cpu/cpuX/purr and
> /sys/devices/system/cpu/cpuX/spurr.
> 
> This patch adds support for exposing the idle PURR and SPURR ticks via
> /sys/devices/system/cpu/cpuX/idle_purr and
> /sys/devices/system/cpu/cpuX/idle_spurr.
> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/sysfs.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 80a676d..42ade55 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -1044,6 +1044,36 @@ static ssize_t show_physical_id(struct device *dev,
>  }
>  static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
> 
> +static ssize_t idle_purr_show(struct device *dev,
> +			      struct device_attribute *attr, char *buf)
> +{
> +	struct cpu *cpu = container_of(dev, struct cpu, dev);
> +	unsigned int cpuid = cpu->dev.id;
> +	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
> +	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
> +
> +	return sprintf(buf, "%llx\n", idle_purr_cycles);
> +}
> +static DEVICE_ATTR_RO(idle_purr);
> +
> +DECLARE_PER_CPU(u64, idle_spurr_cycles);
> +static ssize_t idle_spurr_show(struct device *dev,
> +			       struct device_attribute *attr, char *buf)
> +{
> +	struct cpu *cpu = container_of(dev, struct cpu, dev);
> +	unsigned int cpuid = cpu->dev.id;
> +	u64 *idle_spurr_cycles_ptr = per_cpu_ptr(&idle_spurr_cycles, cpuid);

Is it possible for a user to read stale values if a particular cpu is in 
an extended cede? Is it possible to use smp_call_function_single() to 
force the cpu out of idle?

- Naveen
Gautham R Shenoy Feb. 3, 2020, 4:47 a.m. UTC | #6
Hello Nathan,


On Wed, Dec 04, 2019 at 04:24:31PM -0600, Nathan Lynch wrote:
> "Gautham R. Shenoy" <ego@linux.vnet.ibm.com> writes:
> > @@ -1067,6 +1097,8 @@ static int __init topology_init(void)
> >  			register_cpu(c, cpu);
> >  
> >  			device_create_file(&c->dev, &dev_attr_physical_id);
> > +			if (firmware_has_feature(FW_FEATURE_SPLPAR))
> > +				create_idle_purr_spurr_sysfs_entry(&c->dev);
> 
> Architecturally speaking PURR/SPURR aren't strongly linked to the PAPR
> SPLPAR option, are they? I'm not sure it's right for these attributes to
> be absent if the platform does not support shared processor mode.

Doesn't FW_FEATURE_SPLPAR refer to all Pseries guests ? It is perhaps
incorrectly named, but from the other uses in the kernel, it seems to
indicate that we are running as a guest instead of on a bare-metal
system.

--
Thanks and Regards
gautham.
Gautham R Shenoy Feb. 3, 2020, 4:50 a.m. UTC | #7
Hi Naveen,

On Thu, Dec 05, 2019 at 10:23:58PM +0530, Naveen N. Rao wrote:
> >diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> >index 80a676d..42ade55 100644
> >--- a/arch/powerpc/kernel/sysfs.c
> >+++ b/arch/powerpc/kernel/sysfs.c
> >@@ -1044,6 +1044,36 @@ static ssize_t show_physical_id(struct device *dev,
> > }
> > static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
> >
> >+static ssize_t idle_purr_show(struct device *dev,
> >+			      struct device_attribute *attr, char *buf)
> >+{
> >+	struct cpu *cpu = container_of(dev, struct cpu, dev);
> >+	unsigned int cpuid = cpu->dev.id;
> >+	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
> >+	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
> >+
> >+	return sprintf(buf, "%llx\n", idle_purr_cycles);
> >+}
> >+static DEVICE_ATTR_RO(idle_purr);
> >+
> >+DECLARE_PER_CPU(u64, idle_spurr_cycles);
> >+static ssize_t idle_spurr_show(struct device *dev,
> >+			       struct device_attribute *attr, char *buf)
> >+{
> >+	struct cpu *cpu = container_of(dev, struct cpu, dev);
> >+	unsigned int cpuid = cpu->dev.id;
> >+	u64 *idle_spurr_cycles_ptr = per_cpu_ptr(&idle_spurr_cycles, cpuid);
> 
> Is it possible for a user to read stale values if a particular cpu is in an
> extended cede? Is it possible to use smp_call_function_single() to force the
> cpu out of idle?

Yes, if the CPU whose idle_spurr cycle is being read is still in idle,
then we will miss reporting the delta spurr cycles for this last
idle-duration. Yes, we can use an smp_call_function_single(), though
that will introduce IPI noise. How often will idle_[s]purr be read ?

> 
> - Naveen
>

--
Thanks and Regards
gautham.
Naveen N. Rao Feb. 4, 2020, 7:52 a.m. UTC | #8
Gautham R Shenoy wrote:
> Hi Naveen,
> 
> On Thu, Dec 05, 2019 at 10:23:58PM +0530, Naveen N. Rao wrote:
>> >diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
>> >index 80a676d..42ade55 100644
>> >--- a/arch/powerpc/kernel/sysfs.c
>> >+++ b/arch/powerpc/kernel/sysfs.c
>> >@@ -1044,6 +1044,36 @@ static ssize_t show_physical_id(struct device *dev,
>> > }
>> > static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
>> >
>> >+static ssize_t idle_purr_show(struct device *dev,
>> >+			      struct device_attribute *attr, char *buf)
>> >+{
>> >+	struct cpu *cpu = container_of(dev, struct cpu, dev);
>> >+	unsigned int cpuid = cpu->dev.id;
>> >+	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
>> >+	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
>> >+
>> >+	return sprintf(buf, "%llx\n", idle_purr_cycles);
>> >+}
>> >+static DEVICE_ATTR_RO(idle_purr);
>> >+
>> >+DECLARE_PER_CPU(u64, idle_spurr_cycles);
>> >+static ssize_t idle_spurr_show(struct device *dev,
>> >+			       struct device_attribute *attr, char *buf)
>> >+{
>> >+	struct cpu *cpu = container_of(dev, struct cpu, dev);
>> >+	unsigned int cpuid = cpu->dev.id;
>> >+	u64 *idle_spurr_cycles_ptr = per_cpu_ptr(&idle_spurr_cycles, cpuid);
>> 
>> Is it possible for a user to read stale values if a particular cpu is in an
>> extended cede? Is it possible to use smp_call_function_single() to force the
>> cpu out of idle?
> 
> Yes, if the CPU whose idle_spurr cycle is being read is still in idle,
> then we will miss reporting the delta spurr cycles for this last
> idle-duration. Yes, we can use an smp_call_function_single(), though
> that will introduce IPI noise. How often will idle_[s]purr be read ?

Since it is possible for a cpu to go into extended cede for multiple 
seconds during which time it is possible to mis-report utilization, I 
think it is better to ensure that the sysfs interface for idle_[s]purr 
report the proper values through use of IPI.

With repect to lparstat, the read interval is user-specified and just 
gets passed onto sleep().

- Naveen
Gautham R Shenoy Feb. 5, 2020, 4:19 a.m. UTC | #9
Hi Naveen,

On Tue, Feb 04, 2020 at 01:22:19PM +0530, Naveen N. Rao wrote:
> Gautham R Shenoy wrote:
> >Hi Naveen,
> >
> >On Thu, Dec 05, 2019 at 10:23:58PM +0530, Naveen N. Rao wrote:
> >>>diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> >>>index 80a676d..42ade55 100644
> >>>--- a/arch/powerpc/kernel/sysfs.c
> >>>+++ b/arch/powerpc/kernel/sysfs.c
> >>>@@ -1044,6 +1044,36 @@ static ssize_t show_physical_id(struct device *dev,
> >>> }
> >>> static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
> >>>
> >>>+static ssize_t idle_purr_show(struct device *dev,
> >>>+			      struct device_attribute *attr, char *buf)
> >>>+{
> >>>+	struct cpu *cpu = container_of(dev, struct cpu, dev);
> >>>+	unsigned int cpuid = cpu->dev.id;
> >>>+	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
> >>>+	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
> >>>+
> >>>+	return sprintf(buf, "%llx\n", idle_purr_cycles);
> >>>+}
> >>>+static DEVICE_ATTR_RO(idle_purr);
> >>>+
> >>>+DECLARE_PER_CPU(u64, idle_spurr_cycles);
> >>>+static ssize_t idle_spurr_show(struct device *dev,
> >>>+			       struct device_attribute *attr, char *buf)
> >>>+{
> >>>+	struct cpu *cpu = container_of(dev, struct cpu, dev);
> >>>+	unsigned int cpuid = cpu->dev.id;
> >>>+	u64 *idle_spurr_cycles_ptr = per_cpu_ptr(&idle_spurr_cycles, cpuid);
> >>
> >>Is it possible for a user to read stale values if a particular cpu is in an
> >>extended cede? Is it possible to use smp_call_function_single() to force the
> >>cpu out of idle?
> >
> >Yes, if the CPU whose idle_spurr cycle is being read is still in idle,
> >then we will miss reporting the delta spurr cycles for this last
> >idle-duration. Yes, we can use an smp_call_function_single(), though
> >that will introduce IPI noise. How often will idle_[s]purr be read ?
> 
> Since it is possible for a cpu to go into extended cede for multiple seconds
> during which time it is possible to mis-report utilization, I think it is
> better to ensure that the sysfs interface for idle_[s]purr report the proper
> values through use of IPI.
>

Fair enough.


> With repect to lparstat, the read interval is user-specified and just gets
> passed onto sleep().

Ok. So I guess currently you will be sending smp_call_function every
time you read a PURR and SPURR. That number will now increase by 2
times when we read idle_purr and idle_spurr.


> 
> - Naveen
> 

--
Thanks and Regards
gautham.
Naveen N. Rao Feb. 5, 2020, 6:58 a.m. UTC | #10
Gautham R Shenoy wrote:
> 
>> With repect to lparstat, the read interval is user-specified and just gets
>> passed onto sleep().
> 
> Ok. So I guess currently you will be sending smp_call_function every
> time you read a PURR and SPURR. That number will now increase by 2
> times when we read idle_purr and idle_spurr.

Yes, not really efficient. I just wanted to point out that we can't have 
stale data being returned if we choose to add another sysfs file.

We should be able to use any other interface too, if you have a 
different interface in mind.


- Naveen
Christophe Leroy Feb. 5, 2020, 7:08 a.m. UTC | #11
Le 27/11/2019 à 13:01, Gautham R. Shenoy a écrit :
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> On Pseries LPARs, to calculate utilization, we need to know the
> [S]PURR ticks when the CPUs were busy or idle.
> 
> The total PURR and SPURR ticks are already exposed via the per-cpu
> sysfs files /sys/devices/system/cpu/cpuX/purr and
> /sys/devices/system/cpu/cpuX/spurr.
> 
> This patch adds support for exposing the idle PURR and SPURR ticks via
> /sys/devices/system/cpu/cpuX/idle_purr and
> /sys/devices/system/cpu/cpuX/idle_spurr.

Might be a candid question, but I see in arch/powerpc/kernel/time.c that 
PURR/SPURR are already taken into account by the kernel to calculate 
utilisation when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is selected.

As far as I understand, you are wanting to expose this to userland to 
redo the calculation there. What is wrong with the values reported by 
the kernel ?

Christophe

> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>   arch/powerpc/kernel/sysfs.c | 32 ++++++++++++++++++++++++++++++++
>   1 file changed, 32 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 80a676d..42ade55 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -1044,6 +1044,36 @@ static ssize_t show_physical_id(struct device *dev,
>   }
>   static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
>   
> +static ssize_t idle_purr_show(struct device *dev,
> +			      struct device_attribute *attr, char *buf)
> +{
> +	struct cpu *cpu = container_of(dev, struct cpu, dev);
> +	unsigned int cpuid = cpu->dev.id;
> +	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
> +	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
> +
> +	return sprintf(buf, "%llx\n", idle_purr_cycles);
> +}
> +static DEVICE_ATTR_RO(idle_purr);
> +
> +DECLARE_PER_CPU(u64, idle_spurr_cycles);
> +static ssize_t idle_spurr_show(struct device *dev,
> +			       struct device_attribute *attr, char *buf)
> +{
> +	struct cpu *cpu = container_of(dev, struct cpu, dev);
> +	unsigned int cpuid = cpu->dev.id;
> +	u64 *idle_spurr_cycles_ptr = per_cpu_ptr(&idle_spurr_cycles, cpuid);
> +
> +	return sprintf(buf, "%llx\n", *idle_spurr_cycles_ptr);
> +}
> +static DEVICE_ATTR_RO(idle_spurr);
> +
> +static void create_idle_purr_spurr_sysfs_entry(struct device *cpudev)
> +{
> +	device_create_file(cpudev, &dev_attr_idle_purr);
> +	device_create_file(cpudev, &dev_attr_idle_spurr);
> +}
> +
>   static int __init topology_init(void)
>   {
>   	int cpu, r;
> @@ -1067,6 +1097,8 @@ static int __init topology_init(void)
>   			register_cpu(c, cpu);
>   
>   			device_create_file(&c->dev, &dev_attr_physical_id);
> +			if (firmware_has_feature(FW_FEATURE_SPLPAR))
> +				create_idle_purr_spurr_sysfs_entry(&c->dev);
>   		}
>   	}
>   	r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online",
>
Naveen N. Rao Feb. 5, 2020, 8:07 a.m. UTC | #12
Christophe Leroy wrote:
> 
> 
> Le 27/11/2019 à 13:01, Gautham R. Shenoy a écrit :
>> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>> 
>> On Pseries LPARs, to calculate utilization, we need to know the
>> [S]PURR ticks when the CPUs were busy or idle.
>> 
>> The total PURR and SPURR ticks are already exposed via the per-cpu
>> sysfs files /sys/devices/system/cpu/cpuX/purr and
>> /sys/devices/system/cpu/cpuX/spurr.
>> 
>> This patch adds support for exposing the idle PURR and SPURR ticks via
>> /sys/devices/system/cpu/cpuX/idle_purr and
>> /sys/devices/system/cpu/cpuX/idle_spurr.
> 
> Might be a candid question, but I see in arch/powerpc/kernel/time.c that 
> PURR/SPURR are already taken into account by the kernel to calculate 
> utilisation when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is selected.
> 
> As far as I understand, you are wanting to expose this to userland to 
> redo the calculation there. What is wrong with the values reported by 
> the kernel ?

As you point out, it is only done with 
CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, but isn't available with NO_HZ_FULL, 
which happens to be the distro default nowadays.

- Naveen
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 80a676d..42ade55 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -1044,6 +1044,36 @@  static ssize_t show_physical_id(struct device *dev,
 }
 static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
 
+static ssize_t idle_purr_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	unsigned int cpuid = cpu->dev.id;
+	struct lppaca *cpu_lppaca_ptr = paca_ptrs[cpuid]->lppaca_ptr;
+	u64 idle_purr_cycles = be64_to_cpu(cpu_lppaca_ptr->wait_state_cycles);
+
+	return sprintf(buf, "%llx\n", idle_purr_cycles);
+}
+static DEVICE_ATTR_RO(idle_purr);
+
+DECLARE_PER_CPU(u64, idle_spurr_cycles);
+static ssize_t idle_spurr_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	unsigned int cpuid = cpu->dev.id;
+	u64 *idle_spurr_cycles_ptr = per_cpu_ptr(&idle_spurr_cycles, cpuid);
+
+	return sprintf(buf, "%llx\n", *idle_spurr_cycles_ptr);
+}
+static DEVICE_ATTR_RO(idle_spurr);
+
+static void create_idle_purr_spurr_sysfs_entry(struct device *cpudev)
+{
+	device_create_file(cpudev, &dev_attr_idle_purr);
+	device_create_file(cpudev, &dev_attr_idle_spurr);
+}
+
 static int __init topology_init(void)
 {
 	int cpu, r;
@@ -1067,6 +1097,8 @@  static int __init topology_init(void)
 			register_cpu(c, cpu);
 
 			device_create_file(&c->dev, &dev_attr_physical_id);
+			if (firmware_has_feature(FW_FEATURE_SPLPAR))
+				create_idle_purr_spurr_sysfs_entry(&c->dev);
 		}
 	}
 	r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online",