[v4,6/6] pseries/sysfs: Minimise IPI noise while reading [idle_][s]purr

Message ID 1585308760-28792-7-git-send-email-ego@linux.vnet.ibm.com (mailing list archive)
State Superseded
Series Track and expose idle PURR and SPURR ticks

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (c6624071c338732402e8c726df6a4074473eaa0e)
snowpatch_ozlabs/build-ppc64le warning Build succeeded but added 2 new sparse warnings
snowpatch_ozlabs/build-ppc64be warning Build succeeded but added 2 new sparse warnings
snowpatch_ozlabs/build-ppc64e warning Build succeeded but added 2 new sparse warnings
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch warning total: 0 errors, 2 warnings, 0 checks, 156 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Gautham R Shenoy March 27, 2020, 11:32 a.m. UTC
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

Currently, purr, spurr, idle_purr and idle_spurr are exposed for every
CPU via the sysfs interface
/sys/devices/system/cpu/cpuX/[idle_][s]purr. Each sysfs read currently
generates an IPI to obtain the desired value from the target CPU X.
Since these sysfs files are typically read one after another, we end
up generating 4 IPIs per CPU within a short duration.

In order to minimize the IPI noise, this patch caches the values of
all four counters whenever one of them is read. If any of them is
read again within the next 10ms, the cached value is returned. With
this, we generate at most one IPI every 10ms per CPU.

Test results: the four sysfs files were read back-to-back for a given
CPU once per second, for 100 seconds.

Without the patch:
		 16 [XICS 2 Edge IPI] = 422 times
		 DBL [Doorbell interrupts] = 13 times
		 Total: 435 IPIs.

With the patch:
		  16 [XICS 2 Edge IPI] = 111 times
		  DBL [Doorbell interrupts] = 17 times
		  Total: 128 IPIs.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/sysfs.c | 117 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 97 insertions(+), 20 deletions(-)

Comments

Naveen N. Rao April 1, 2020, 9:58 a.m. UTC | #1
Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> Currently, purr, spurr, idle_purr and idle_spurr are exposed for every
> CPU via the sysfs interface
> /sys/devices/system/cpu/cpuX/[idle_][s]purr. Each sysfs read currently
> generates an IPI to obtain the desired value from the target CPU X.
> Since these sysfs files are typically read one after another, we end
> up generating 4 IPIs per CPU within a short duration.
> 
> In order to minimize the IPI noise, this patch caches the values of
> all four counters whenever one of them is read. If any of them is
> read again within the next 10ms, the cached value is returned. With
> this, we generate at most one IPI every 10ms per CPU.
> 
> Test results: the four sysfs files were read back-to-back for a given
> CPU once per second, for 100 seconds.
> 
> Without the patch:
> 		 16 [XICS 2 Edge IPI] = 422 times
> 		 DBL [Doorbell interrupts] = 13 times
> 		 Total: 435 IPIs.
> 
> With the patch:
> 		  16 [XICS 2 Edge IPI] = 111 times
> 		  DBL [Doorbell interrupts] = 17 times
> 		  Total: 128 IPIs.
> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/sysfs.c | 117 ++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 97 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 571b325..bd92023 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -586,8 +586,6 @@ void ppc_enable_pmcs(void)
>   * SPRs which are not related to PMU.
>   */
>  #ifdef CONFIG_PPC64
> -SYSFS_SPRSETUP(purr, SPRN_PURR);
> -SYSFS_SPRSETUP(spurr, SPRN_SPURR);
>  SYSFS_SPRSETUP(pir, SPRN_PIR);
>  SYSFS_SPRSETUP(tscr, SPRN_TSCR);
> 
> @@ -596,8 +594,6 @@ void ppc_enable_pmcs(void)
>    enable write when needed with a separate function.
>    Lets be conservative and default to pseries.
>  */
> -static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
> -static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
>  static DEVICE_ATTR(pir, 0400, show_pir, NULL);
>  static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
>  #endif /* CONFIG_PPC64 */
> @@ -761,22 +757,110 @@ static void create_svm_file(void)
>  }
>  #endif /* CONFIG_PPC_SVM */
> 
> +#ifdef CONFIG_PPC64
> +/*
> + * The duration (in ms) from the last IPI to the target CPU until
> + * which a cached value of purr, spurr, idle_purr, idle_spurr can be
> + * reported to the user on a corresponding sysfs file read. Beyond
> + * this duration, fresh values need to be obtained by sending IPIs to
> + * the target CPU when the sysfs files are read.
> + */
> +static unsigned long util_stats_staleness_tolerance_ms = 10;

This is a nice optimization for our use in lparstat, though I have a 
concern below.

> +struct util_acct_stats {
> +	u64 latest_purr;
> +	u64 latest_spurr;
> +#ifdef CONFIG_PPC_PSERIES
> +	u64 latest_idle_purr;
> +	u64 latest_idle_spurr;
> +#endif

You can probably drop the 'latest_' prefix.

> +	unsigned long last_update_jiffies;
> +};
> +
> +DEFINE_PER_CPU(struct util_acct_stats, util_acct_stats);

Per snowpatch, this should be static, and so should get_util_stats_ptr() 
below:
https://openpower.xyz/job/snowpatch/job/snowpatch-linux-sparse/16601//artifact/linux/report.txt

> +
> +static void update_util_acct_stats(void *ptr)
> +{
> +	struct util_acct_stats *stats = ptr;
> +
> +	stats->latest_purr = mfspr(SPRN_PURR);
> +	stats->latest_spurr = mfspr(SPRN_SPURR);
>  #ifdef CONFIG_PPC_PSERIES
> -static void read_idle_purr(void *val)
> +	stats->latest_idle_purr = read_this_idle_purr();
> +	stats->latest_idle_spurr = read_this_idle_spurr();
> +#endif
> +	stats->last_update_jiffies = jiffies;
> +}
> +
> +struct util_acct_stats *get_util_stats_ptr(int cpu)
> +{
> +	struct util_acct_stats *stats = per_cpu_ptr(&util_acct_stats, cpu);
> +	unsigned long delta_jiffies;
> +
> +	delta_jiffies = jiffies - stats->last_update_jiffies;
> +
> +	/*
> +	 * If we have recent enough data, reuse it instead of
> +	 * sending an IPI.
> +	 */
> +	if (jiffies_to_msecs(delta_jiffies) < util_stats_staleness_tolerance_ms)
> +		return stats;
> +
> +	smp_call_function_single(cpu, update_util_acct_stats, stats, 1);
> +	return stats;
> +}
> +
> +static ssize_t show_purr(struct device *dev,
> +			 struct device_attribute *attr, char *buf)
>  {
> -	u64 *ret = val;
> +	struct cpu *cpu = container_of(dev, struct cpu, dev);
> +	struct util_acct_stats *stats;
> 
> -	*ret = read_this_idle_purr();
> +	stats = get_util_stats_ptr(cpu->dev.id);
> +	return sprintf(buf, "%llx\n", stats->latest_purr);

This alters the behavior of the current sysfs purr file. I am not sure 
if it is reasonable to return the same PURR value across a 10ms window.

I wonder if we should introduce a sysctl interface to control 
thresholding. It can default to 0, which disables thresholding so that 
the existing behavior continues. Applications (lparstat) can optionally 
set it to suit their use.
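
Something along these lines, perhaps (an untested sketch; the helper
name is made up, and the default of 0 preserves today's behavior):

	/* Hypothetical tunable, exposed via sysctl; 0 = no caching. */
	static unsigned long util_stats_staleness_tolerance_ms;

	static bool util_stats_cache_is_fresh(struct util_acct_stats *stats)
	{
		unsigned long delta_jiffies;

		/* Thresholding disabled: always take the IPI path. */
		if (!util_stats_staleness_tolerance_ms)
			return false;

		delta_jiffies = jiffies - stats->last_update_jiffies;
		return jiffies_to_msecs(delta_jiffies) <
			util_stats_staleness_tolerance_ms;
	}

get_util_stats_ptr() would then call this instead of comparing against
the hard-coded 10ms.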

- Naveen
Gautham R Shenoy April 1, 2020, 12:01 p.m. UTC | #2
Hello Naveen,


On Wed, Apr 01, 2020 at 03:28:48PM +0530, Naveen N. Rao wrote:
> Gautham R. Shenoy wrote:
> >From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> >
 [..snip..]

> >-static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
> >-static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
> > static DEVICE_ATTR(pir, 0400, show_pir, NULL);
> > static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
> > #endif /* CONFIG_PPC64 */
> >@@ -761,22 +757,110 @@ static void create_svm_file(void)
> > }
> > #endif /* CONFIG_PPC_SVM */
> >
> >+#ifdef CONFIG_PPC64
> >+/*
> >+ * The duration (in ms) from the last IPI to the target CPU until
> >+ * which a cached value of purr, spurr, idle_purr, idle_spurr can be
> >+ * reported to the user on a corresponding sysfs file read. Beyond
> >+ * this duration, fresh values need to be obtained by sending IPIs to
> >+ * the target CPU when the sysfs files are read.
> >+ */
> >+static unsigned long util_stats_staleness_tolerance_ms = 10;
> 
> This is a nice optimization for our use in lparstat, though I have a concern
> below.
> 
> >+struct util_acct_stats {
> >+	u64 latest_purr;
> >+	u64 latest_spurr;
> >+#ifdef CONFIG_PPC_PSERIES
> >+	u64 latest_idle_purr;
> >+	u64 latest_idle_spurr;
> >+#endif
> 
> You can probably drop the 'latest_' prefix.


Sure.

> 
> >+	unsigned long last_update_jiffies;
> >+};
> >+
> >+DEFINE_PER_CPU(struct util_acct_stats, util_acct_stats);
> 
> Per snowpatch, this should be static, and so should get_util_stats_ptr()
> below:
> https://openpower.xyz/job/snowpatch/job/snowpatch-linux-sparse/16601//artifact/linux/report.txt

Ok, will fix this in v5.

> 
> >+
> >+static void update_util_acct_stats(void *ptr)
> >+{
> >+	struct util_acct_stats *stats = ptr;
> >+
> >+	stats->latest_purr = mfspr(SPRN_PURR);
> >+	stats->latest_spurr = mfspr(SPRN_SPURR);
> > #ifdef CONFIG_PPC_PSERIES
> >-static void read_idle_purr(void *val)
> >+	stats->latest_idle_purr = read_this_idle_purr();
> >+	stats->latest_idle_spurr = read_this_idle_spurr();
> >+#endif
> >+	stats->last_update_jiffies = jiffies;
> >+}
> >+
> >+struct util_acct_stats *get_util_stats_ptr(int cpu)
> >+{
> >+	struct util_acct_stats *stats = per_cpu_ptr(&util_acct_stats, cpu);
> >+	unsigned long delta_jiffies;
> >+
> >+	delta_jiffies = jiffies - stats->last_update_jiffies;
> >+
> >+	/*
> >+	 * If we have recent enough data, reuse it instead of
> >+	 * sending an IPI.
> >+	 */
> >+	if (jiffies_to_msecs(delta_jiffies) < util_stats_staleness_tolerance_ms)
> >+		return stats;
> >+
> >+	smp_call_function_single(cpu, update_util_acct_stats, stats, 1);
> >+	return stats;
> >+}
> >+
> >+static ssize_t show_purr(struct device *dev,
> >+			 struct device_attribute *attr, char *buf)
> > {
> >-	u64 *ret = val;
> >+	struct cpu *cpu = container_of(dev, struct cpu, dev);
> >+	struct util_acct_stats *stats;
> >
> >-	*ret = read_this_idle_purr();
> >+	stats = get_util_stats_ptr(cpu->dev.id);
> >+	return sprintf(buf, "%llx\n", stats->latest_purr);
> 
> This alters the behavior of the current sysfs purr file. I am not sure if it
> is reasonable to return the same PURR value across a 10ms window.


It does reduce it to a 10ms window. I am not sure anyone samples PURR
etc. faster than that rate.

I measured how much time it takes to read the purr, spurr, idle_purr
and idle_spurr files back-to-back. It takes no more than 150us. From
lparstat, will these values be read back-to-back? If so, we can reduce
the staleness_tolerance to something like 500us and still avoid extra
IPIs. If not, what is the maximum delay between the first sysfs file
read and the last sysfs file read?
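
(For reference, a minimal userspace reader for such a measurement could
look like the sketch below -- illustrative only, error handling omitted,
and cpu0 assumed:)

	#include <fcntl.h>
	#include <stdio.h>
	#include <time.h>
	#include <unistd.h>

	static const char *files[] = {
		"/sys/devices/system/cpu/cpu0/purr",
		"/sys/devices/system/cpu/cpu0/spurr",
		"/sys/devices/system/cpu/cpu0/idle_purr",
		"/sys/devices/system/cpu/cp0/idle_spurr",
	};

	int main(void)
	{
		struct timespec t0, t1;
		char buf[64];
		int i;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		for (i = 0; i < 4; i++) {
			/* Each read may trigger one IPI in the kernel. */
			int fd = open(files[i], O_RDONLY);

			read(fd, buf, sizeof(buf));
			close(fd);
		}
		clock_gettime(CLOCK_MONOTONIC, &t1);

		printf("%ld ns for 4 back-to-back reads\n",
		       (t1.tv_sec - t0.tv_sec) * 1000000000L +
		       (t1.tv_nsec - t0.tv_nsec));
		return 0;
	}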

>
> I wonder if we should introduce a sysctl interface to control thresholding.
> It can default to 0, which disables thresholding so that the existing
> behavior continues. Applications (lparstat) can optionally set it to suit
> their use.

We would be introducing 3 new sysfs interfaces that way instead of
two.

/sys/devices/system/cpu/purr_spurr_staleness
/sys/devices/system/cpu/cpuX/idle_purr
/sys/devices/system/cpu/cpuX/idle_spurr

I don't have a problem with this. Nathan, Michael, thoughts on this?


The alternative is to have a procfs interface, something like
/proc/powerpc/resource_util_stats

which gives a listing similar to /proc/stat, i.e

      CPUX  <purr>  <idle_purr>  <spurr>  <idle_spurr>

Even in this case, the values can be obtained in one shot with a
single IPI and printed in the row corresponding to the CPU.
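
A rough sketch of such a show function (untested; it assumes the
update_util_acct_stats() helper from this patch, a standard
proc_create()/single_open() registration, and CONFIG_PPC_PSERIES for
the idle_* fields):

	static int resource_util_stats_show(struct seq_file *m, void *v)
	{
		struct util_acct_stats stats;
		int cpu;

		for_each_online_cpu(cpu) {
			/* A single IPI samples all four values for this CPU. */
			smp_call_function_single(cpu, update_util_acct_stats,
						 &stats, 1);
			seq_printf(m, "CPU%d  %llx  %llx  %llx  %llx\n", cpu,
				   stats.latest_purr, stats.latest_idle_purr,
				   stats.latest_spurr, stats.latest_idle_spurr);
		}
		return 0;
	}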

> 
> - Naveen
> 

--
Thanks and Regards
gautham.
Naveen N. Rao April 2, 2020, 7:34 a.m. UTC | #3
Gautham R Shenoy wrote:
> Hello Naveen,
> 
> 
> On Wed, Apr 01, 2020 at 03:28:48PM +0530, Naveen N. Rao wrote:
>> Gautham R. Shenoy wrote:
>> >From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>> >
>  [..snip..]
> 
>> >+
>> >+static ssize_t show_purr(struct device *dev,
>> >+			 struct device_attribute *attr, char *buf)
>> > {
>> >-	u64 *ret = val;
>> >+	struct cpu *cpu = container_of(dev, struct cpu, dev);
>> >+	struct util_acct_stats *stats;
>> >
>> >-	*ret = read_this_idle_purr();
>> >+	stats = get_util_stats_ptr(cpu->dev.id);
>> >+	return sprintf(buf, "%llx\n", stats->latest_purr);
>> 
>> This alters the behavior of the current sysfs purr file. I am not sure if it
>> is reasonable to return the same PURR value across a 10ms window.
> 
> 
> It does reduce it to a 10ms window. I am not sure anyone samples PURR
> etc. faster than that rate.
> 
> I measured how much time it takes to read the purr, spurr, idle_purr
> and idle_spurr files back-to-back. It takes no more than 150us. From
> lparstat, will these values be read back-to-back? If so, we can reduce
> the staleness_tolerance to something like 500us and still avoid extra
> IPIs. If not, what is the maximum delay between the first sysfs file
> read and the last sysfs file read?

Oh, for lparstat usage, this is perfectly fine.

I meant that there could be other users of [s]purr who might care. I 
don't know of one, but since this is an existing sysfs interface, I 
wanted to point out that the behavior might change.

> 
>>
>> I wonder if we should introduce a sysctl interface to control thresholding.
>> It can default to 0, which disables thresholding so that the existing
>> behavior continues. Applications (lparstat) can optionally set it to suit
>> their use.
> 
> We would be introducing 3 new sysfs interfaces that way instead of
> two.
> 
> /sys/devices/system/cpu/purr_spurr_staleness
> /sys/devices/system/cpu/cpuX/idle_purr
> /sys/devices/system/cpu/cpuX/idle_spurr
> 
> I don't have a problem with this. Nathan, Michael, thoughts on this?
> 
> 
> The alternative is to have a procfs interface, something like
> /proc/powerpc/resource_util_stats
> 
> which gives a listing similar to /proc/stat, i.e
> 
>       CPUX  <purr>  <idle_purr>  <spurr>  <idle_spurr>
> 
> Even in this case, the values can be obtained in one shot with a
> single IPI and printed in the row corresponding to the CPU.

Right -- and that would be optimal requiring a single system call, at 
the cost of using a legacy interface.

The other option would be to drop this patch and to just go with patches 
1-5 introducing the new sysfs interfaces for idle_[s]purr. It isn't 
entirely clear how often this would be used, or its actual impact. We 
can perhaps consider this optimization if and when this causes 
problems...


Thanks,
Naveen
Gautham R Shenoy April 3, 2020, 6:28 a.m. UTC | #4
Hi Naveen,

On Thu, Apr 02, 2020 at 01:04:34PM +0530, Naveen N. Rao wrote:
[..snip..]

> >
> >It does reduce it to a 10ms window. I am not sure anyone samples PURR
> >etc. faster than that rate.
> >
> >I measured how much time it takes to read the purr, spurr, idle_purr
> >and idle_spurr files back-to-back. It takes no more than 150us. From
> >lparstat, will these values be read back-to-back? If so, we can reduce
> >the staleness_tolerance to something like 500us and still avoid extra
> >IPIs. If not, what is the maximum delay between the first sysfs file
> >read and the last sysfs file read?
> 
> Oh, for lparstat usage, this is perfectly fine.
> 
> I meant that there could be other users of [s]purr who might care. I don't
> know of one, but since this is an existing sysfs interface, I wanted to
> point out that the behavior might change.

Fair point. Perhaps this behavior change should be noted in the
Documentation, if we are going to continue with this patch.

> 
> >
> >>
> >>I wonder if we should introduce a sysctl interface to control thresholding.
> >>It can default to 0, which disables thresholding so that the existing
> >>behavior continues. Applications (lparstat) can optionally set it to suit
> >>their use.
> >
> >We would be introducing 3 new sysfs interfaces that way instead of
> >two.
> >
> >/sys/devices/system/cpu/purr_spurr_staleness
> >/sys/devices/system/cpu/cpuX/idle_purr
> >/sys/devices/system/cpu/cpuX/idle_spurr
> >
> >I don't have a problem with this. Nathan, Michael, thoughts on this?
> >
> >
> >The alternative is to have a procfs interface, something like
> >/proc/powerpc/resource_util_stats
> >
> >which gives a listing similar to /proc/stat, i.e
> >
> >      CPUX  <purr>  <idle_purr>  <spurr>  <idle_spurr>
> >
> >Even in this case, the values can be obtained in one shot with a
> >single IPI and printed in the row corresponding to the CPU.
> 
> Right -- and that would be optimal requiring a single system call, at the
> cost of using a legacy interface.
> 
> The other option would be to drop this patch and to just go with patches 1-5
> introducing the new sysfs interfaces for idle_[s]purr. It isn't entirely
> clear how often this would be used, or its actual impact. We can perhaps
> consider this optimization if and when this causes problems...

I am ok with that. We can revisit the problem if the IPI noise becomes
noticeable. However, if Nathan or Michael feel that this problem is
better solved now rather than left for the future, we will have to
take a call on what the interface is going to be.

> 
> 
> Thanks,
> Naveen
> 

--
Thanks and Regards
gautham.
Nathan Lynch April 3, 2020, 6:10 p.m. UTC | #5
Gautham R Shenoy <ego@linux.vnet.ibm.com> writes:
> On Thu, Apr 02, 2020 at 01:04:34PM +0530, Naveen N. Rao wrote:
>> >>
>> >>I wonder if we should introduce a sysctl interface to control thresholding.
>> >>It can default to 0, which disables thresholding so that the existing
>> >>behavior continues. Applications (lparstat) can optionally set it to suit
>> >>their use.
>> >
>> >We would be introducing 3 new sysfs interfaces that way instead of
>> >two.
>> >
>> >/sys/devices/system/cpu/purr_spurr_staleness
>> >/sys/devices/system/cpu/cpuX/idle_purr
>> >/sys/devices/system/cpu/cpuX/idle_spurr
>> >
>> >I don't have a problem with this. Nathan, Michael, thoughts on this?

No, I don't think this warrants a tunable when the issue it's intended
to address is still a bit speculative at this point. (Also, note that
this would be a system-wide value, but you could have multiple
concurrent users of the interface with different needs.)


>> >The alternative is to have a procfs interface, something like
>> >/proc/powerpc/resource_util_stats
>> >
>> >which gives a listing similar to /proc/stat, i.e
>> >
>> >      CPUX  <purr>  <idle_purr>  <spurr>  <idle_spurr>
>> >
>> >Even in this case, the values can be obtained in one shot with a
>> >single IPI and printed in the row corresponding to the CPU.
>> 
>> Right -- and that would be optimal requiring a single system call, at the
>> cost of using a legacy interface.
>> 
>> The other option would be to drop this patch and to just go with patches 1-5
>> introducing the new sysfs interfaces for idle_[s]purr. It isn't entirely
>> clear how often this would be used, or its actual impact. We can perhaps
>> consider this optimization if and when this causes problems...
>
> I am ok with that. We can revisit the problem if IPI noise becomes
> noticable. However, if Nathan or Michael feel that this problem is
> better solved now, than leaving it for the future, we will have to
> take a call on what the interface is going to be.

While I maintain some concern about the overhead on larger LPARs (150us
per CPU works out to ~0.15s total to serially sample 1024 CPUs, ~0.3s
for 2048 and so on), I am OK with the straightforward addition of the
attributes without any batching or sampling thresholds behind the scenes
for now. I appreciate your consideration of the issue.

If this turns out to be too inefficient, then I think we should
consider a non-sysfs mechanism such as a chardev+ioctl interface.
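
(A possible uapi shape for that, purely illustrative -- the struct,
names and ioctl number are made up:)

	#include <linux/ioctl.h>
	#include <linux/types.h>

	/* One ioctl samples all four counters of one CPU in one shot. */
	struct util_stats_sample {
		__u32 cpu;		/* in: CPU to sample */
		__u64 purr;		/* out */
		__u64 spurr;		/* out */
		__u64 idle_purr;	/* out */
		__u64 idle_spurr;	/* out */
	};

	#define UTIL_STATS_GET_SAMPLE \
		_IOWR('u', 0x01, struct util_stats_sample)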

Patch

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 571b325..bd92023 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -586,8 +586,6 @@  void ppc_enable_pmcs(void)
  * SPRs which are not related to PMU.
  */
 #ifdef CONFIG_PPC64
-SYSFS_SPRSETUP(purr, SPRN_PURR);
-SYSFS_SPRSETUP(spurr, SPRN_SPURR);
 SYSFS_SPRSETUP(pir, SPRN_PIR);
 SYSFS_SPRSETUP(tscr, SPRN_TSCR);
 
@@ -596,8 +594,6 @@  void ppc_enable_pmcs(void)
   enable write when needed with a separate function.
   Lets be conservative and default to pseries.
 */
-static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
-static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
 static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
 #endif /* CONFIG_PPC64 */
@@ -761,22 +757,110 @@  static void create_svm_file(void)
 }
 #endif /* CONFIG_PPC_SVM */
 
+#ifdef CONFIG_PPC64
+/*
+ * The duration (in ms) from the last IPI to the target CPU until
+ * which a cached value of purr, spurr, idle_purr, idle_spurr can be
+ * reported to the user on a corresponding sysfs file read. Beyond
+ * this duration, fresh values need to be obtained by sending IPIs to
+ * the target CPU when the sysfs files are read.
+ */
+static unsigned long util_stats_staleness_tolerance_ms = 10;
+struct util_acct_stats {
+	u64 latest_purr;
+	u64 latest_spurr;
+#ifdef CONFIG_PPC_PSERIES
+	u64 latest_idle_purr;
+	u64 latest_idle_spurr;
+#endif
+	unsigned long last_update_jiffies;
+};
+
+DEFINE_PER_CPU(struct util_acct_stats, util_acct_stats);
+
+static void update_util_acct_stats(void *ptr)
+{
+	struct util_acct_stats *stats = ptr;
+
+	stats->latest_purr = mfspr(SPRN_PURR);
+	stats->latest_spurr = mfspr(SPRN_SPURR);
 #ifdef CONFIG_PPC_PSERIES
-static void read_idle_purr(void *val)
+	stats->latest_idle_purr = read_this_idle_purr();
+	stats->latest_idle_spurr = read_this_idle_spurr();
+#endif
+	stats->last_update_jiffies = jiffies;
+}
+
+struct util_acct_stats *get_util_stats_ptr(int cpu)
+{
+	struct util_acct_stats *stats = per_cpu_ptr(&util_acct_stats, cpu);
+	unsigned long delta_jiffies;
+
+	delta_jiffies = jiffies - stats->last_update_jiffies;
+
+	/*
+	 * If we have recent enough data, reuse it instead of
+	 * sending an IPI.
+	 */
+	if (jiffies_to_msecs(delta_jiffies) < util_stats_staleness_tolerance_ms)
+		return stats;
+
+	smp_call_function_single(cpu, update_util_acct_stats, stats, 1);
+	return stats;
+}
+
+static ssize_t show_purr(struct device *dev,
+			 struct device_attribute *attr, char *buf)
 {
-	u64 *ret = val;
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	struct util_acct_stats *stats;
 
-	*ret = read_this_idle_purr();
+	stats = get_util_stats_ptr(cpu->dev.id);
+	return sprintf(buf, "%llx\n", stats->latest_purr);
 }
 
+static void write_purr(void *val)
+{
+	mtspr(SPRN_PURR, *(unsigned long *)val);
+}
+
+static ssize_t __used store_purr(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	unsigned long val;
+	int ret = kstrtoul(buf, 16, &val);
+
+	if (ret != 0)
+		return -EINVAL;
+
+	smp_call_function_single(cpu->dev.id, write_purr, &val, 1);
+	return count;
+}
+static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
+
+static ssize_t show_spurr(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	struct util_acct_stats *stats;
+
+	stats = get_util_stats_ptr(cpu->dev.id);
+	return sprintf(buf, "%llx\n", stats->latest_spurr);
+}
+static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_PSERIES
 static ssize_t idle_purr_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
 	struct cpu *cpu = container_of(dev, struct cpu, dev);
-	u64 val;
+	struct util_acct_stats *stats;
 
-	smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
-	return sprintf(buf, "%llx\n", val);
+	stats = get_util_stats_ptr(cpu->dev.id);
+	return sprintf(buf, "%llx\n", stats->latest_idle_purr);
 }
 static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
 
@@ -792,21 +876,14 @@  static void remove_idle_purr_file(struct device *s)
 		device_remove_file(s, &dev_attr_idle_purr);
 }
 
-static void read_idle_spurr(void *val)
-{
-	u64 *ret = val;
-
-	*ret = read_this_idle_spurr();
-}
-
 static ssize_t idle_spurr_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
 	struct cpu *cpu = container_of(dev, struct cpu, dev);
-	u64 val;
+	struct util_acct_stats *stats;
 
-	smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
-	return sprintf(buf, "%llx\n", val);
+	stats = get_util_stats_ptr(cpu->dev.id);
+	return sprintf(buf, "%llx\n", stats->latest_idle_spurr);
 }
 static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);