diff mbox series

[v4,2/6] powerpc/idle: Add accessor function to always read latest idle PURR

Message ID 1585308760-28792-3-git-send-email-ego@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show
Series Track and expose idle PURR and SPURR ticks | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (c6624071c338732402e8c726df6a4074473eaa0e)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 148 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Gautham R Shenoy March 27, 2020, 11:32 a.m. UTC
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

Currently, when a CPU goes idle, we take a snapshot of PURR in
pseries_idle_prolog(). This snapshot is used at CPU idle exit to
compute the idle PURR cycles in pseries_idle_epilog(). Thus, the
value of the idle PURR cycles read before pseries_idle_prolog() or
after pseries_idle_epilog() is always correct.

However, if we were to read the idle PURR cycles from an interrupt
context between pseries_idle_prolog() and pseries_idle_epilog() (this will
be done in a future patch), then, the value of the idle PURR thus read
will not include the cycles spent in the most recent idle period.

This patch addresses the issue by providing an accessor function to
read the idle PURR such that it includes the cycles spent in the most
recent idle period, if we read it between pseries_idle_prolog() and
pseries_idle_epilog(). To achieve this, the patch saves the snapshot
of PURR taken in pseries_idle_prolog() in a per-cpu variable, instead
of on the stack, so that it can be accessed from an interrupt
context.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/idle.h        | 47 +++++++++++++++++++++++++++-------
 arch/powerpc/platforms/pseries/setup.c |  7 +++--
 drivers/cpuidle/cpuidle-pseries.c      | 15 +++++------
 3 files changed, 47 insertions(+), 22 deletions(-)

Comments

Naveen N. Rao April 1, 2020, 9:42 a.m. UTC | #1
Hi Gautham,

Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> Currently when CPU goes idle, we take a snapshot of PURR via
> pseries_idle_prolog() which is used at the CPU idle exit to compute
> the idle PURR cycles via the function pseries_idle_epilog().  Thus,
> the value of idle PURR cycle thus read before pseries_idle_prolog() and
> after pseries_idle_epilog() is always correct.
> 
> However, if we were to read the idle PURR cycles from an interrupt
> context between pseries_idle_prolog() and pseries_idle_epilog() (this will
> be done in a future patch), then, the value of the idle PURR thus read
> will not include the cycles spent in the most recent idle period.
> 
> This patch addresses the issue by providing accessor function to read
> the idle PURR such such that it includes the cycles spent in the most
> recent idle period, if we read it between pseries_idle_prolog() and
> pseries_idle_epilog(). In order to achieve it, the patch saves the
> snapshot of PURR in pseries_idle_prolog() in a per-cpu variable,
> instead of on the stack, so that it can be accessed from an interrupt
> context.
> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/idle.h        | 47 +++++++++++++++++++++++++++-------
>  arch/powerpc/platforms/pseries/setup.c |  7 +++--
>  drivers/cpuidle/cpuidle-pseries.c      | 15 +++++------
>  3 files changed, 47 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/idle.h b/arch/powerpc/include/asm/idle.h
> index 32064a4c..d4bfb6a 100644
> --- a/arch/powerpc/include/asm/idle.h
> +++ b/arch/powerpc/include/asm/idle.h
> @@ -5,10 +5,27 @@
>  #include <asm/paca.h>
> 
>  #ifdef CONFIG_PPC_PSERIES
> -static inline void pseries_idle_prolog(unsigned long *in_purr)
> +DECLARE_PER_CPU(u64, idle_entry_purr_snap);
> +
> +static inline void snapshot_purr_idle_entry(void)
> +{
> +	*this_cpu_ptr(&idle_entry_purr_snap) = mfspr(SPRN_PURR);
> +}
> +
> +static inline void update_idle_purr_accounting(void)
> +{
> +	u64 wait_cycles;
> +	u64 in_purr = *this_cpu_ptr(&idle_entry_purr_snap);
> +
> +	wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
> +	wait_cycles += mfspr(SPRN_PURR) - in_purr;
> +	get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
> +}
> +
> +static inline void pseries_idle_prolog(void)
>  {
>  	ppc64_runlatch_off();
> -	*in_purr = mfspr(SPRN_PURR);
> +	snapshot_purr_idle_entry();
>  	/*
>  	 * Indicate to the HV that we are idle. Now would be
>  	 * a good time to find other work to dispatch.
> @@ -16,16 +33,28 @@ static inline void pseries_idle_prolog(unsigned long *in_purr)
>  	get_lppaca()->idle = 1;
>  }
> 
> -static inline void pseries_idle_epilog(unsigned long in_purr)
> +static inline void pseries_idle_epilog(void)
>  {
> -	u64 wait_cycles;
> -
> -	wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
> -	wait_cycles += mfspr(SPRN_PURR) - in_purr;
> -	get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
> +	update_idle_purr_accounting();
>  	get_lppaca()->idle = 0;
> -
>  	ppc64_runlatch_on();
>  }
> +
> +static inline u64 read_this_idle_purr(void)
> +{
> +	/*
> +	 * If we are reading from an idle context, update the
> +	 * idle-purr cycles corresponding to the last idle period.
> +	 * Since the idle context is not yet over, take a fresh
> +	 * snapshot of the idle-purr.
> +	 */
> +	if (unlikely(get_lppaca()->idle == 1)) {
> +		update_idle_purr_accounting();
> +		snapshot_purr_idle_entry();
> +	}
> +
> +	return be64_to_cpu(get_lppaca()->wait_state_cycles);
> +}
> +

I think this and read_this_idle_spurr() from the next patch should be 
moved to Patch 4/6, where they are actually used.

- Naveen
Gautham R Shenoy April 3, 2020, 6:15 a.m. UTC | #2
On Wed, Apr 01, 2020 at 03:12:53PM +0530, Naveen N. Rao wrote:
> Hi Gautham,
> 
> Gautham R. Shenoy wrote:
> >From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> >
> >Currently when CPU goes idle, we take a snapshot of PURR via
> >pseries_idle_prolog() which is used at the CPU idle exit to compute
> >the idle PURR cycles via the function pseries_idle_epilog().  Thus,
> >the value of idle PURR cycle thus read before pseries_idle_prolog() and
> >after pseries_idle_epilog() is always correct.
> >
> >However, if we were to read the idle PURR cycles from an interrupt
> >context between pseries_idle_prolog() and pseries_idle_epilog() (this will
> >be done in a future patch), then, the value of the idle PURR thus read
> >will not include the cycles spent in the most recent idle period.
> >
> >This patch addresses the issue by providing accessor function to read
> >the idle PURR such such that it includes the cycles spent in the most
> >recent idle period, if we read it between pseries_idle_prolog() and
> >pseries_idle_epilog(). In order to achieve it, the patch saves the
> >snapshot of PURR in pseries_idle_prolog() in a per-cpu variable,
> >instead of on the stack, so that it can be accessed from an interrupt
> >context.
> >
> >Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> >---
> > arch/powerpc/include/asm/idle.h        | 47 +++++++++++++++++++++++++++-------
> > arch/powerpc/platforms/pseries/setup.c |  7 +++--
> > drivers/cpuidle/cpuidle-pseries.c      | 15 +++++------
> > 3 files changed, 47 insertions(+), 22 deletions(-)
> >
> >diff --git a/arch/powerpc/include/asm/idle.h b/arch/powerpc/include/asm/idle.h
> >index 32064a4c..d4bfb6a 100644
> >--- a/arch/powerpc/include/asm/idle.h
> >+++ b/arch/powerpc/include/asm/idle.h
> >@@ -5,10 +5,27 @@
> > #include <asm/paca.h>
> >
> > #ifdef CONFIG_PPC_PSERIES
> >-static inline void pseries_idle_prolog(unsigned long *in_purr)
> >+DECLARE_PER_CPU(u64, idle_entry_purr_snap);
> >+
> >+static inline void snapshot_purr_idle_entry(void)
> >+{
> >+	*this_cpu_ptr(&idle_entry_purr_snap) = mfspr(SPRN_PURR);
> >+}
> >+
> >+static inline void update_idle_purr_accounting(void)
> >+{
> >+	u64 wait_cycles;
> >+	u64 in_purr = *this_cpu_ptr(&idle_entry_purr_snap);
> >+
> >+	wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
> >+	wait_cycles += mfspr(SPRN_PURR) - in_purr;
> >+	get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
> >+}
> >+
> >+static inline void pseries_idle_prolog(void)
> > {
> > 	ppc64_runlatch_off();
> >-	*in_purr = mfspr(SPRN_PURR);
> >+	snapshot_purr_idle_entry();
> > 	/*
> > 	 * Indicate to the HV that we are idle. Now would be
> > 	 * a good time to find other work to dispatch.
> >@@ -16,16 +33,28 @@ static inline void pseries_idle_prolog(unsigned long *in_purr)
> > 	get_lppaca()->idle = 1;
> > }
> >
> >-static inline void pseries_idle_epilog(unsigned long in_purr)
> >+static inline void pseries_idle_epilog(void)
> > {
> >-	u64 wait_cycles;
> >-
> >-	wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
> >-	wait_cycles += mfspr(SPRN_PURR) - in_purr;
> >-	get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
> >+	update_idle_purr_accounting();
> > 	get_lppaca()->idle = 0;
> >-
> > 	ppc64_runlatch_on();
> > }
> >+
> >+static inline u64 read_this_idle_purr(void)
> >+{
> >+	/*
> >+	 * If we are reading from an idle context, update the
> >+	 * idle-purr cycles corresponding to the last idle period.
> >+	 * Since the idle context is not yet over, take a fresh
> >+	 * snapshot of the idle-purr.
> >+	 */
> >+	if (unlikely(get_lppaca()->idle == 1)) {
> >+		update_idle_purr_accounting();
> >+		snapshot_purr_idle_entry();
> >+	}
> >+
> >+	return be64_to_cpu(get_lppaca()->wait_state_cycles);
> >+}
> >+
> 
> I think this and read_this_idle_spurr() from the next patch should be moved
> to Patch 4/6, where they are actually used.

The reason I included this function in this patch was to justify why
we are snapshotting the PURR value in a global per-cpu variable
instead of in a stack variable: someone might want to read the idle
PURR value from an interrupt context which had woken up the CPU from
idle. At this point, since the epilog() function hasn't been called
yet, the idle PURR count corresponding to this latest idle period
would not yet have been accumulated in lppaca->wait_state_cycles.
Thus, this helper function safely reads the value by
   1) First updating lppaca->wait_state_cycles with the latest idle
   PURR count.
   2) Then taking a fresh snapshot, since the time from now until the
   epilog() call is also counted as idle. So the PURR cycle increment
   during this short period should also be accumulated in
   lppaca->wait_state_cycles.


prolog()
|	snapshot PURR
|
|
|
Idle
|
| <----- Interrupt . Read idle PURR ---- update idle PURR;
|                              	         snapshot PURR;
|                                   	 Read idle PURR.       
|
epilog()
	update idle PURR



> 
> - Naveen
> 

However, if you feel that moving this function to Patch 4 where it is
actually used makes it more readable, I can do that.

--
Thanks and Regards
gautham.
Naveen N. Rao April 3, 2020, 10:34 a.m. UTC | #3
Gautham R Shenoy wrote:
> On Wed, Apr 01, 2020 at 03:12:53PM +0530, Naveen N. Rao wrote:
>> Hi Gautham,
>> 
>> Gautham R. Shenoy wrote:
>> >From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>> >
>> >+
>> >+static inline u64 read_this_idle_purr(void)
>> >+{
>> >+	/*
>> >+	 * If we are reading from an idle context, update the
>> >+	 * idle-purr cycles corresponding to the last idle period.
>> >+	 * Since the idle context is not yet over, take a fresh
>> >+	 * snapshot of the idle-purr.
>> >+	 */
>> >+	if (unlikely(get_lppaca()->idle == 1)) {
>> >+		update_idle_purr_accounting();
>> >+		snapshot_purr_idle_entry();
>> >+	}
>> >+
>> >+	return be64_to_cpu(get_lppaca()->wait_state_cycles);
>> >+}
>> >+
>> 
>> I think this and read_this_idle_spurr() from the next patch should be moved
>> to Patch 4/6, where they are actually used.
> 
> The reason I included this function in this patch was to justify why
> we were introducing snapshotting the purr values in a global per-cpu
> variable instead of on a stack variable. The reason being that someone
> might want to read the PURR value from an interrupt context which had
> woken up the CPU from idle. At this point, since epilog() function
> wasn't called, the idle PURR count corresponding to this latest idle
> period would have been accumulated in lppaca->wait_cycles. Thus, this
> helper function safely reads the value by
>    1) First updating the lppaca->wait_cycles with the latest idle_purr
>    count.
>    2) Take a fresh snapshot, since the time from now to the epilog()
>    call is also counted under idle CPU. So the PURR cycle increment
>    during this short period should also be accumulated in lppaca->wait_cycles.
> 
> 
> prolog()
> |	snapshot PURR
> |
> |
> |
> Idle
> |
> | <----- Interrupt . Read idle PURR ---- update idle PURR;
> |                              	         snapshot PURR;
> |                                   	 Read idle PURR.       
> |
> epilog()
> 	update idle PURR
> 

Yes, I understand. It makes sense.

> 
> However, if you feel that moving this function to Patch 4 where it is
> actually used makes it more readable, I can do that.

My suggestion was from a bisectability standpoint though. This is a 
fairly simple function, but it is generally recommended to ensure that 
newly added code gets exercised in the patch that it is introduced in:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/5.Posting.rst#n119


Regards,
Naveen
Gautham R Shenoy April 3, 2020, 11:24 a.m. UTC | #4
On Fri, Apr 03, 2020 at 04:04:56PM +0530, Naveen N. Rao wrote:
> Gautham R Shenoy wrote:
> >On Wed, Apr 01, 2020 at 03:12:53PM +0530, Naveen N. Rao wrote:
> >>Hi Gautham,
> >>
> >>Gautham R. Shenoy wrote:
> >>>From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> >>>
> >>>+
> >>>+static inline u64 read_this_idle_purr(void)
> >>>+{
> >>>+	/*
> >>>+	 * If we are reading from an idle context, update the
> >>>+	 * idle-purr cycles corresponding to the last idle period.
> >>>+	 * Since the idle context is not yet over, take a fresh
> >>>+	 * snapshot of the idle-purr.
> >>>+	 */
> >>>+	if (unlikely(get_lppaca()->idle == 1)) {
> >>>+		update_idle_purr_accounting();
> >>>+		snapshot_purr_idle_entry();
> >>>+	}
> >>>+
> >>>+	return be64_to_cpu(get_lppaca()->wait_state_cycles);
> >>>+}
> >>>+
> >>
> >>I think this and read_this_idle_spurr() from the next patch should be moved
> >>to Patch 4/6, where they are actually used.
> >
> >The reason I included this function in this patch was to justify why
> >we were introducing snapshotting the purr values in a global per-cpu
> >variable instead of on a stack variable. The reason being that someone
> >might want to read the PURR value from an interrupt context which had
> >woken up the CPU from idle. At this point, since epilog() function
> >wasn't called, the idle PURR count corresponding to this latest idle
> >period would have been accumulated in lppaca->wait_cycles. Thus, this
> >helper function safely reads the value by
> >   1) First updating the lppaca->wait_cycles with the latest idle_purr
> >   count.
> >   2) Take a fresh snapshot, since the time from now to the epilog()
> >   call is also counted under idle CPU. So the PURR cycle increment
> >   during this short period should also be accumulated in lppaca->wait_cycles.
> >
> >
> >prolog()
> >|	snapshot PURR
> >|
> >|
> >|
> >Idle
> >|
> >| <----- Interrupt . Read idle PURR ---- update idle PURR;
> >|                              	         snapshot PURR;
> >|                                   	 Read idle PURR.
> >|
> >epilog()
> >	update idle PURR
> >
> 
> Yes, I understand. It makes sense.
> 
> >
> >However, if you feel that moving this function to Patch 4 where it is
> >actually used makes it more readable, I can do that.
> 
> My suggestion was from a bisectability standpoint though. This is a fairly
> simple function, but it is generally recommended to ensure that newly added
> code gets exercized in the patch that it is introduced in:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/5.Posting.rst#n119
>

Fair point. Will move those functions to Patch 4.


> 
> Regards,
> Naveen
>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/idle.h b/arch/powerpc/include/asm/idle.h
index 32064a4c..d4bfb6a 100644
--- a/arch/powerpc/include/asm/idle.h
+++ b/arch/powerpc/include/asm/idle.h
@@ -5,10 +5,27 @@ 
 #include <asm/paca.h>
 
 #ifdef CONFIG_PPC_PSERIES
-static inline void pseries_idle_prolog(unsigned long *in_purr)
+DECLARE_PER_CPU(u64, idle_entry_purr_snap);
+
+static inline void snapshot_purr_idle_entry(void)
+{
+	*this_cpu_ptr(&idle_entry_purr_snap) = mfspr(SPRN_PURR);
+}
+
+static inline void update_idle_purr_accounting(void)
+{
+	u64 wait_cycles;
+	u64 in_purr = *this_cpu_ptr(&idle_entry_purr_snap);
+
+	wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
+	wait_cycles += mfspr(SPRN_PURR) - in_purr;
+	get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
+}
+
+static inline void pseries_idle_prolog(void)
 {
 	ppc64_runlatch_off();
-	*in_purr = mfspr(SPRN_PURR);
+	snapshot_purr_idle_entry();
 	/*
 	 * Indicate to the HV that we are idle. Now would be
 	 * a good time to find other work to dispatch.
@@ -16,16 +33,28 @@  static inline void pseries_idle_prolog(unsigned long *in_purr)
 	get_lppaca()->idle = 1;
 }
 
-static inline void pseries_idle_epilog(unsigned long in_purr)
+static inline void pseries_idle_epilog(void)
 {
-	u64 wait_cycles;
-
-	wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
-	wait_cycles += mfspr(SPRN_PURR) - in_purr;
-	get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
+	update_idle_purr_accounting();
 	get_lppaca()->idle = 0;
-
 	ppc64_runlatch_on();
 }
+
+static inline u64 read_this_idle_purr(void)
+{
+	/*
+	 * If we are reading from an idle context, update the
+	 * idle-purr cycles corresponding to the last idle period.
+	 * Since the idle context is not yet over, take a fresh
+	 * snapshot of the idle-purr.
+	 */
+	if (unlikely(get_lppaca()->idle == 1)) {
+		update_idle_purr_accounting();
+		snapshot_purr_idle_entry();
+	}
+
+	return be64_to_cpu(get_lppaca()->wait_state_cycles);
+}
+
 #endif /* CONFIG_PPC_PSERIES */
 #endif
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 2f53e6b..4905c96 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -318,10 +318,9 @@  static int alloc_dispatch_log_kmem_cache(void)
 }
 machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
 
+DEFINE_PER_CPU(u64, idle_entry_purr_snap);
 static void pseries_lpar_idle(void)
 {
-	unsigned long in_purr;
-
 	/*
 	 * Default handler to go into low thread priority and possibly
 	 * low power mode by ceding processor to hypervisor
@@ -331,7 +330,7 @@  static void pseries_lpar_idle(void)
 		return;
 
 	/* Indicate to hypervisor that we are idle. */
-	pseries_idle_prolog(&in_purr);
+	pseries_idle_prolog();
 
 	/*
 	 * Yield the processor to the hypervisor.  We return if
@@ -342,7 +341,7 @@  static void pseries_lpar_idle(void)
 	 */
 	cede_processor();
 
-	pseries_idle_epilog(in_purr);
+	pseries_idle_epilog();
 }
 
 /*
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 46d5e05..6513ef2 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -36,12 +36,11 @@  static int snooze_loop(struct cpuidle_device *dev,
 			struct cpuidle_driver *drv,
 			int index)
 {
-	unsigned long in_purr;
 	u64 snooze_exit_time;
 
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
-	pseries_idle_prolog(&in_purr);
+	pseries_idle_prolog();
 	local_irq_enable();
 	snooze_exit_time = get_tb() + snooze_timeout;
 
@@ -65,7 +64,7 @@  static int snooze_loop(struct cpuidle_device *dev,
 
 	local_irq_disable();
 
-	pseries_idle_epilog(in_purr);
+	pseries_idle_epilog();
 
 	return index;
 }
@@ -91,9 +90,8 @@  static int dedicated_cede_loop(struct cpuidle_device *dev,
 				struct cpuidle_driver *drv,
 				int index)
 {
-	unsigned long in_purr;
 
-	pseries_idle_prolog(&in_purr);
+	pseries_idle_prolog();
 	get_lppaca()->donate_dedicated_cpu = 1;
 
 	HMT_medium();
@@ -102,7 +100,7 @@  static int dedicated_cede_loop(struct cpuidle_device *dev,
 	local_irq_disable();
 	get_lppaca()->donate_dedicated_cpu = 0;
 
-	pseries_idle_epilog(in_purr);
+	pseries_idle_epilog();
 
 	return index;
 }
@@ -111,9 +109,8 @@  static int shared_cede_loop(struct cpuidle_device *dev,
 			struct cpuidle_driver *drv,
 			int index)
 {
-	unsigned long in_purr;
 
-	pseries_idle_prolog(&in_purr);
+	pseries_idle_prolog();
 
 	/*
 	 * Yield the processor to the hypervisor.  We return if
@@ -125,7 +122,7 @@  static int shared_cede_loop(struct cpuidle_device *dev,
 	check_and_cede_processor();
 
 	local_irq_disable();
-	pseries_idle_epilog(in_purr);
+	pseries_idle_epilog();
 
 	return index;
 }