Patchwork [7/7] ARM: tegra30: cpuidle: add LP2 driver for CPU0

login
register
mail settings
Submitter Joseph Lo
Date Oct. 8, 2012, 10:26 a.m.
Message ID <1349691981-31038-8-git-send-email-josephl@nvidia.com>
Download mbox | patch
Permalink /patch/189979/
State Superseded, archived
Headers show

Comments

Joseph Lo - Oct. 8, 2012, 10:26 a.m.
The cpuidle LP2 is a power gating idle mode. It supports power gating
the vdd_cpu rail after all CPU cores are in LP2. For Tegra30, CPU0 must
be the last one to go into LP2. We need to take care to make sure all
the secondary CPUs are in LP2 by checking CPU and power gate status.
After that, CPU0 can go into LP2 safely, and then the CPU rail can be
power gated.

Based on the work by:
Scott Williams <scwilliams@nvidia.com>

Signed-off-by: Joseph Lo <josephl@nvidia.com>
---
 arch/arm/mach-tegra/cpuidle-tegra30.c |   39 ++++++++-
 arch/arm/mach-tegra/pm.c              |  152 +++++++++++++++++++++++++++++++++
 arch/arm/mach-tegra/pm.h              |    3 +
 arch/arm/mach-tegra/sleep-tegra30.S   |   44 ++++++++++
 arch/arm/mach-tegra/sleep.S           |   44 ++++++++++
 arch/arm/mach-tegra/sleep.h           |    2 +
 6 files changed, 280 insertions(+), 4 deletions(-)
Stephen Warren - Oct. 9, 2012, 10:49 p.m.
On 10/08/2012 04:26 AM, Joseph Lo wrote:
> The cpuidle LP2 is a power gating idle mode. It support power gating
> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
> be last one to go into LP2. We need to take care and make sure whole
> secondary CPUs were in LP2 by checking CPU and power gate status.
> After that, the CPU0 can go into LP2 safely. Then power gating the
> CPU rail.

> diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c

> +static bool tegra30_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
> +					 struct cpuidle_driver *drv,
> +					 int index)
> +{
> +	struct cpuidle_state *state = &drv->states[index];
> +	u32 cpu_on_time = state->exit_latency;
> +	u32 cpu_off_time = state->target_residency - state->exit_latency;
> +
> +	if (num_online_cpus() > 1 && !tegra_cpu_rail_off_ready()) {

Should that be || not &&?

Isn't the "num_online_cpus() > 1" condition effectively checked at the
call site, i.e. in tegra30_idle_lp2() below via the if (last_cpu) check?

> @@ -85,16 +108,22 @@ static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
>  				      int index)
>  {
>  	bool entered_lp2 = false;
> +	bool last_cpu;
>  
>  	local_fiq_disable();
>  
> +	last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
> +	if (dev->cpu == 0) {
> +		if (last_cpu)
> +			entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
> +								   index);
> +		else
> +			cpu_do_idle();
> +	} else {
>  		entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
> +	}

Hmm. That means that if the last CPU to enter LP2 is e.g. CPU1, then
even though all CPUs are now in LP2, the complex as a whole doesn't
enter LP2. Is there a way to make the cluster as a whole enter LP2 in
this case? Isn't that what coupled cpuidle is for?

> diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c

> +static void set_power_timers(unsigned long us_on, unsigned long us_off)

> +	if (tegra_pclk == NULL) {
> +		tegra_pclk = clk_get_sys(NULL, "pclk");
> +		if (IS_ERR(tegra_pclk)) {
> +			/*
> +			 * pclk not been init or not exist.
> +			 * Use sclk to take the place of it.
> +			 * The default setting was pclk=sclk.
> +			 */
> +			tegra_pclk = clk_get_sys(NULL, "sclk");
> +		}
> +	}

That's a little odd. Surely the HW has pclk or it doesn't? Why use
different clocks at different times for what is apparently the same thing?
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Lo - Oct. 11, 2012, 11:24 a.m.
On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
> On 10/08/2012 04:26 AM, Joseph Lo wrote:
> > The cpuidle LP2 is a power gating idle mode. It support power gating
> > vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
> > be last one to go into LP2. We need to take care and make sure whole
> > secondary CPUs were in LP2 by checking CPU and power gate status.
> > After that, the CPU0 can go into LP2 safely. Then power gating the
> > CPU rail.
> 
> > diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
> 
> > +static bool tegra30_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
> > +					 struct cpuidle_driver *drv,
> > +					 int index)
> > +{
> > +	struct cpuidle_state *state = &drv->states[index];
> > +	u32 cpu_on_time = state->exit_latency;
> > +	u32 cpu_off_time = state->target_residency - state->exit_latency;
> > +
> > +	if (num_online_cpus() > 1 && !tegra_cpu_rail_off_ready()) {
> 
> Should that be || not &&?
> 
> Isn't the "num_online_cpus() > 1" condition effectively checked at the
> call site, i.e. in tegra30_idle_lp2() below via the if (last_cpu) check?
> 

It should be "&&" here.
Because if there are still multiple CPUs online, we need to make sure
all the secondary CPUs are power gated first. After all the secondary
CPUs have been power gated, CPU0 can go into LP2 and the CPU rail can
be shut off.
If all the secondary CPUs have been hot-unplugged, then
"num_online_cpus() > 1" would always be false, and CPU0 can go into LP2
directly.

So it is used to check whether there are multiple CPUs online or not.
It is different from the last_cpu check below: last_cpu is used to
check whether all the CPUs are in the LP2 process or not. If CPU0 is
the last one to go into the LP2 process, then it is true.

So the point here is: we can avoid checking the power status of the
secondary CPUs if they have been unplugged.

> > @@ -85,16 +108,22 @@ static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
> >  				      int index)
> >  {
> >  	bool entered_lp2 = false;
> > +	bool last_cpu;
> >  
> >  	local_fiq_disable();
> >  
> > +	last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
> > +	if (dev->cpu == 0) {
> > +		if (last_cpu)
> > +			entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
> > +								   index);
> > +		else
> > +			cpu_do_idle();
> > +	} else {
> >  		entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
> > +	}
> 
> Hmm. That means that if the last CPU to enter LP2 is e.g. CPU1, then
> even though all CPUs are now in LP2, the complex as a whole doesn't
> enter LP2. Is there a way to make the cluster as a whole enter LP2 in
> this case? Isn't that what coupled cpuidle is for?
> 

It may look like the coupled cpuidle can satisfy the usage here. But it
didn't. Please check the criteria of coupled cpuidle. 

/*
 * To use coupled cpuidle states, a cpuidle driver must:
 *
 *    Set struct cpuidle_device.coupled_cpus to the mask of all
 *    coupled cpus, usually the same as cpu_possible_mask if all cpus
 *    are part of the same cluster.  The coupled_cpus mask must be
 *    set in the struct cpuidle_device for each cpu.
 *
 *    Set struct cpuidle_device.safe_state to a state that is not a
 *    coupled state.  This is usually WFI.
 *
 *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
 *    state that affects multiple cpus.
 *
 *    Provide a struct cpuidle_state.enter function for each state
 *    that affects multiple cpus.  This function is guaranteed to be
 *    called on all cpus at approximately the same time.  The driver
 *    should ensure that the cpus all abort together if any cpu tries
 *    to abort once the function is called.  The function should return
 *    with interrupts still disabled.
 */

Tegra30 supports the secondary CPUs going into LP2 (power-gate)
independently. The limitation on CPU0 is that CPU0 must be the last
one to go into LP2 in order to shut off the CPU rail.

There is also no need for every CPU to leave LP2 at the same time. CPU0
is always the first one woken up from LP2, but all the other secondary
CPUs can stay in LP2. One of the secondary CPUs can also be woken up
alone, if CPU0 is already up.

> > diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
> 
> > +static void set_power_timers(unsigned long us_on, unsigned long us_off)
> 
> > +	if (tegra_pclk == NULL) {
> > +		tegra_pclk = clk_get_sys(NULL, "pclk");
> > +		if (IS_ERR(tegra_pclk)) {
> > +			/*
> > +			 * pclk not been init or not exist.
> > +			 * Use sclk to take the place of it.
> > +			 * The default setting was pclk=sclk.
> > +			 */
> > +			tegra_pclk = clk_get_sys(NULL, "sclk");
> > +		}
> > +	}
> 
> That's a little odd. Surely the HW has pclk or it doesn't? Why use
> different clocks at different times for what is apparently the same thing?

It is just because "pclk" is not available in Tegra30's clock
framework right now, only in Tegra20's.

Thanks,
Joseph

--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Warren - Oct. 11, 2012, 4:37 p.m.
On 10/11/2012 05:24 AM, Joseph Lo wrote:
> On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
>> On 10/08/2012 04:26 AM, Joseph Lo wrote:
>>> The cpuidle LP2 is a power gating idle mode. It support power gating
>>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
>>> be last one to go into LP2. We need to take care and make sure whole
>>> secondary CPUs were in LP2 by checking CPU and power gate status.
>>> After that, the CPU0 can go into LP2 safely. Then power gating the
>>> CPU rail.
>>
>>> diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
>>
>>> +static bool tegra30_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
>>> +					 struct cpuidle_driver *drv,
>>> +					 int index)
>>> +{
>>> +	struct cpuidle_state *state = &drv->states[index];
>>> +	u32 cpu_on_time = state->exit_latency;
>>> +	u32 cpu_off_time = state->target_residency - state->exit_latency;
>>> +
>>> +	if (num_online_cpus() > 1 && !tegra_cpu_rail_off_ready()) {
>>
>> Should that be || not &&?
>>
>> Isn't the "num_online_cpus() > 1" condition effectively checked at the
>> call site, i.e. in tegra30_idle_lp2() below via the if (last_cpu) check?
>>
> 
> Should be "&&" here.
> Because we need to check if there are still multi CPUs online, then we
> need to make sure all the secondary CPUs be power gated first. After all
> the secondary CPUs been power gated, the CPU0 could go into LP2 and the
> CPU rail could be shut off.
> If all the secondary CPUs been hot plugged, then the "num_online_cpus()
>> 1" would be always false. Then the CPU0 can go into LP2 directly.
> 
> So it was used to check are there multi cpus online or not? It's
> difference with the last_cpu check below. The last_cpu was used to check
> all the CPUs were in LP2 process or not. If the CPU0 is the last one
> went into LP2 process, then it would be true.
> 
> So the point here is. We can avoid to check the power status of the
> secodarys CPUs if they be unplugged.

OK, so this condition is about ignoring the result of
tegra_cpu_rail_off_ready() if there is only 1 CPU online. That makes
sense, since we know in that case there cannot be any other CPUs to
check if they're in LP2 or not.

But what about the case where 2 CPUs are online and 2 offline. In that
case, num_online_cpus() > 1, so the call to tegra_cpu_rail_off_ready()
is skipped. Yet, with 2 CPUs online, we do need to check whichever other
CPU is online to see if it's in LP2 or not.

I think what we need to do is the following:

cpus_in_lp2_mask = generate_mask_from_pmc_registers();
if (cpus_in_lp2_mask != cpus_online_mask) {
    cpu_do_idle();
    return;
}
enter lp2;

right?

>>> @@ -85,16 +108,22 @@ static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
>>>  				      int index)
>>>  {
>>>  	bool entered_lp2 = false;
>>> +	bool last_cpu;
>>>  
>>>  	local_fiq_disable();
>>>  
>>> +	last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
>>> +	if (dev->cpu == 0) {
>>> +		if (last_cpu)
>>> +			entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
>>> +								   index);
>>> +		else
>>> +			cpu_do_idle();
>>> +	} else {
>>>  		entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
>>> +	}
>>
>> Hmm. That means that if the last CPU to enter LP2 is e.g. CPU1, then
>> even though all CPUs are now in LP2, the complex as a whole doesn't
>> enter LP2. Is there a way to make the cluster as a whole enter LP2 in
>> this case? Isn't that what coupled cpuidle is for?
> 
> It may look like the coupled cpuidle can satisfy the usage here. But it
> didn't. Please check the criteria of coupled cpuidle. 

What about the first part of the question. What happens if:

CPU3 enters LP2
CPU2 enters LP2
CPU0 enters LP2
CPU1 enters LP2

Since CPU1 is not CPU0, tegra30_idle_enter_lp2_cpu_n() is called, and
hence I think the whole CPU complex is never rail-gated (just each CPU
is power-gated) even though all CPUs are in LP2 and the complex could be
rail-gated. Isn't this missing out on power-savings?

So, we either need to:

a) Make tegra30_idle_enter_lp2_cpu_n() rail-gate if the last CPU is
entering LP2, and then I'm not sure the implementation would be any
different to tegra30_idle_enter_lp2_cpu_0, would it?

b) If CPUn can't trigger rail-gating, then when CPUn is the last to
enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
rail-gate, and simply power-gate itself. I believe this IPI interaction
is exactly what coupled cpuidle is about, isn't it?

> /*
>  * To use coupled cpuidle states, a cpuidle driver must:
>  *
>  *    Set struct cpuidle_device.coupled_cpus to the mask of all
>  *    coupled cpus, usually the same as cpu_possible_mask if all cpus
>  *    are part of the same cluster.  The coupled_cpus mask must be
>  *    set in the struct cpuidle_device for each cpu.
>  *
>  *    Set struct cpuidle_device.safe_state to a state that is not a
>  *    coupled state.  This is usually WFI.
>  *
>  *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
>  *    state that affects multiple cpus.
>  *
>  *    Provide a struct cpuidle_state.enter function for each state
>  *    that affects multiple cpus.  This function is guaranteed to be
>  *    called on all cpus at approximately the same time.  The driver
>  *    should ensure that the cpus all abort together if any cpu tries
>  *    to abort once the function is called.  The function should return
>  *    with interrupts still disabled.
>  */
> 
> The Tegra30 can support the secondary CPUs go into LP2 (power-gate)
> independently.

I think that just means that the safe state for CPUn (i.e. not CPU0) can
do better than WFI on Tegra30, even though it can't on Tegra20.

> The limitation of the CPU0 is the CPU0 must be the last
> one to go into LP2 to shut off CPU rail.
> 
> It also no need for every CPU to leave LP2 at the same time. The CPU0
> would be always the first one that woken up from LP2. But all the other
> secondary CPUs can still keep in LP2. One of the secondary CPUs can also
> be woken up alone, if the CPU0 already up.

That seems like an implementation detail. Perhaps coupled cpuidle needs
to be enhanced to best support Tegra30?

>>> diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
>>
>>> +static void set_power_timers(unsigned long us_on, unsigned long us_off)
>>
>>> +	if (tegra_pclk == NULL) {
>>> +		tegra_pclk = clk_get_sys(NULL, "pclk");
>>> +		if (IS_ERR(tegra_pclk)) {
>>> +			/*
>>> +			 * pclk not been init or not exist.
>>> +			 * Use sclk to take the place of it.
>>> +			 * The default setting was pclk=sclk.
>>> +			 */
>>> +			tegra_pclk = clk_get_sys(NULL, "sclk");
>>> +		}
>>> +	}
>>
>> That's a little odd. Surely the HW has pclk or it doesn't? Why use
>> different clocks at different times for what is apparently the same thing?
> 
> It just because the "pclk" is not available on the Tegra30's clock
> framework but Tegra20 right now.

We should just fix that instead of working around it then. I assume it's
a simple matter of adding the appropriate clock definition?
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Colin Cross - Oct. 11, 2012, 4:48 p.m.
On Thu, Oct 11, 2012 at 9:37 AM, Stephen Warren <swarren@wwwdotorg.org> wrote:
> On 10/11/2012 05:24 AM, Joseph Lo wrote:
>> On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
>>> On 10/08/2012 04:26 AM, Joseph Lo wrote:
>>>> The cpuidle LP2 is a power gating idle mode. It support power gating
>>>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
>>>> be last one to go into LP2. We need to take care and make sure whole
>>>> secondary CPUs were in LP2 by checking CPU and power gate status.
>>>> After that, the CPU0 can go into LP2 safely. Then power gating the
>>>> CPU rail.

<snip>

>>>> @@ -85,16 +108,22 @@ static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
>>>>                                   int index)
>>>>  {
>>>>     bool entered_lp2 = false;
>>>> +   bool last_cpu;
>>>>
>>>>     local_fiq_disable();
>>>>
>>>> +   last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
>>>> +   if (dev->cpu == 0) {
>>>> +           if (last_cpu)
>>>> +                   entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
>>>> +                                                              index);
>>>> +           else
>>>> +                   cpu_do_idle();
>>>> +   } else {
>>>>             entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
>>>> +   }
>>>
>>> Hmm. That means that if the last CPU to enter LP2 is e.g. CPU1, then
>>> even though all CPUs are now in LP2, the complex as a whole doesn't
>>> enter LP2. Is there a way to make the cluster as a whole enter LP2 in
>>> this case? Isn't that what coupled cpuidle is for?
>>
>> It may look like the coupled cpuidle can satisfy the usage here. But it
>> didn't. Please check the criteria of coupled cpuidle.
>
> What about the first part of the question. What happens if:
>
> CPU3 enters LP2
> CPU2 enters LP2
> CPU0 enters LP2
> CPU1 enters LP2
>
> Since CPU1 is not CPU0, tegra30_idle_enter_lp2_cpu_n() is called, and
> hence I think the whole CPU complex is never rail-gated (just each CPU
> is power-gated) even though all CPUs are in LP2 and the complex could be
> rail-gated. Isn't this missing out on power-savings?
>
> So, we either need to:
>
> a) Make tegra30_idle_enter_lp2_cpu_n() rail-gate if the last CPU is
> entering LP2, and then I'm not sure the implementation would be any
> different to tegra30_idle_enter_lp2_cpu_0, would it?
>
> b) If CPUn can't trigger rail-gating, then when CPUn is the last to
> enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
> rail-gate, and simply power-gate itself. I believe this IPI interaction
> is exactly what coupled cpuidle is about, isn't it?
>
>> /*
>>  * To use coupled cpuidle states, a cpuidle driver must:
>>  *
>>  *    Set struct cpuidle_device.coupled_cpus to the mask of all
>>  *    coupled cpus, usually the same as cpu_possible_mask if all cpus
>>  *    are part of the same cluster.  The coupled_cpus mask must be
>>  *    set in the struct cpuidle_device for each cpu.
>>  *
>>  *    Set struct cpuidle_device.safe_state to a state that is not a
>>  *    coupled state.  This is usually WFI.
>>  *
>>  *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
>>  *    state that affects multiple cpus.
>>  *
>>  *    Provide a struct cpuidle_state.enter function for each state
>>  *    that affects multiple cpus.  This function is guaranteed to be
>>  *    called on all cpus at approximately the same time.  The driver
>>  *    should ensure that the cpus all abort together if any cpu tries
>>  *    to abort once the function is called.  The function should return
>>  *    with interrupts still disabled.
>>  */
>>
>> The Tegra30 can support the secondary CPUs go into LP2 (power-gate)
>> independently.
>
> I think that just means that the safe state for CPUn (i.e. not CPU0) can
> do better than WFI on Tegra30, even though it can't on Tegra20.

Exactly.

>> The limitation of the CPU0 is the CPU0 must be the last
>> one to go into LP2 to shut off CPU rail.
>>
>> It also no need for every CPU to leave LP2 at the same time. The CPU0
>> would be always the first one that woken up from LP2. But all the other
>> secondary CPUs can still keep in LP2. One of the secondary CPUs can also
>> be woken up alone, if the CPU0 already up.
>
> That seems like an implementation detail. Perhaps coupled cpuidle needs
> to be enhanced to best support Tegra30?

As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
wake up the secondary cpus during the transitions to off and back on
again.  Those cpus will immediately go back to single-cpu LP2, so it
may not be a big deal, but there is a small optimization I've
discussed with a few other people that could avoid waking them up.  I
suggest adding an extra pre-idle hook to the Tegra30 that is called by
coupled cpuidle on the last cpu to go down.  It would return a cpumask
of cpus that have been prepared for idle by guaranteeing that they
will not wake up from an interrupt, and therefore don't need to be
woken up for the transitions.  I haven't worked with a cpu that needs
this optimization yet, so I haven't done it.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Lo - Oct. 12, 2012, 7:07 a.m.
On Fri, 2012-10-12 at 00:37 +0800, Stephen Warren wrote:
> On 10/11/2012 05:24 AM, Joseph Lo wrote:
> > On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
> >> On 10/08/2012 04:26 AM, Joseph Lo wrote:
> >>> The cpuidle LP2 is a power gating idle mode. It support power gating
> >>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
> >>> be last one to go into LP2. We need to take care and make sure whole
> >>> secondary CPUs were in LP2 by checking CPU and power gate status.
> >>> After that, the CPU0 can go into LP2 safely. Then power gating the
> >>> CPU rail.
> >>
> >>> diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
> >>
> >>> +static bool tegra30_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
> >>> +					 struct cpuidle_driver *drv,
> >>> +					 int index)
> >>> +{
> >>> +	struct cpuidle_state *state = &drv->states[index];
> >>> +	u32 cpu_on_time = state->exit_latency;
> >>> +	u32 cpu_off_time = state->target_residency - state->exit_latency;
> >>> +
> >>> +	if (num_online_cpus() > 1 && !tegra_cpu_rail_off_ready()) {
> >>
> >> Should that be || not &&?
> >>
> >> Isn't the "num_online_cpus() > 1" condition effectively checked at the
> >> call site, i.e. in tegra30_idle_lp2() below via the if (last_cpu) check?
> >>
> > 
> > Should be "&&" here.
> > Because we need to check if there are still multi CPUs online, then we
> > need to make sure all the secondary CPUs be power gated first. After all
> > the secondary CPUs been power gated, the CPU0 could go into LP2 and the
> > CPU rail could be shut off.
> > If all the secondary CPUs been hot plugged, then the "num_online_cpus()
> >> 1" would be always false. Then the CPU0 can go into LP2 directly.
> > 
> > So it was used to check are there multi cpus online or not? It's
> > difference with the last_cpu check below. The last_cpu was used to check
> > all the CPUs were in LP2 process or not. If the CPU0 is the last one
> > went into LP2 process, then it would be true.
> > 
> > So the point here is. We can avoid to check the power status of the
> > secodarys CPUs if they be unplugged.
> 
> OK, so this condition is about ignoring the result of
> tegra_cpu_rail_off_ready() if there is only 1 CPU online. That makes
> sense, since we know in that case there cannot be any other CPUs to
> check if they're in LP2 or not.
> 
> But what about the case where 2 CPUs are online and 2 offline. In that
> case, num_online_cpus() > 1, so the call to tegra_cpu_rail_off_ready()
> is skipped. Yet, with 2 CPUs online, we do need to check whichever other
> CPU is online to see if it's in LP2 or not.
> 
> I think what we need to do is the following:
> 
> cpus_in_lp2_mask = generate_mask_from_pmc_registers();
> if (cpus_in_lp2_mask != cpus_online_mask) {
>     cpu_do_idle();
>     return;
> }
> enter lp2;
> 
> right?
> 

We do not only check the cpus_in_lp2_mask here. The most important
thing here is to check that all the other secondary CPUs have already
been power gated. We need to check the physical power status of the
CPUs, not the logical status.

When only 2 CPUs are online, "num_online_cpus() > 1" is true and
"tegra_cpu_rail_off_ready" is still checked.

> >>> @@ -85,16 +108,22 @@ static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
> >>>  				      int index)
> >>>  {
> >>>  	bool entered_lp2 = false;
> >>> +	bool last_cpu;
> >>>  
> >>>  	local_fiq_disable();
> >>>  
> >>> +	last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
> >>> +	if (dev->cpu == 0) {
> >>> +		if (last_cpu)
> >>> +			entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
> >>> +								   index);
> >>> +		else
> >>> +			cpu_do_idle();
> >>> +	} else {
> >>>  		entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
> >>> +	}
> >>
> >> Hmm. That means that if the last CPU to enter LP2 is e.g. CPU1, then
> >> even though all CPUs are now in LP2, the complex as a whole doesn't
> >> enter LP2. Is there a way to make the cluster as a whole enter LP2 in
> >> this case? Isn't that what coupled cpuidle is for?
> > 
> > It may look like the coupled cpuidle can satisfy the usage here. But it
> > didn't. Please check the criteria of coupled cpuidle. 
> 
> What about the first part of the question. What happens if:
> 
> CPU3 enters LP2
> CPU2 enters LP2
> CPU0 enters LP2
> CPU1 enters LP2
> 
> Since CPU1 is not CPU0, tegra30_idle_enter_lp2_cpu_n() is called, and
> hence I think the whole CPU complex is never rail-gated (just each CPU
> is power-gated) even though all CPUs are in LP2 and the complex could be
> rail-gated. Isn't this missing out on power-savings?

Yes, indeed.
> 
> So, we either need to:
> 
> a) Make tegra30_idle_enter_lp2_cpu_n() rail-gate if the last CPU is
> entering LP2, and then I'm not sure the implementation would be any
> different to tegra30_idle_enter_lp2_cpu_0, would it?
> 
No, we need to make sure that CPU0 is the last one to go into the LP2
state and that all the other CPUs have already been power gated. This
is the only safe way to shut off the vdd_cpu rail (HW limitation).

> b) If CPUn can't trigger rail-gating, then when CPUn is the last to
> enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
> rail-gate, and simply power-gate itself. I believe this IPI interaction
> is exactly what coupled cpuidle is about, isn't it?
> 

Yes, indeed. Actually, I have tried coupled cpuidle on Tegra20, so I
know it well. But I ran into issues when porting it: it looks like a
race condition that turns into a deadlock caused by a missed IPI.
Anyway, we can talk about it in more detail when I try to upstream
coupled cpuidle support for Tegra later.

> > /*
> >  * To use coupled cpuidle states, a cpuidle driver must:
> >  *
> >  *    Set struct cpuidle_device.coupled_cpus to the mask of all
> >  *    coupled cpus, usually the same as cpu_possible_mask if all cpus
> >  *    are part of the same cluster.  The coupled_cpus mask must be
> >  *    set in the struct cpuidle_device for each cpu.
> >  *
> >  *    Set struct cpuidle_device.safe_state to a state that is not a
> >  *    coupled state.  This is usually WFI.
> >  *
> >  *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
> >  *    state that affects multiple cpus.
> >  *
> >  *    Provide a struct cpuidle_state.enter function for each state
> >  *    that affects multiple cpus.  This function is guaranteed to be
> >  *    called on all cpus at approximately the same time.  The driver
> >  *    should ensure that the cpus all abort together if any cpu tries
> >  *    to abort once the function is called.  The function should return
> >  *    with interrupts still disabled.
> >  */
> > 
> > The Tegra30 can support the secondary CPUs go into LP2 (power-gate)
> > independently.
> 
> I think that just means that the safe state for CPUn (i.e. not CPU0) can
> do better than WFI on Tegra30, even though it can't on Tegra20.
> 
Yes, I understood.

> > The limitation of the CPU0 is the CPU0 must be the last
> > one to go into LP2 to shut off CPU rail.
> > 
> > It also no need for every CPU to leave LP2 at the same time. The CPU0
> > would be always the first one that woken up from LP2. But all the other
> > secondary CPUs can still keep in LP2. One of the secondary CPUs can also
> > be woken up alone, if the CPU0 already up.
> 
> That seems like an implementation detail. Perhaps coupled cpuidle needs
> to be enhanced to best support Tegra30?
> 
Yes. If coupled cpuidle could allow the CPUs to leave the coupled
state independently, it would be perfect.

> >>> diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
> >>
> >>> +static void set_power_timers(unsigned long us_on, unsigned long us_off)
> >>
> >>> +	if (tegra_pclk == NULL) {
> >>> +		tegra_pclk = clk_get_sys(NULL, "pclk");
> >>> +		if (IS_ERR(tegra_pclk)) {
> >>> +			/*
> >>> +			 * pclk not been init or not exist.
> >>> +			 * Use sclk to take the place of it.
> >>> +			 * The default setting was pclk=sclk.
> >>> +			 */
> >>> +			tegra_pclk = clk_get_sys(NULL, "sclk");
> >>> +		}
> >>> +	}
> >>
> >> That's a little odd. Surely the HW has pclk or it doesn't? Why use
> >> different clocks at different times for what is apparently the same thing?
> > 
> > It just because the "pclk" is not available on the Tegra30's clock
> > framework but Tegra20 right now.
> 
> We should just fix that instead of working around it then. I assume it's
> a simple matter of adding the appropriate clock definition?

OK. I will add the "pclk" clock interface for Tegra30.

Thanks,
Joseph


--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Lo - Oct. 12, 2012, 7:11 a.m.
On Fri, 2012-10-12 at 00:48 +0800, Colin Cross wrote:
> On Thu, Oct 11, 2012 at 9:37 AM, Stephen Warren <swarren@wwwdotorg.org> wrote:
> > On 10/11/2012 05:24 AM, Joseph Lo wrote:
> >> On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
> >>> On 10/08/2012 04:26 AM, Joseph Lo wrote:
> >>>> The cpuidle LP2 is a power gating idle mode. It support power gating
> >>>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
> >>>> be last one to go into LP2. We need to take care and make sure whole
> >>>> secondary CPUs were in LP2 by checking CPU and power gate status.
> >>>> After that, the CPU0 can go into LP2 safely. Then power gating the
> >>>> CPU rail.
> 
> <snip>
> 
> >>>> @@ -85,16 +108,22 @@ static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
> >>>>                                   int index)
> >>>>  {
> >>>>     bool entered_lp2 = false;
> >>>> +   bool last_cpu;
> >>>>
> >>>>     local_fiq_disable();
> >>>>
> >>>> +   last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
> >>>> +   if (dev->cpu == 0) {
> >>>> +           if (last_cpu)
> >>>> +                   entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
> >>>> +                                                              index);
> >>>> +           else
> >>>> +                   cpu_do_idle();
> >>>> +   } else {
> >>>>             entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
> >>>> +   }
> >>>
> >>> Hmm. That means that if the last CPU to enter LP2 is e.g. CPU1, then
> >>> even though all CPUs are now in LP2, the complex as a whole doesn't
> >>> enter LP2. Is there a way to make the cluster as a whole enter LP2 in
> >>> this case? Isn't that what coupled cpuidle is for?
> >>
> >> It may look like the coupled cpuidle can satisfy the usage here. But it
> >> didn't. Please check the criteria of coupled cpuidle.
> >
> > What about the first part of the question. What happens if:
> >
> > CPU3 enters LP2
> > CPU2 enters LP2
> > CPU0 enters LP2
> > CPU1 enters LP2
> >
> > Since CPU1 is not CPU0, tegra30_idle_enter_lp2_cpu_n() is called, and
> > hence I think the whole CPU complex is never rail-gated (just each CPU
> > is power-gated) even though all CPUs are in LP2 and the complex could be
> > rail-gated. Isn't this missing out on power-savings?
> >
> > So, we either need to:
> >
> > a) Make tegra30_idle_enter_lp2_cpu_n() rail-gate if the last CPU is
> > entering LP2, and then I'm not sure the implementation would be any
> > different to tegra30_idle_enter_lp2_cpu_0, would it?
> >
> > b) If CPUn can't trigger rail-gating, then when CPUn is the last to
> > enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
> > rail-gate, and simply power-gate itself. I believe this IPI interaction
> > is exactly what coupled cpuidle is about, isn't it?
> >
> >> /*
> >>  * To use coupled cpuidle states, a cpuidle driver must:
> >>  *
> >>  *    Set struct cpuidle_device.coupled_cpus to the mask of all
> >>  *    coupled cpus, usually the same as cpu_possible_mask if all cpus
> >>  *    are part of the same cluster.  The coupled_cpus mask must be
> >>  *    set in the struct cpuidle_device for each cpu.
> >>  *
> >>  *    Set struct cpuidle_device.safe_state to a state that is not a
> >>  *    coupled state.  This is usually WFI.
> >>  *
> >>  *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
> >>  *    state that affects multiple cpus.
> >>  *
> >>  *    Provide a struct cpuidle_state.enter function for each state
> >>  *    that affects multiple cpus.  This function is guaranteed to be
> >>  *    called on all cpus at approximately the same time.  The driver
> >>  *    should ensure that the cpus all abort together if any cpu tries
> >>  *    to abort once the function is called.  The function should return
> >>  *    with interrupts still disabled.
> >>  */
> >>
> >> The Tegra30 can support the secondary CPUs go into LP2 (power-gate)
> >> independently.
> >
> > I think that just means that the safe state for CPUn (i.e. not CPU0) can
> > do better than WFI on Tegra30, even though it can't on Tegra20.
> 
> Exactly.
> 
> >> The limitation of the CPU0 is the CPU0 must be the last
> >> one to go into LP2 to shut off CPU rail.
> >>
> >> It also no need for every CPU to leave LP2 at the same time. The CPU0
> >> would be always the first one that woken up from LP2. But all the other
> >> secondary CPUs can still keep in LP2. One of the secondary CPUs can also
> >> be woken up alone, if the CPU0 already up.
> >
> > That seems like an implementation detail. Perhaps coupled cpuidle needs
> > to be enhanced to best support Tegra30?
> 
> As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
> wake up the secondary cpus during the transitions to off and back on
> again.  Those cpus will immediately go back to single-cpu LP2, so it
> may not be a big deal, but there is a small optimization I've
> discussed with a few other people that could avoid waking them up.  I
> suggest adding an extra pre-idle hook to the Tegra30 that is called by
> coupled cpuidle on the last cpu to go down.  It would return a cpumask
> of cpus that have been prepared for idle by guaranteeing that they
> will not wake up from an interrupt, and therefore don't need to be
> woken up for the transitions.  I haven't worked with a cpu that needs
> this optimization yet, so I haven't done it.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Lo - Oct. 12, 2012, 7:40 a.m.
On Fri, 2012-10-12 at 00:48 +0800, Colin Cross wrote:
> On Thu, Oct 11, 2012 at 9:37 AM, Stephen Warren <swarren@wwwdotorg.org> wrote:
> > On 10/11/2012 05:24 AM, Joseph Lo wrote:
> >> On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
> >>> On 10/08/2012 04:26 AM, Joseph Lo wrote:
> >>>> The cpuidle LP2 is a power gating idle mode. It support power gating
> >>>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
> >>>> be last one to go into LP2. We need to take care and make sure whole
> >>>> secondary CPUs were in LP2 by checking CPU and power gate status.
> >>>> After that, the CPU0 can go into LP2 safely. Then power gating the
> >>>> CPU rail.
> 
> <snip>
> 
> >>>> @@ -85,16 +108,22 @@ static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
> >>>>                                   int index)
> >>>>  {
> >>>>     bool entered_lp2 = false;
> >>>> +   bool last_cpu;
> >>>>
> >>>>     local_fiq_disable();
> >>>>
> >>>> +   last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
> >>>> +   if (dev->cpu == 0) {
> >>>> +           if (last_cpu)
> >>>> +                   entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
> >>>> +                                                              index);
> >>>> +           else
> >>>> +                   cpu_do_idle();
> >>>> +   } else {
> >>>>             entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
> >>>> +   }
> >>>
> >>> Hmm. That means that if the last CPU to enter LP2 is e.g. CPU1, then
> >>> even though all CPUs are now in LP2, the complex as a whole doesn't
> >>> enter LP2. Is there a way to make the cluster as a whole enter LP2 in
> >>> this case? Isn't that what coupled cpuidle is for?
> >>
> >> It may look like the coupled cpuidle can satisfy the usage here. But it
> >> didn't. Please check the criteria of coupled cpuidle.
> >
> > What about the first part of the question. What happens if:
> >
> > CPU3 enters LP2
> > CPU2 enters LP2
> > CPU0 enters LP2
> > CPU1 enters LP2
> >
> > Since CPU1 is not CPU0, tegra30_idle_enter_lp2_cpu_n() is called, and
> > hence I think the whole CPU complex is never rail-gated (just each CPU
> > is power-gated) even though all CPUs are in LP2 and the complex could be
> > rail-gated. Isn't this missing out on power-savings?
> >
> > So, we either need to:
> >
> > a) Make tegra30_idle_enter_lp2_cpu_n() rail-gate if the last CPU is
> > entering LP2, and then I'm not sure the implementation would be any
> > different to tegra30_idle_enter_lp2_cpu_0, would it?
> >
> > b) If CPUn can't trigger rail-gating, then when CPUn is the last to
> > enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
> > rail-gate, and simply power-gate itself. I believe this IPI interaction
> > is exactly what coupled cpuidle is about, isn't it?
> >
> >> /*
> >>  * To use coupled cpuidle states, a cpuidle driver must:
> >>  *
> >>  *    Set struct cpuidle_device.coupled_cpus to the mask of all
> >>  *    coupled cpus, usually the same as cpu_possible_mask if all cpus
> >>  *    are part of the same cluster.  The coupled_cpus mask must be
> >>  *    set in the struct cpuidle_device for each cpu.
> >>  *
> >>  *    Set struct cpuidle_device.safe_state to a state that is not a
> >>  *    coupled state.  This is usually WFI.
> >>  *
> >>  *    Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
> >>  *    state that affects multiple cpus.
> >>  *
> >>  *    Provide a struct cpuidle_state.enter function for each state
> >>  *    that affects multiple cpus.  This function is guaranteed to be
> >>  *    called on all cpus at approximately the same time.  The driver
> >>  *    should ensure that the cpus all abort together if any cpu tries
> >>  *    to abort once the function is called.  The function should return
> >>  *    with interrupts still disabled.
> >>  */
> >>
> >> The Tegra30 can support the secondary CPUs go into LP2 (power-gate)
> >> independently.
> >
> > I think that just means that the safe state for CPUn (i.e. not CPU0) can
> > do better than WFI on Tegra30, even though it can't on Tegra20.
> 
> Exactly.
> 
> >> The limitation of the CPU0 is the CPU0 must be the last
> >> one to go into LP2 to shut off CPU rail.
> >>
> >> It also no need for every CPU to leave LP2 at the same time. The CPU0
> >> would be always the first one that woken up from LP2. But all the other
> >> secondary CPUs can still keep in LP2. One of the secondary CPUs can also
> >> be woken up alone, if the CPU0 already up.
> >
> > That seems like an implementation detail. Perhaps coupled cpuidle needs
> > to be enhanced to best support Tegra30?
> 
> As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
> wake up the secondary cpus during the transitions to off and back on
> again.  Those cpus will immediately go back to single-cpu LP2, so it
> may not be a big deal, but there is a small optimization I've
> discussed with a few other people that could avoid waking them up.  I
> suggest adding an extra pre-idle hook to the Tegra30 that is called by
> coupled cpuidle on the last cpu to go down.  It would return a cpumask
> of cpus that have been prepared for idle by guaranteeing that they
> will not wake up from an interrupt, and therefore don't need to be
> woken up for the transitions.  I haven't worked with a cpu that needs
> this optimization yet, so I haven't done it.

Hi Colin,

Thanks for your update. I understand what you are talk about. Actually,
I had tried it in the background for both Tegra20 and Tegra30. But I
could only run the coupled cpuidle state several times before it dead
locked in the coupled cpuidle state. It looks like a race condition and
a dead lock caused by a missing IPI. I am sure the GIC was on and the
IPI was fired. But sometimes the CPU just can't catch the IPI. And I had
a workaround (WAR) for it: doing a very short local_irq_enable and
local_irq_disable in the coupled cpuidle ready waiting loop. Then
everything works. Not sure — have you met this before? Any hint about this?

Very interesting about "guaranteeing that they will not wake up from an
interrupt" — did these CPUs go into the power-gate cpuidle mode via WFE? If
the CPUs do not wake up from an interrupt when doing cpuidle, how are they
woken up? By sending an event to these CPUs?

Thanks,
Joseph

--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shawn Guo - Oct. 12, 2012, 7:54 a.m.
On Thu, Oct 11, 2012 at 09:48:45AM -0700, Colin Cross wrote:
> As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
> wake up the secondary cpus during the transitions to off and back on
> again.  Those cpus will immediately go back to single-cpu LP2,

I'm sure coupled cpuidle will work like that.  We have the following
code at the end of  cpuidle_enter_state_coupled() to wait until all
coupled cpus have exited idle.

        while (!cpuidle_coupled_no_cpus_ready(coupled))
                cpu_relax();

The cpu woken up during the transitions will just loop there until all
other 3 cpus exit from idle function.

Shawn

> so it
> may not be a big deal, but there is a small optimization I've
> discussed with a few other people that could avoid waking them up.  I
> suggest adding an extra pre-idle hook to the Tegra30 that is called by
> coupled cpuidle on the last cpu to go down.  It would return a cpumask
> of cpus that have been prepared for idle by guaranteeing that they
> will not wake up from an interrupt, and therefore don't need to be
> woken up for the transitions.  I haven't worked with a cpu that needs
> this optimization yet, so I haven't done it.
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Lo - Oct. 12, 2012, 8:24 a.m.
On Fri, 2012-10-12 at 15:54 +0800, Shawn Guo wrote:
> On Thu, Oct 11, 2012 at 09:48:45AM -0700, Colin Cross wrote:
> > As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
> > wake up the secondary cpus during the transitions to off and back on
> > again.  Those cpus will immediately go back to single-cpu LP2,
> 
> I'm sure coupled cpuidle will work like that.  We have the following
> code at the end of  cpuidle_enter_state_coupled() to wait until all
> coupled cpus have exited idle.
> 
>         while (!cpuidle_coupled_no_cpus_ready(coupled))
>                 cpu_relax();
> 
> The cpu woken up during the transitions will just loop there until all
> other 3 cpus exit from idle function.
> 

Is it a good idea that a CPU which has been woken up by an interrupt
still needs to wait until all the other CPUs have been woken up before
it can handle the interrupt?

Thanks,
Joseph

--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shawn Guo - Oct. 12, 2012, 8:30 a.m.
On Fri, Oct 12, 2012 at 04:24:24PM +0800, Joseph Lo wrote:
> On Fri, 2012-10-12 at 15:54 +0800, Shawn Guo wrote:
> > On Thu, Oct 11, 2012 at 09:48:45AM -0700, Colin Cross wrote:
> > > As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
> > > wake up the secondary cpus during the transitions to off and back on
> > > again.  Those cpus will immediately go back to single-cpu LP2,
> > 
> > I'm sure coupled cpuidle will work like that.  We have the following
> > code at the end of  cpuidle_enter_state_coupled() to wait until all
> > coupled cpus have exited idle.
> > 
> >         while (!cpuidle_coupled_no_cpus_ready(coupled))
> >                 cpu_relax();
> > 
> > The cpu woken up during the transitions will just loop there until all
> > other 3 cpus exit from idle function.
> > 
> 
> Did this a good idea if the CPU was been woken up from an interrupt but
> it still needed to wait all other CPUs been woken up then it could
> handle the interrupt?
> 
This is how coupled cpuidle gets implemented right now.  And that's
why I see RCU stall warning reported on that cpu when I tried coupled
cpuidle on imx6q (CA9 Quad).

Shawn
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Colin Cross - Oct. 12, 2012, 8:46 p.m.
On Fri, Oct 12, 2012 at 1:24 AM, Joseph Lo <josephl@nvidia.com> wrote:
> On Fri, 2012-10-12 at 15:54 +0800, Shawn Guo wrote:
>> On Thu, Oct 11, 2012 at 09:48:45AM -0700, Colin Cross wrote:
>> > As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
>> > wake up the secondary cpus during the transitions to off and back on
>> > again.  Those cpus will immediately go back to single-cpu LP2,
>>
>> I'm sure coupled cpuidle will work like that.  We have the following
>> code at the end of  cpuidle_enter_state_coupled() to wait until all
>> coupled cpus have exited idle.
>>
>>         while (!cpuidle_coupled_no_cpus_ready(coupled))
>>                 cpu_relax();
>>
>> The cpu woken up during the transitions will just loop there until all
>> other 3 cpus exit from idle function.
>>
>
> Did this a good idea if the CPU was been woken up from an interrupt but
> it still needed to wait all other CPUs been woken up then it could
> handle the interrupt?

Each cpu enables its local interrupts between when it clears it's
ready flag and when it enters the loop above, so it will already be
handling interrupts.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Colin Cross - Oct. 12, 2012, 8:50 p.m.
On Fri, Oct 12, 2012 at 1:30 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
> On Fri, Oct 12, 2012 at 04:24:24PM +0800, Joseph Lo wrote:
>> On Fri, 2012-10-12 at 15:54 +0800, Shawn Guo wrote:
>> > On Thu, Oct 11, 2012 at 09:48:45AM -0700, Colin Cross wrote:
>> > > As is, coupled cpuidle will work on Tegra30, but it will unnecessarily
>> > > wake up the secondary cpus during the transitions to off and back on
>> > > again.  Those cpus will immediately go back to single-cpu LP2,
>> >
>> > I'm sure coupled cpuidle will work like that.  We have the following
>> > code at the end of  cpuidle_enter_state_coupled() to wait until all
>> > coupled cpus have exited idle.
>> >
>> >         while (!cpuidle_coupled_no_cpus_ready(coupled))
>> >                 cpu_relax();
>> >
>> > The cpu woken up during the transitions will just loop there until all
>> > other 3 cpus exit from idle function.
>> >
>>
>> Did this a good idea if the CPU was been woken up from an interrupt but
>> it still needed to wait all other CPUs been woken up then it could
>> handle the interrupt?
>>
> This is how coupled cpuidle gets implemented right now.  And that's
> why I see RCU stall warning reported on that cpu when I tried coupled
> cpuidle on imx6q (CA9 Quad).

Current coupled cpuidle requires all cpus to wake up together and go
back to the idle governor to select a new state, because that's what
all available ARM cpus did when I wrote it.  You should be able to
implement coupled idle on a cpu that does not need to wake all the
cpus if you wake them anyways, and then later you can optimize it to
avoid the extra wakeups when the extension to coupled cpuidle that I
discussed previously gets implemented.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Warren - Oct. 12, 2012, 9:04 p.m.
On 10/12/2012 01:07 AM, Joseph Lo wrote:
> On Fri, 2012-10-12 at 00:37 +0800, Stephen Warren wrote:
>> On 10/11/2012 05:24 AM, Joseph Lo wrote:
>>> On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
>>>> On 10/08/2012 04:26 AM, Joseph Lo wrote:
>>>>> The cpuidle LP2 is a power gating idle mode. It support power gating
>>>>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
>>>>> be last one to go into LP2. We need to take care and make sure whole
>>>>> secondary CPUs were in LP2 by checking CPU and power gate status.
>>>>> After that, the CPU0 can go into LP2 safely. Then power gating the
>>>>> CPU rail.
>>>>
>>>>> diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
>>>>
>>>>> +static bool tegra30_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
>>>>> +					 struct cpuidle_driver *drv,
>>>>> +					 int index)
>>>>> +{
>>>>> +	struct cpuidle_state *state = &drv->states[index];
>>>>> +	u32 cpu_on_time = state->exit_latency;
>>>>> +	u32 cpu_off_time = state->target_residency - state->exit_latency;
>>>>> +
>>>>> +	if (num_online_cpus() > 1 && !tegra_cpu_rail_off_ready()) {
>>>>
>>>> Should that be || not &&?
>>>>
>>>> Isn't the "num_online_cpus() > 1" condition effectively checked at the
>>>> call site, i.e. in tegra30_idle_lp2() below via the if (last_cpu) check?
>>>>
>>>
>>> Should be "&&" here.
>>> Because we need to check if there are still multi CPUs online, then we
>>> need to make sure all the secondary CPUs be power gated first. After all
>>> the secondary CPUs been power gated, the CPU0 could go into LP2 and the
>>> CPU rail could be shut off.
>>> If all the secondary CPUs been hot plugged, then the "num_online_cpus()
>>>> 1" would be always false. Then the CPU0 can go into LP2 directly.
>>>
>>> So it was used to check are there multi cpus online or not? It's
>>> difference with the last_cpu check below. The last_cpu was used to check
>>> all the CPUs were in LP2 process or not. If the CPU0 is the last one
>>> went into LP2 process, then it would be true.
>>>
>>> So the point here is. We can avoid to check the power status of the
>>> secodarys CPUs if they be unplugged.
>>
>> OK, so this condition is about ignoring the result of
>> tegra_cpu_rail_off_ready() if there is only 1 CPU online. That makes
>> sense, since we know in that case there cannot be any other CPUs to
>> check if they're in LP2 or not.
>>
>> But what about the case where 2 CPUs are online and 2 offline. In that
>> case, num_online_cpus() > 1, so the call to tegra_cpu_rail_off_ready()
>> is skipped.

Uggh. I'm not thinking straight, so what I said there was backwards.

>> b) If CPUn can't trigger rail-gating, then when CPUn is the last to
>> enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
>> rail-gate, and simply power-gate itself. I believe this IPI interaction
>> is exactly what coupled cpuidle is about, isn't it?
> 
> Yes, indeed. Actually, I had tried the coupled cpuidle on Tegra20. I
> knew it a lot. But I met issues when porting it. It looks like a race
> condition and becomes a dead lock caused by IPI missing. Anyway, we can
> talk about it more detail when I try to upstream the coupled cpuidle
> support for Tegra later.

Hmm. That sounds a little churny. Why can't we just use coupled cpuidle
right from the start if the plan is to use it eventually anyway? From
other comments, it sounds like you even already have the code basically
working, but just need to iron out a bug or two?
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Lo - Oct. 15, 2012, 7:56 a.m.
On Sat, 2012-10-13 at 05:04 +0800, Stephen Warren wrote:
> On 10/12/2012 01:07 AM, Joseph Lo wrote:
> > On Fri, 2012-10-12 at 00:37 +0800, Stephen Warren wrote:
> >> On 10/11/2012 05:24 AM, Joseph Lo wrote:
> >>> On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
> >>>> On 10/08/2012 04:26 AM, Joseph Lo wrote:
> >>>>> The cpuidle LP2 is a power gating idle mode. It support power gating
> >>>>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
> >>>>> be last one to go into LP2. We need to take care and make sure whole
> >>>>> secondary CPUs were in LP2 by checking CPU and power gate status.
> >>>>> After that, the CPU0 can go into LP2 safely. Then power gating the
> >>>>> CPU rail.
> >>>>
> >>>>> diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
> >> b) If CPUn can't trigger rail-gating, then when CPUn is the last to
> >> enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
> >> rail-gate, and simply power-gate itself. I believe this IPI interaction
> >> is exactly what coupled cpuidle is about, isn't it?
> > 
> > Yes, indeed. Actually, I had tried the coupled cpuidle on Tegra20. I
> > knew it a lot. But I met issues when porting it. It looks like a race
> > condition and becomes a dead lock caused by IPI missing. Anyway, we can
> > talk about it more detail when I try to upstream the coupled cpuidle
> > support for Tegra later.
> 
> Hmm. That sounds a little churny. Why can't we just use coupled cpuidle
> right from the start if the plan is to use it eventually anyway? From
> other comments, it sounds like you even already have the code basically
> working, but just need to iron out a bug or two?

Stephen,

Even though we have plan to use coupled cpuidle, I still prefer to go
with the LP2 driver first. Then adding one more patch to support coupled
cpuidle based on LP2 driver. This is good for history. And if there is
any issue, it's more easy to roll back to the stable one.

There is still one thing you should know. Because we are planning to
upstream "CPUquiet" framework that is a CPU auto hotplug mechanism. It
will auto hotplug the CPUs depends on the system is busy or not. So when
system is idle, there will be only one CPU online (i.e, CPU0). The
secondary CPUs will all be hot plugged (i.e, offline and power gate). We
need to think about do we still need coupled cpuidle on Tegra30 if we
are going to use "CPUquiet".

And Yes, I already had a work version on Tegra20 but I need to iron out
the IPI missing issue first. The LP2 driver of Tegra20 also not yet be
upstreamed.

Thanks,
Joseph

--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Warren - Oct. 15, 2012, 3:59 p.m.
On 10/15/2012 01:56 AM, Joseph Lo wrote:
> On Sat, 2012-10-13 at 05:04 +0800, Stephen Warren wrote:
>> On 10/12/2012 01:07 AM, Joseph Lo wrote:
>>> On Fri, 2012-10-12 at 00:37 +0800, Stephen Warren wrote:
>>>> On 10/11/2012 05:24 AM, Joseph Lo wrote:
>>>>> On Wed, 2012-10-10 at 06:49 +0800, Stephen Warren wrote:
>>>>>> On 10/08/2012 04:26 AM, Joseph Lo wrote:
>>>>>>> The cpuidle LP2 is a power gating idle mode. It support power gating
>>>>>>> vdd_cpu rail after all cpu cores in LP2. For Tegra30, the CPU0 must
>>>>>>> be last one to go into LP2. We need to take care and make sure whole
>>>>>>> secondary CPUs were in LP2 by checking CPU and power gate status.
>>>>>>> After that, the CPU0 can go into LP2 safely. Then power gating the
>>>>>>> CPU rail.
>>>>>>
>>>>>>> diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
>>>> b) If CPUn can't trigger rail-gating, then when CPUn is the last to
>>>> enter LP2 of the whole complex, it needs to IPI to CPU0 to tell it to
>>>> rail-gate, and simply power-gate itself. I believe this IPI interaction
>>>> is exactly what coupled cpuidle is about, isn't it?
>>>
>>> Yes, indeed. Actually, I had tried the coupled cpuidle on Tegra20. I
>>> knew it a lot. But I met issues when porting it. It looks like a race
>>> condition and becomes a dead lock caused by IPI missing. Anyway, we can
>>> talk about it more detail when I try to upstream the coupled cpuidle
>>> support for Tegra later.
>>
>> Hmm. That sounds a little churny. Why can't we just use coupled cpuidle
>> right from the start if the plan is to use it eventually anyway? From
>> other comments, it sounds like you even already have the code basically
>> working, but just need to iron out a bug or two?
> 
> Stephen,
> 
> Even though we have plan to use coupled cpuidle, I still prefer to go
> with the LP2 driver first. Then adding one more patch to support coupled
> cpuidle based on LP2 driver. This is good for history. And if there is
> any issue, it's more easy to roll back to the stable one.

I don't think that implementing it one way and then changing to a
different way will benefit history at all. It'll make the history more
complicated. What exactly is the problem with just using coupled cpuidle
from the start? If we did merge this implementation now, then switch to
coupled cpuidle later, when do you think the switch would happen?

> There is still one thing you should know. Because we are planning to
> upstream "CPUquiet" framework that is a CPU auto hotplug mechanism. It
> will auto hotplug the CPUs depends on the system is busy or not. So when
> system is idle, there will be only one CPU online (i.e, CPU0). The
> secondary CPUs will all be hot plugged (i.e, offline and power gate). We
> need to think about do we still need coupled cpuidle on Tegra30 if we
> are going to use "CPUquiet".

CPUquiet isn't relevant at all. First, a user may presumably disable
CPUquiet's Kconfig option (it had better have one, and the system had
better work with it disabled). Second, even if CPUquiet is enabled, I
don't imagine there is a 100% guarantee that hot(un)plug will happen
before cpuidle kicks in, is there? Finally, is there any guarantee that
CPUquiet will actually be accepted upstream?
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shawn Guo - Oct. 15, 2012, 4:28 p.m.
Changed subject (was: Re: [PATCH 7/7] ARM: tegra30: cpuidle: add LP2
driver for CPU0) ...

On Fri, Oct 12, 2012 at 01:50:35PM -0700, Colin Cross wrote:
> Current coupled cpuidle requires all cpus to wake up together and go
> back to the idle governor to select a new state, because that's what
> all available ARM cpus did when I wrote it.  You should be able to
> implement coupled idle on a cpu that does not need to wake all the
> cpus if you wake them anyways,

Can you please elaborate it a little bit?  I do not quite understand
what you mean.

Basically, imx6q has a low-power mode named WAIT.  When all 4 cores
are in WFI, imx6q will go into WAIT mode, and whenever there is a
wakeup interrupt, it will exit WAIT mode.  Software can configure
hardware behavior during WAIT mode, clock gating or power gating for
ARM core.

I'm trying to implement this low-power mode with coupled cpuidle.
I initially have the following code as the coupled cpuidle enter
function for clock gating case.

/*
 * Coupled-cpuidle enter handler for the imx6q WAIT low-power mode
 * (clock-gating variant).  The hardware enters WAIT only once all
 * online cores are in WFI, so the last core to arrive arms the mode
 * and the first core to leave disarms it.  Returns the entered state
 * index, per the cpuidle enter-function contract.
 */
static int imx6q_enter_wait_coupled(struct cpuidle_device *dev,
				    struct cpuidle_driver *drv, int index)
{
	int cpu = dev->cpu;

	/*
	 * Switch this cpu's timer events to the broadcast device —
	 * presumably because the per-cpu local timer stops while the
	 * ARM clock is gated in WAIT mode (TODO confirm on imx6q).
	 */
	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

	/* Last core to arrive: arm WAIT mode with the ARM clock gated. */
	if (atomic_inc_return(&master) == num_online_cpus())
		imx6q_set_lpm(WAIT_UNCLOCKED);

	/* WFI; hardware drops into WAIT once every core is here. */
	cpu_do_idle();

	/*
	 * First core to wake: restore normal clocked behavior so the
	 * remaining cores are not re-gated while they exit.
	 */
	if (atomic_dec_return(&master) == num_online_cpus() - 1)
		imx6q_set_lpm(WAIT_CLOCKED);

	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
	return index;
}

However I think there are a couple of problems.  The first one is the
extra wakeups you have mentioned.  When last cpu is in call
imx6q_set_lpm(), some of the first 3 cpus that are already in WFI may
exit from there.  The second one is when hardware exits WAIT mode and
has ARM clock resumed, some cpus may stay in WFI if scheduler has
nothing to wake them up.  This breaks the requirement of coupled cpuidle
that all cpus need to exit together.  I can force the function to work
around the problems by adding cpuidle_coupled_parallel_barrier() just
above cpu_do_idle() and arch_send_wakeup_ipi_mask(cpu_online_mask)
right after imx6q_set_lpm(WAIT_CLOCKED).  But I doubt it's appropriate.

So I rewrite the function as below to not use coupled cpuidle, and it
works fine.

/*
 * Non-coupled cpuidle enter hook for imx6q WAIT mode.
 *
 * The last cpu to go idle becomes the "master": it takes master_lock,
 * switches the SoC to WAIT_UNCLOCKED, idles, and restores WAIT_CLOCKED
 * on wakeup.  Every other cpu (and a would-be master that loses the
 * trylock race) just does a plain WFI.
 */
static int imx6q_enter_wait(struct cpuidle_device *dev,
			    struct cpuidle_driver *drv, int index)
{
	int cpu = dev->cpu;

	/* Switch to the broadcast clockevent, presumably because the
	 * per-cpu local timer stops in WAIT mode — TODO confirm. */
	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

	if (atomic_inc_return(&master) == num_online_cpus()) {
		/*
		 * With this lock, we prevent another cpu that exits
		 * idle and re-enters this function from also becoming
		 * the master while WAIT mode is still armed.
		 */
		if (!spin_trylock(&master_lock))
			goto idle;

		imx6q_set_lpm(WAIT_UNCLOCKED);
		cpu_do_idle();
		imx6q_set_lpm(WAIT_CLOCKED);

		spin_unlock(&master_lock);
		goto out;
	}

idle:
	cpu_do_idle();
out:
	atomic_dec(&master);
	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
	return index;
}

For power gating case, it may better fit coupled cpuidle usage model,
since all the cores will have to run resume routine and thus will not
be in WFI when hardware exits from WAIT mode.  But we still need to
work out the extra wakeup issue which will also be seen in this case.

> and then later you can optimize it to
> avoid the extra wakeups when the extension to coupled cpuidle that I
> discussed previously gets implemented.

Do you have a pointer to the initial discussion for the extension, or
you have a draft patch for that already (hopefully:)?

Shawn
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Colin Cross - Oct. 15, 2012, 10:33 p.m.
On Mon, Oct 15, 2012 at 8:59 AM, Stephen Warren <swarren@wwwdotorg.org> wrote:
> On 10/15/2012 01:56 AM, Joseph Lo wrote:
>> There is still one thing you should know. Because we are planning to
>> upstream "CPUquiet" framework that is a CPU auto hotplug mechanism. It
>> will auto hotplug the CPUs depends on the system is busy or not. So when
>> system is idle, there will be only one CPU online (i.e, CPU0). The
>> secondary CPUs will all be hot plugged (i.e, offline and power gate). We
>> need to think about do we still need coupled cpuidle on Tegra30 if we
>> are going to use "CPUquiet".
>
> CPUquiet isn't relevant at all. First, a user may presumably disable
> CPUquiet's Kconfig option (it had better have one, and the system had
> better work with it disabled). Second, even if CPUquiet is enabled, I
> don't imagine there is a 100% guarantee that hot(un)plug will happen
> before cpuidle kicks in, is there? Finally, is there any guarantee that
> CPUquiet will actually be accepted upstream?

CPUquiet is a glorified hotplug governor, and hotplug governors have
been widely rejected upstream, so I wouldn't plan on seeing it
accepted.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Colin Cross - Oct. 15, 2012, 10:58 p.m.
On Mon, Oct 15, 2012 at 9:28 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
> Changed subject (was: Re: [PATCH 7/7] ARM: tegra30: cpuidle: add LP2
> driver for CPU0) ...
>
> On Fri, Oct 12, 2012 at 01:50:35PM -0700, Colin Cross wrote:
>> Current coupled cpuidle requires all cpus to wake up together and go
>> back to the idle governor to select a new state, because that's what
>> all available ARM cpus did when I wrote it.  You should be able to
>> implement coupled idle on a cpu that does not need to wake all the
>> cpus if you wake them anyways,
>
> Can you please elaborate it a little bit?  I do not quite understand
> what you mean.

In the current coupled cpuidle implementation, no cpu will return back
to the scheduler until all cpus have returned from their idle
function.  That means you need to force each cpu to wake up whenever
one of them wakes up.  This is a limitation in the current coupled
cpuidle design due to all existing coupled cpuidle implementations
having it as a hardware requirement, but is not a hardware requirement
for your WAIT mode.  Until somebody fixes that, you have to fake
wakeups to the other cpus.

> Basically, imx6q has a low-power mode named WAIT.  When all 4 cores
> are in WFI, imx6q will go into WAIT mode, and whenever there is a
> wakeup interrupt, it will exit WAIT mode.  Software can configure
> hardware behavior during WAIT mode, clock gating or power gating for
> ARM core.
>
> I'm trying to implement this low-power mode with coupled cpuidle.
> I initially have the following code as the coupled cpuidle enter
> function for clock gating case.
>
> static int imx6q_enter_wait_coupled(struct cpuidle_device *dev,
>                                     struct cpuidle_driver *drv, int index)
> {
>         int cpu = dev->cpu;
>
>         clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
>
>         if (atomic_inc_return(&master) == num_online_cpus())
>                 imx6q_set_lpm(WAIT_UNCLOCKED);
>
>         cpu_do_idle();
>
>         if (atomic_dec_return(&master) == num_online_cpus() - 1)
>                 imx6q_set_lpm(WAIT_CLOCKED);
>
>         clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
>         return index;
> }
>
> However I think there are a couple of problems.  The first one is the
> extra wakeups you have mentioned.  When last cpu is in call
> imx6q_set_lpm(), some of the first 3 cpus that are already in WFI may
> exit from there.  The second one is when hardware exits WAIT mode and
> has ARM clock resumed, some cpus may stay in WFI if scheduler has
> nothing to wake them up.  This breaks the requirement of coupled cpuidle
> that all cpus need to exit together.  I can force the function to work
> around the problems by adding cpuidle_coupled_parallel_barrier() just
> above cpu_do_idle() and arch_send_wakeup_ipi_mask(cpu_online_mask)
> right after imx6q_set_lpm(WAIT_CLOCKED).  But I doubt it's appropriate.

That is the appropriate way of getting it working with today's coupled cpuidle.

> So I rewrite the function as below to not use coupled cpuidle, and it
> works fine.
>
> static int imx6q_enter_wait(struct cpuidle_device *dev,
>                             struct cpuidle_driver *drv, int index)
> {
>         int cpu = dev->cpu;
>
>         clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
>
>         if (atomic_inc_return(&master) == num_online_cpus()) {
>                 /*
>                  * With this lock, we prevent the other cpu to exit and
>                  * enter this function again and become the master.
>                  */
>                 if (!spin_trylock(&master_lock))
>                         goto idle;
>
>                 imx6q_set_lpm(WAIT_UNCLOCKED);
>                 cpu_do_idle();
>                 imx6q_set_lpm(WAIT_CLOCKED);
>
>                 spin_unlock(&master_lock);
>                 goto out;
>         }
>
> idle:
>         cpu_do_idle();
> out:
>         atomic_dec(&master);
>         clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
>         return index;
> }

Coupled cpuidle provides two features, and you only need one of them
for your clock gating mode.  The more complex one is sequencing, which
you don't need since your hardware can check if all cpus are in WFI
for you.  The other feature is voting, to make sure all cpus can
accept the exit latency of the deeper idle state.  The
atomic_inc_return is handling voting in your example, but it can't
handle one cpu requesting power gating while the other requests clock
gating.

> For power gating case, it may better fit coupled cpuidle usage model,
> since all the cores will have to run resume routine and thus will not
> be in WFI when hardware exits from WAIT mode.  But we still need to
> work out the extra wakeup issue which will also be seen in this case.

Yes, but using coupled cpuidle for both clock gating and power gating
will allow you to get into clock gating when one cpu wants clock
gating and the other wants power gating.

>> and then later you can optimize it to
>> avoid the extra wakeups when the extension to coupled cpuidle that I
>> discussed previously gets implemented.
>
> Do you have a pointer to the initial discussion for the extension, or
> you have a draft patch for that already (hopefully:)?

I can't find the initial discussion, and I'm not going to have a
chance to work on it for at least a few weeks.  The basic idea is:
Add "preidle" and "postidle" function pointers to the coupled cpuidle
states (better names TBD).
When all cpus are waiting, and before sending any pokes, call the
preidle hook on the last cpu to start waiting.  It must only be called
on one cpu.  The preidle hook is responsible for disabling wakeups on
secondary cpus so they can't wake up, and returning a mask of the cpus
it has disabled.
The coupled core can increment ready count by the number of cpus that
are disabled and skip poking them.

After coupled idle returns (or when aborting coupled idle), call the
postidle hook with the mask returned by the pre-idle hook, which
re-enables wakeups on secondary cpus and returns a mask of cpus that
are not waking because they are already in a state that can handle
interrupts (WFI).  The calling cpu will then decrement the ready count
by the number of cpus that are not waking.

There are some tricky synchronization problems to solve:
Only one cpu can call the preidle and postidle hooks.  On some
hardware, they may not even be the same cpu.
A cpu that is left in the WFI state by the postidle hook is back in
the "waiting but not ready" state, and the current coupled cpuidle
code does not support going from waiting->ready->waiting.
Once the postidle hook has returned, some cpus may be able to
immediately handle interrupts and return to the scheduler.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter De Schrijver - Oct. 16, 2012, 8:06 a.m.
> > Even though we have plan to use coupled cpuidle, I still prefer to go
> > with the LP2 driver first. Then adding one more patch to support coupled
> > cpuidle based on LP2 driver. This is good for history. And if there is
> > any issue, it's more easy to roll back to the stable one.
> 
> I don't think that implementing it one way and then changing to a
> different way will benefit history at all. It'll make the history more
> complicated. What exactly is the problem with just using coupled cpuidle
> from the start? If we did merge this implementation now, then switch to
> coupled cpuidle later, when do you think the switch would happen?
> 

Before we consider doing this, I think we should have some idea on how
frequently we run into the situation where CPU0 is idle but a secondary
core is not. Depending on that we can then decide how useful coupled cpuidle
would be for us.

Cheers,

Peter.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter De Schrijver - Oct. 16, 2012, 8:13 a.m.
On Tue, Oct 16, 2012 at 12:33:07AM +0200, Colin Cross wrote:
> On Mon, Oct 15, 2012 at 8:59 AM, Stephen Warren <swarren@wwwdotorg.org> wrote:
> > On 10/15/2012 01:56 AM, Joseph Lo wrote:
> >> There is still one thing you should know. Because we are planning to
> >> upstream "CPUquiet" framework that is a CPU auto hotplug mechanism. It
> >> will auto hotplug the CPUs depends on the system is busy or not. So when
> >> system is idle, there will be only one CPU online (i.e, CPU0). The
> >> secondary CPUs will all be hot plugged (i.e, offline and power gate). We
> >> need to think about do we still need coupled cpuidle on Tegra30 if we
> >> are going to use "CPUquiet".
> >
> > CPUquiet isn't relevant at all. First, a user may presumably disable
> > CPUquiet's Kconfig option (it had better have one, and the system had
> > better work with it disabled). Second, even if CPUquiet is enabled, I
> > don't imagine there is a 100% guarantee that hot(un)plug will happen
> > before cpuidle kicks in, is there? Finally, is there any guarantee that
> > CPUquiet will actually be accepted upstream?
> 
> CPUquiet is a glorified hotplug governor, and hotplug governors have
> been widely rejected upstream, so I wouldn't plan on seeing it
> accepted.

Note that nothing in CPUquiet enforces the use of hotplug. It assumes there
is a way to put a CPU in a quiet state which means it doesn't get interrupted,
doesn't do any work and doesn't respond to IPIs. Currently only hotplug
provides this state, but it's possible to provide a driver which uses a
different mechanism to provide the same state. We need this state to be able
to switch to the low power cluster which has only 1 CPU. IPIs to non-existing
cores would hang the system.

Cheers,

Peter.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Warren - Oct. 16, 2012, 5:03 p.m.
On 10/16/2012 02:06 AM, Peter De Schrijver wrote:
>>> Even though we have plan to use coupled cpuidle, I still prefer to go
>>> with the LP2 driver first. Then adding one more patch to support coupled
>>> cpuidle based on LP2 driver. This is good for history. And if there is
>>> any issue, it's more easy to roll back to the stable one.
>>
>> I don't think that implementing it one way and then changing to a
>> different way will benefit history at all. It'll make the history more
>> complicated. What exactly is the problem with just using coupled cpuidle
>> from the start? If we did merge this implementation now, then switch to
>> coupled cpuidle later, when do you think the switch would happen?
> 
> Before we consider doing this, I think we should have some idea on how
> frequently we run into the situation where CPU0 is idle but a secondary
> core is not. Depending on that we can then decide how useful coupled cpuidle
> would be for us.

Would it not be 75% of the time where we have 1 of 4 CPUs active? At
least, that's assuming that all work is evenly distributed amongst CPUs,
and hence it's random which CPU is the last to go idle, but perhaps
that's not the case if CPU0 is somehow special workload-wise?

But yes, some stats gathered from real-world use-cases could well be
interesting.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter De Schrijver - Oct. 18, 2012, 9:24 a.m.
On Tue, Oct 16, 2012 at 07:03:43PM +0200, Stephen Warren wrote:
> On 10/16/2012 02:06 AM, Peter De Schrijver wrote:
> >>> Even though we have plan to use coupled cpuidle, I still prefer to go
> >>> with the LP2 driver first. Then adding one more patch to support coupled
> >>> cpuidle based on LP2 driver. This is good for history. And if there is
> >>> any issue, it's more easy to roll back to the stable one.
> >>
> >> I don't think that implementing it one way and then changing to a
> >> different way will benefit history at all. It'll make the history more
> >> complicated. What exactly is the problem with just using coupled cpuidle
> >> from the start? If we did merge this implementation now, then switch to
> >> coupled cpuidle later, when do you think the switch would happen?
> > 
> > Before we consider doing this, I think we should have some idea on how
> > frequently we run into the situation where CPU0 is idle but a secondary
> > core is not. Depending on that we can then decide how useful coupled cpuidle
> > would be for us.
> 
> Would it not be 75% of the time where we have 1 of 4 CPUs active? At
> least, that's assuming that all work is evenly distributed amongst CPUs,
> and hence it's random which CPU is the last to go idle, but perhaps
> that's not the case if CPU0 is somehow special workload-wise?
> 

Depends, at least it used to be possible to tune the scheduler to prefer
CPU0 if the workload can run on a single CPU.

Cheers,

Peter.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter De Schrijver - Oct. 25, 2012, 2:08 p.m.
On Thu, Oct 18, 2012 at 11:24:40AM +0200, Peter De Schrijver wrote:
> On Tue, Oct 16, 2012 at 07:03:43PM +0200, Stephen Warren wrote:
> > On 10/16/2012 02:06 AM, Peter De Schrijver wrote:
> > >>> Even though we have plan to use coupled cpuidle, I still prefer to go
> > >>> with the LP2 driver first. Then adding one more patch to support coupled
> > >>> cpuidle based on LP2 driver. This is good for history. And if there is
> > >>> any issue, it's more easy to roll back to the stable one.
> > >>
> > >> I don't think that implementing it one way and then changing to a
> > >> different way will benefit history at all. It'll make the history more
> > >> complicated. What exactly is the problem with just using coupled cpuidle
> > >> from the start? If we did merge this implementation now, then switch to
> > >> coupled cpuidle later, when do you think the switch would happen?
> > > 
> > > Before we consider doing this, I think we should have some idea on how
> > > frequently we run into the situation where CPU0 is idle but a secondary
> > > core is not. Depending on that we can then decide how useful coupled cpuidle
> > > would be for us.
> > 
> > Would it not be 75% of the time where we have 1 of 4 CPUs active? At
> > least, that's assuming that all work is evenly distributed amongst CPUs,
> > and hence it's random which CPU is the last to go idle, but perhaps
> > that's not the case if CPU0 is somehow special workload-wise?
> > 
> 
> Depends, at least it used to be possible to tune the scheduler to prefer
> CPU0 if the workload can run on a single CPU.
> 

I just noticed https://lwn.net/Articles/518834/. If we can configure this to
prefer CPU0 in case all work can be done on a single core, we shouldn't
hit the case were a secondary CPU is the only active CPU for a significant
period of time.

Cheers,

Peter.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
index 842fef4..1fd938b 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra30.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra30.c
@@ -28,6 +28,7 @@ 
 
 #include "pm.h"
 #include "sleep.h"
+#include "tegra_cpu_car.h"
 
 #ifdef CONFIG_PM_SLEEP
 static int tegra30_idle_lp2(struct cpuidle_device *dev,
@@ -59,6 +60,28 @@  static struct cpuidle_driver tegra_idle_driver = {
 static DEFINE_PER_CPU(struct cpuidle_device, tegra_idle_device);
 
 #ifdef CONFIG_PM_SLEEP
+static bool tegra30_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
+					 struct cpuidle_driver *drv,
+					 int index)
+{
+	struct cpuidle_state *state = &drv->states[index];
+	u32 cpu_on_time = state->exit_latency;
+	u32 cpu_off_time = state->target_residency - state->exit_latency;
+
+	if (num_online_cpus() > 1 && !tegra_cpu_rail_off_ready()) {
+		cpu_do_idle();
+		return false;
+	}
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+
+	tegra_idle_lp2_last(cpu_on_time, cpu_off_time);
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+
+	return true;
+}
+
 static bool tegra30_idle_enter_lp2_cpu_n(struct cpuidle_device *dev,
 					 struct cpuidle_driver *drv,
 					 int index)
@@ -85,16 +108,22 @@  static int __cpuinit tegra30_idle_lp2(struct cpuidle_device *dev,
 				      int index)
 {
 	bool entered_lp2 = false;
+	bool last_cpu;
 
 	local_fiq_disable();
 
-	tegra_set_cpu_in_lp2(dev->cpu);
+	last_cpu = tegra_set_cpu_in_lp2(dev->cpu);
 	cpu_pm_enter();
 
-	if (dev->cpu == 0)
-		cpu_do_idle();
-	else
+	if (dev->cpu == 0) {
+		if (last_cpu)
+			entered_lp2 = tegra30_idle_enter_lp2_cpu_0(dev, drv,
+								   index);
+		else
+			cpu_do_idle();
+	} else {
 		entered_lp2 = tegra30_idle_enter_lp2_cpu_n(dev, drv, index);
+	}
 
 	cpu_pm_exit();
 	tegra_clear_cpu_in_lp2(dev->cpu);
@@ -116,6 +145,8 @@  int __init tegra30_cpuidle_init(void)
 
 #ifndef CONFIG_PM_SLEEP
 	drv->state_count = 1;	/* support clockgating only */
+#else
+	tegra_tear_down_cpu = tegra30_tear_down_cpu;
 #endif
 
 	ret = cpuidle_register_driver(&tegra_idle_driver);
diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
index 4655383..2e71bad 100644
--- a/arch/arm/mach-tegra/pm.c
+++ b/arch/arm/mach-tegra/pm.c
@@ -20,15 +20,38 @@ 
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/cpu_pm.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/suspend.h>
+#include <asm/idmap.h>
+#include <asm/proc-fns.h>
+#include <asm/tlbflush.h>
 
 #include <mach/iomap.h>
 
 #include "reset.h"
+#include "flowctrl.h"
+#include "sleep.h"
+#include "tegra_cpu_car.h"
+
+#define TEGRA_POWER_CPU_PWRREQ_OE	(1 << 16)  /* CPU pwr req enable */
+
+#define PMC_CTRL		0x0
+#define PMC_CPUPWRGOOD_TIMER	0xc8
+#define PMC_CPUPWROFF_TIMER	0xcc
 
 #ifdef CONFIG_PM_SLEEP
 static unsigned int g_diag_reg;
 static DEFINE_SPINLOCK(tegra_lp2_lock);
 static cpumask_t tegra_in_lp2;
+static void __iomem *pmc = IO_ADDRESS(TEGRA_PMC_BASE);
+static struct clk *tegra_pclk;
+void (*tegra_tear_down_cpu)(void);
 
 void save_cpu_arch_register(void)
 {
@@ -44,6 +67,96 @@  void restore_cpu_arch_register(void)
 	return;
 }
 
+static void set_power_timers(unsigned long us_on, unsigned long us_off)
+{
+	unsigned long long ticks;
+	unsigned long long pclk;
+	unsigned long rate;
+	static unsigned long tegra_last_pclk;
+
+	if (tegra_pclk == NULL) {
+		tegra_pclk = clk_get_sys(NULL, "pclk");
+		if (IS_ERR(tegra_pclk)) {
+			/*
+			 * pclk not been init or not exist.
+			 * Use sclk to take the place of it.
+			 * The default setting was pclk=sclk.
+			 */
+			tegra_pclk = clk_get_sys(NULL, "sclk");
+		}
+	}
+
+	rate = clk_get_rate(tegra_pclk);
+
+	if (WARN_ON_ONCE(rate <= 0))
+		pclk = 100000000;
+	else
+		pclk = rate;
+
+	if ((rate != tegra_last_pclk)) {
+		ticks = (us_on * pclk) + 999999ull;
+		do_div(ticks, 1000000);
+		writel((unsigned long)ticks, pmc + PMC_CPUPWRGOOD_TIMER);
+
+		ticks = (us_off * pclk) + 999999ull;
+		do_div(ticks, 1000000);
+		writel((unsigned long)ticks, pmc + PMC_CPUPWROFF_TIMER);
+		wmb();
+	}
+	tegra_last_pclk = pclk;
+}
+
+/*
+ * restore_cpu_complex
+ *
+ * restores cpu clock setting, clears flow controller
+ *
+ * Always called on CPU 0.
+ */
+static void restore_cpu_complex(void)
+{
+	int cpu = smp_processor_id();
+
+	BUG_ON(cpu != 0);
+
+#ifdef CONFIG_SMP
+	cpu = cpu_logical_map(cpu);
+#endif
+
+	/* Restore the CPU clock settings */
+	tegra_cpu_clock_resume();
+
+	flowctrl_cpu_suspend_exit(cpu);
+
+	restore_cpu_arch_register();
+}
+
+/*
+ * suspend_cpu_complex
+ *
+ * saves pll state for use by restart_plls, prepares flow controller for
+ * transition to suspend state
+ *
+ * Must always be called on cpu 0.
+ */
+static void suspend_cpu_complex(void)
+{
+	int cpu = smp_processor_id();
+
+	BUG_ON(cpu != 0);
+
+#ifdef CONFIG_SMP
+	cpu = cpu_logical_map(cpu);
+#endif
+
+	/* Save the CPU clock settings */
+	tegra_cpu_clock_suspend();
+
+	flowctrl_cpu_suspend_enter(cpu);
+
+	save_cpu_arch_register();
+}
+
 void __cpuinit tegra_clear_cpu_in_lp2(int cpu)
 {
 	spin_lock(&tegra_lp2_lock);
@@ -85,4 +198,43 @@  bool __cpuinit tegra_set_cpu_in_lp2(int cpu)
 	spin_unlock(&tegra_lp2_lock);
 	return last_cpu;
 }
+
+static int tegra_sleep_cpu(unsigned long v2p)
+{
+	/* Switch to the identity mapping. */
+	cpu_switch_mm(idmap_pgd, &init_mm);
+
+	/* Flush the TLB. */
+	local_flush_tlb_all();
+
+	tegra_sleep_cpu_finish(v2p);
+
+	/* should never get here */
+	BUG();
+
+	return 0;
+}
+
+void tegra_idle_lp2_last(u32 cpu_on_time, u32 cpu_off_time)
+{
+	u32 mode;
+
+	/* Only the last cpu down does the final suspend steps */
+	mode = readl(pmc + PMC_CTRL);
+	mode |= TEGRA_POWER_CPU_PWRREQ_OE;
+	writel(mode, pmc + PMC_CTRL);
+
+	set_power_timers(cpu_on_time, cpu_off_time);
+
+	cpu_cluster_pm_enter();
+	suspend_cpu_complex();
+	flush_cache_all();
+	outer_disable();
+
+	cpu_suspend(PHYS_OFFSET - PAGE_OFFSET, &tegra_sleep_cpu);
+
+	outer_resume();
+	restore_cpu_complex();
+	cpu_cluster_pm_exit();
+}
 #endif
diff --git a/arch/arm/mach-tegra/pm.h b/arch/arm/mach-tegra/pm.h
index 21858d6..63ffe02 100644
--- a/arch/arm/mach-tegra/pm.h
+++ b/arch/arm/mach-tegra/pm.h
@@ -27,4 +27,7 @@  void restore_cpu_arch_register(void);
 void tegra_clear_cpu_in_lp2(int cpu);
 bool tegra_set_cpu_in_lp2(int cpu);
 
+void tegra_idle_lp2_last(u32 cpu_on_time, u32 cpu_off_time);
+extern void (*tegra_tear_down_cpu)(void);
+
 #endif /* _MACH_TEGRA_PM_H_ */
diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
index 67b1cba..aef5ce7 100644
--- a/arch/arm/mach-tegra/sleep-tegra30.S
+++ b/arch/arm/mach-tegra/sleep-tegra30.S
@@ -130,4 +130,48 @@  ENTRY(tegra30_sleep_cpu_secondary_finish)
 	mov	r0, #1                          @ never return here
 	mov	pc, r7
 ENDPROC(tegra30_sleep_cpu_secondary_finish)
+
+/*
+ * tegra30_tear_down_cpu
+ *
+ * Switches the CPU to enter sleep.
+ */
+ENTRY(tegra30_tear_down_cpu)
+	mov32	r6, TEGRA_FLOW_CTRL_BASE
+
+	b	tegra30_enter_sleep
+ENDPROC(tegra30_tear_down_cpu)
+
+/*
+ * tegra30_enter_sleep
+ *
+ * uses flow controller to enter sleep state
+ * executes from IRAM with SDRAM in selfrefresh when target state is LP0 or LP1
+ * executes from SDRAM when the target state is LP2
+ * r6 = TEGRA_FLOW_CTRL_BASE
+ */
+tegra30_enter_sleep:
+	cpu_id	r1
+
+	cpu_to_csr_reg	r2, r1
+	ldr	r0, [r6, r2]
+	orr	r0, r0, #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG
+	orr	r0, r0, #FLOW_CTRL_CSR_ENABLE
+	str	r0, [r6, r2]
+
+	mov	r0, #FLOW_CTRL_WAIT_FOR_INTERRUPT
+	orr	r0, r0, #FLOW_CTRL_HALT_CPU_IRQ | FLOW_CTRL_HALT_CPU_FIQ
+	cpu_to_halt_reg r2, r1
+	str	r0, [r6, r2]
+	dsb
+	ldr	r0, [r6, r2] /* memory barrier */
+
+halted:
+	isb
+	dsb
+	wfi	/* CPU should be power gated here */
+
+	/* !!!FIXME!!! Implement halt failure handler */
+	b	halted
+
 #endif
diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S
index 7748fdb..3ab1e82 100644
--- a/arch/arm/mach-tegra/sleep.S
+++ b/arch/arm/mach-tegra/sleep.S
@@ -25,6 +25,7 @@ 
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cp15.h>
 
 #include <mach/iomap.h>
@@ -96,4 +97,47 @@  finished:
 	mov	pc, lr
 ENDPROC(tegra_flush_l1_cache)
 
+/*
+ * tegra_sleep_cpu_finish(unsigned long v2p)
+ *
+ * enters suspend in LP2 by turning off the mmu and jumping to
+ * tegra?_tear_down_cpu
+ */
+ENTRY(tegra_sleep_cpu_finish)
+	/* Flush and disable the L1 data cache */
+	bl	tegra_flush_l1_cache
+	/* Turn off coherency */
+	exit_smp r4, r5
+
+	mov32	r6, tegra_tear_down_cpu
+	ldr	r1, [r6]
+	add	r1, r1, r0
+
+	mov32	r3, tegra_shut_off_mmu
+	add	r3, r3, r0
+	mov	r0, r1
+
+	mov	pc, r3
+ENDPROC(tegra_sleep_cpu_finish)
+
+/*
+ * tegra_shut_off_mmu
+ *
+ * r0 = physical address to jump to with mmu off
+ *
+ * called with VA=PA mapping
+ * turns off MMU, icache, dcache and branch prediction
+ */
+	.align	L1_CACHE_SHIFT
+	.pushsection	.idmap.text, "ax"
+ENTRY(tegra_shut_off_mmu)
+	mrc	p15, 0, r3, c1, c0, 0
+	movw	r2, #CR_I | CR_Z | CR_C | CR_M
+	bic	r3, r3, r2
+	dsb
+	mcr	p15, 0, r3, c1, c0, 0
+	isb
+	mov	pc, r0
+ENDPROC(tegra_shut_off_mmu)
+	.popsection
 #endif
diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h
index 220fbd1..001920f 100644
--- a/arch/arm/mach-tegra/sleep.h
+++ b/arch/arm/mach-tegra/sleep.h
@@ -73,6 +73,7 @@ 
 .endm
 #else
 void tegra_resume(void);
+int tegra_sleep_cpu_finish(unsigned long);
 
 #ifdef CONFIG_HOTPLUG_CPU
 void tegra20_hotplug_init(void);
@@ -83,6 +84,7 @@  static inline void tegra30_hotplug_init(void) {}
 #endif
 
 int tegra30_sleep_cpu_secondary_finish(unsigned long);
+void tegra30_tear_down_cpu(void);
 
 #endif
 #endif