diff mbox

cpufreq: powernv: Register the driver with reboot notifier

Message ID 1408015178-21745-1-git-send-email-shilpa.bhat@linux.vnet.ibm.com (mailing list archive)
State Not Applicable
Delegated to: Michael Ellerman
Headers show

Commit Message

Shilpasri G Bhat Aug. 14, 2014, 11:19 a.m. UTC
This patch ensures that the cpus kexec/reboot at nominal frequency.
Nominal frequency is the highest cpu frequency on PowerPC at
which the cores can run without getting throttled.

If the host kernel has set the cpus to a low pstate and then
kexecs/reboots into a cpufreq-disabled kernel, the target kernel will
perform poorly. It will also increase the boot time of the target
kernel. So set the cpus to a high pstate, in this case the nominal
frequency, before rebooting to avoid such scenarios.

The reboot notifier will suspend the cpufreq governor and enable
nominal frequency to be set during a reboot/kexec, similar to the
suspend operation.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---
 drivers/cpufreq/powernv-cpufreq.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

Comments

Viresh Kumar Aug. 18, 2014, 7:46 a.m. UTC | #1
On 14 August 2014 16:49, Shilpasri G Bhat
<shilpa.bhat@linux.vnet.ibm.com> wrote:
> This patch ensures the cpus to kexec/reboot at nominal frequency.
> Nominal frequency is the highest cpu frequency on PowerPC at
> which the cores can run without getting throttled.
>
> If the host kernel had set the cpus to a low pstate and then it
> kexecs/reboots to a cpufreq disabled kernel it would cause the target
> kernel to perform poorly. It will also increase the boot up time of
> the target kernel. So set the cpus to high pstate, in this case to
> nominal frequency before rebooting to avoid such scenarios.
>
> The reboot notifier will suspend the cpufreq governor and enable
> nominal frequency to be set during a reboot/kexec similar to the
> suspend operation.
>
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
> Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index 379c083..e9f3d3a 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -26,6 +26,7 @@
>  #include <linux/cpufreq.h>
>  #include <linux/smp.h>
>  #include <linux/of.h>
> +#include <linux/reboot.h>
>
>  #include <asm/cputhreads.h>
>  #include <asm/firmware.h>
> @@ -314,9 +315,21 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
>         for (i = 0; i < threads_per_core; i++)
>                 cpumask_set_cpu(base + i, policy->cpus);
>
> +       policy->suspend_freq = pstate_id_to_freq(powernv_pstate_info.nominal);
>         return cpufreq_table_validate_and_show(policy, powernv_freqs);
>  }
>
> +static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
> +                               unsigned long action, void *unused)
> +{
> +       cpufreq_suspend();
> +       return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block powernv_cpufreq_reboot_nb = {
> +       .notifier_call = powernv_cpufreq_reboot_notifier,
> +};
> +
>  static struct cpufreq_driver powernv_cpufreq_driver = {
>         .name           = "powernv-cpufreq",
>         .flags          = CPUFREQ_CONST_LOOPS,
> @@ -325,6 +338,7 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
>         .target_index   = powernv_cpufreq_target_index,
>         .get            = powernv_cpufreq_get,
>         .attr           = powernv_cpu_freq_attr,
> +       .suspend        = cpufreq_generic_suspend,

I couldn't understand why you have added a notifier here. This callback
by itself should be enough. Isn't it?

And then you have called cpufreq_suspend(), which is absolutely wrong,
from that notifier..
Shilpasri G Bhat Aug. 21, 2014, 5:06 a.m. UTC | #2
On 08/18/2014 01:16 PM, Viresh Kumar wrote:
> On 14 August 2014 16:49, Shilpasri G Bhat
> <shilpa.bhat@linux.vnet.ibm.com> wrote:
>> This patch ensures the cpus to kexec/reboot at nominal frequency.
>> Nominal frequency is the highest cpu frequency on PowerPC at
>> which the cores can run without getting throttled.
>>
>> If the host kernel had set the cpus to a low pstate and then it
>> kexecs/reboots to a cpufreq disabled kernel it would cause the target
>> kernel to perform poorly. It will also increase the boot up time of
>> the target kernel. So set the cpus to high pstate, in this case to
>> nominal frequency before rebooting to avoid such scenarios.
>>
>> The reboot notifier will suspend the cpufreq governor and enable
>> nominal frequency to be set during a reboot/kexec similar to the
>> suspend operation.
>>
>> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
>> Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
>> ---
>>   drivers/cpufreq/powernv-cpufreq.c | 16 ++++++++++++++++
>>   1 file changed, 16 insertions(+)
>>
>> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
>> index 379c083..e9f3d3a 100644
>> --- a/drivers/cpufreq/powernv-cpufreq.c
>> +++ b/drivers/cpufreq/powernv-cpufreq.c
>> @@ -26,6 +26,7 @@
>>   #include <linux/cpufreq.h>
>>   #include <linux/smp.h>
>>   #include <linux/of.h>
>> +#include <linux/reboot.h>
>>
>>   #include <asm/cputhreads.h>
>>   #include <asm/firmware.h>
>> @@ -314,9 +315,21 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
>>          for (i = 0; i < threads_per_core; i++)
>>                  cpumask_set_cpu(base + i, policy->cpus);
>>
>> +       policy->suspend_freq = pstate_id_to_freq(powernv_pstate_info.nominal);
>>          return cpufreq_table_validate_and_show(policy, powernv_freqs);
>>   }
>>
>> +static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
>> +                               unsigned long action, void *unused)
>> +{
>> +       cpufreq_suspend();
>> +       return NOTIFY_DONE;
>> +}
>> +
>> +static struct notifier_block powernv_cpufreq_reboot_nb = {
>> +       .notifier_call = powernv_cpufreq_reboot_notifier,
>> +};
>> +
>>   static struct cpufreq_driver powernv_cpufreq_driver = {
>>          .name           = "powernv-cpufreq",
>>          .flags          = CPUFREQ_CONST_LOOPS,
>> @@ -325,6 +338,7 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
>>          .target_index   = powernv_cpufreq_target_index,
>>          .get            = powernv_cpufreq_get,
>>          .attr           = powernv_cpu_freq_attr,
>> +       .suspend        = cpufreq_generic_suspend,
> I couldn't understand why you have added a notifier here. This callback
> by itself should be enough. Isn't it?
>
> And then you have called cpufreq_suspend(), which is absolutely wrong,
> from that notifier..

Hi Viresh,

The intention here is to stop the cpufreq governor and then set the cpus to
nominal frequency, so as to ensure that the frequency won't be changed later.

The .suspend callback of the driver is not called during reboot/kexec.
So we need an explicit reboot notifier that calls cpufreq_suspend() to
satisfy the requirement.

Thanks and Regards,
Shilpa
Viresh Kumar Aug. 21, 2014, 6:26 a.m. UTC | #3
On 21 August 2014 10:36, Shilpasri G Bhat <shilpabhatppc@gmail.com> wrote:
> The intention here is stop the cpufreq governor and then to set the cpus to
> nominal frequency so as to ensure that the frequency won't be changed later.
>
> The .suspend callback of the driver is not called during reboot/kexec.
> So we need an explicit reboot notifier to call cpufreq-suspend() to
> suffice the requirement.

Hi Shilpa,

No, we can't allow any platform driver to misuse cpufreq_suspend().
Platform drivers aren't *allowed* to call this routine.

Now the deal is how do we move to nominal frequency on reboot..
@Rafael: Any suggestions? How do we ensure that governors
are stopped on these notifiers, or if there is some other solution here?
Preeti U Murthy Aug. 22, 2014, 2:48 a.m. UTC | #4
Hi Viresh,

On 08/21/2014 11:56 AM, Viresh Kumar wrote:
> On 21 August 2014 10:36, Shilpasri G Bhat <shilpabhatppc@gmail.com> wrote:
>> The intention here is stop the cpufreq governor and then to set the cpus to
>> nominal frequency so as to ensure that the frequency won't be changed later.
>>
>> The .suspend callback of the driver is not called during reboot/kexec.
>> So we need an explicit reboot notifier to call cpufreq-suspend() to
>> suffice the requirement.
> 
> Hi Shilpa,
> 
> No, we can't allow any platform driver to misuse cpufreq_suspend().
> Platform drivers aren't *allowed* to call this routine.

At the moment this looks like the best way forward. We need to do this
cleanly by ensuring that we stop the governors and then call into the
driver to deal with the cpu frequency in its own way during reboot. The
best way to do this would be by calling this routine. Either this or
cpufreq_suspend() should be called in the reboot path generically. The
latter might not be an enticing option for other platforms.

Regards
Preeti U Murthy
> 
> Now the deal is how do we move to nominal frequency on reboot..
> @Rafael: Any suggestions? How do we ensure that governors
> are stopped on these notifiers, or if there is some other solution here?
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
Viresh Kumar Aug. 25, 2014, 10:30 a.m. UTC | #5
On 22 August 2014 08:18, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
> At the moment this looks like the best way forward. We need to do this
> cleanly by ensuring that we stop the governors and then call into the
> driver to deal with the cpu frequency in its own way during reboot. The
> best way to do this would be by calling this routine. Either this or
> cpufreq_suspend() should be called in the reboot path generically. The
> latter might not be an enticing option for other platforms.

It's not that I am doubting whether this will work. But this hack is using
routines not meant for this purpose. And that being a core routine,
things aren't that straightforward anymore.

@Rafael: Ping!!
diff mbox

Patch

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 379c083..e9f3d3a 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -26,6 +26,7 @@ 
 #include <linux/cpufreq.h>
 #include <linux/smp.h>
 #include <linux/of.h>
+#include <linux/reboot.h>
 
 #include <asm/cputhreads.h>
 #include <asm/firmware.h>
@@ -314,9 +315,21 @@  static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	for (i = 0; i < threads_per_core; i++)
 		cpumask_set_cpu(base + i, policy->cpus);
 
+	policy->suspend_freq = pstate_id_to_freq(powernv_pstate_info.nominal);
 	return cpufreq_table_validate_and_show(policy, powernv_freqs);
 }
 
+static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
+				unsigned long action, void *unused)
+{
+	cpufreq_suspend();
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block powernv_cpufreq_reboot_nb = {
+	.notifier_call = powernv_cpufreq_reboot_notifier,
+};
+
 static struct cpufreq_driver powernv_cpufreq_driver = {
 	.name		= "powernv-cpufreq",
 	.flags		= CPUFREQ_CONST_LOOPS,
@@ -325,6 +338,7 @@  static struct cpufreq_driver powernv_cpufreq_driver = {
 	.target_index	= powernv_cpufreq_target_index,
 	.get		= powernv_cpufreq_get,
 	.attr		= powernv_cpu_freq_attr,
+	.suspend        = cpufreq_generic_suspend,
 };
 
 static int __init powernv_cpufreq_init(void)
@@ -342,12 +356,14 @@  static int __init powernv_cpufreq_init(void)
 		return rc;
 	}
 
+	register_reboot_notifier(&powernv_cpufreq_reboot_nb);
 	return cpufreq_register_driver(&powernv_cpufreq_driver);
 }
 module_init(powernv_cpufreq_init);
 
 static void __exit powernv_cpufreq_exit(void)
 {
+	unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
 	cpufreq_unregister_driver(&powernv_cpufreq_driver);
 }
 module_exit(powernv_cpufreq_exit);