diff mbox

[2/2] cpufreq: powernv: Register for OCC related opal_message notification

Message ID 1429722265-2953-2-git-send-email-shilpa.bhat@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Shilpasri G Bhat April 22, 2015, 5:04 p.m. UTC
OCC is an On-Chip-Controller which takes care of power and thermal
safety of the chip. During runtime due to power failure or
overtemperature the OCC may throttle the frequencies of the CPUs to
remain within the power budget.

We want the cpufreq driver to be aware of such situations to be able
to report it to the user. We register to opal_message_notifier to
receive OCC messages from opal.

powernv_cpufreq_throttle_check() reports any frequency throttling and
this patch will report the reason or event that caused throttling. We
can be throttled if OCC is reset or OCC limits Pmax due to power or
thermal reasons. We are also notified of unthrottling after an OCC
reset or if OCC restores Pmax on the chip.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
CC: "Rafael J. Wysocki" <rjw@rjwysocki.net>
CC: Viresh Kumar <viresh.kumar@linaro.org>
CC: linux-pm@vger.kernel.org
---
 drivers/cpufreq/powernv-cpufreq.c | 70 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

Comments

Preeti U Murthy April 23, 2015, 11:58 a.m. UTC | #1
Hi Shilpa,

On 04/22/2015 10:34 PM, Shilpasri G Bhat wrote:
> OCC is an On-Chip-Controller which takes care of power and thermal
> safety of the chip. During runtime due to power failure or
> overtemperature the OCC may throttle the frequencies of the CPUs to
> remain within the power budget.
> 
> We want the cpufreq driver to be aware of such situations to be able
> to report it to the user. We register to opal_message_notifier to
> receive OCC messages from opal.
> 
> powernv_cpufreq_throttle_check() reports any frequency throttling and
> this patch will report the reason or event that caused throttling. We
> can be throttled if OCC is reset or OCC limits Pmax due to power or
> thermal reasons. We are also notified of unthrottling after an OCC
> reset or if OCC restores Pmax on the chip.
> 
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
> CC: "Rafael J. Wysocki" <rjw@rjwysocki.net>
> CC: Viresh Kumar <viresh.kumar@linaro.org>
> CC: linux-pm@vger.kernel.org
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 70 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 69 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index ebef0d8..5718765 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -32,6 +32,7 @@
>  #include <asm/firmware.h>
>  #include <asm/reg.h>
>  #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
> +#include <asm/opal.h>
> 
>  #define POWERNV_MAX_PSTATES	256
>  #define PMSR_PSAFE_ENABLE	(1UL << 30)
> @@ -40,7 +41,7 @@
>  #define PMSR_LP(x)		((x >> 48) & 0xFF)
> 
>  static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
> -static bool rebooting, throttled;
> +static bool rebooting, throttled, occ_reset;
> 
>  /*
>   * Note: The set of pstates consists of contiguous integers, the
> @@ -395,6 +396,72 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
>  	.notifier_call = powernv_cpufreq_reboot_notifier,
>  };
> 
> +static char throttle_reason[6][50] = {	"No throttling",
> +					"Power Cap",
> +					"Processor Over Temperature",
> +					"Power Supply Failure",
> +					"OverCurrent",
> +					"OCC Reset"
> +				     };
> +
> +static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
> +		unsigned long msg_type, void *msg)
> +{
> +	struct opal_msg *occ_msg = msg;
> +	uint64_t token;
> +	uint64_t chip_id, reason;
> +
> +	if (msg_type != OPAL_MSG_OCC)
> +		return 0;
> +	token = be64_to_cpu(occ_msg->params[0]);
> +	switch (token) {
> +	case 0:
> +		occ_reset = true;
> +		/*
> +		 * powernv_cpufreq_throttle_check() is called in
> +		 * target() callback which can detect the throttle state
> +		 * for governors like ondemand.
> +		 * But static governors will not call target() often thus
> +		 * report throttling here.
> +		 */
> +		if (!throttled) {
> +			throttled = true;
> +			pr_crit("CPU Frequency is throttled\n");
> +		}
> +		pr_info("OCC in Reset\n");
> +		break;
> +	case 1:
> +		pr_info("OCC is Loaded\n");
> +		break;
> +	case 2:

You may want to replace the numbers with macros. Like
OCC_RESET,OCC_LOAD, OCC_THROTTLE for better readability.

> +		chip_id = be64_to_cpu(occ_msg->params[1]);
> +		reason = be64_to_cpu(occ_msg->params[2]);
> +		if (occ_reset) {
> +			occ_reset = false;
> +			throttled = false;
> +			pr_info("OCC is Active\n");
> +			/* Sanity check for static governors */
> +			powernv_cpufreq_throttle_check(smp_processor_id());
> +		} else if (reason) {
> +			throttled = true;
> +			pr_info("Pmax reduced due to %s on chip %x\n",
> +					throttle_reason[reason], (int)chip_id);
> +		} else {
> +			throttled = false;
> +			pr_info("%s on chip %x\n",
> +					throttle_reason[reason], (int)chip_id);

Don't you need a powernv_cpufreq_throttle_check() here?  Or is it ok to
rely on the OCC notification for unthrottle ?

Regards
Preeti U Murthy

> +		}
> +		break;
> +	}
> +	return 0;
> +}
> +
> +static struct notifier_block powernv_cpufreq_opal_nb = {
> +	.notifier_call	= powernv_cpufreq_occ_msg,
> +	.next		= NULL,
> +	.priority	= 0,
> +};
> +
>  static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
>  {
>  	struct powernv_smp_call_data freq_data;
> @@ -430,6 +497,7 @@ static int __init powernv_cpufreq_init(void)
>  	}
> 
>  	register_reboot_notifier(&powernv_cpufreq_reboot_nb);
> +	opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
>  	return cpufreq_register_driver(&powernv_cpufreq_driver);
>  }
>  module_init(powernv_cpufreq_init);
>
Viresh Kumar April 27, 2015, 4:32 a.m. UTC | #2
On 22 April 2015 at 22:34, Shilpasri G Bhat
<shilpa.bhat@linux.vnet.ibm.com> wrote:
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c

> +static char throttle_reason[6][50] = { "No throttling",

Don't need to mention 6 here.

And the max length you need right now is 27, so maybe s/50/30 ?

Also, start 'No Throttling' in a new line, like below.

> +                                       "Power Cap",
> +                                       "Processor Over Temperature",
> +                                       "Power Supply Failure",
> +                                       "OverCurrent",

s/OverCurrent/Over Current/ ?

> +                                       "OCC Reset"
> +                                    };
> +
> +static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
> +               unsigned long msg_type, void *msg)
> +{
> +       struct opal_msg *occ_msg = msg;
> +       uint64_t token;
> +       uint64_t chip_id, reason;
> +
> +       if (msg_type != OPAL_MSG_OCC)
> +               return 0;

Blank line here.

> +       token = be64_to_cpu(occ_msg->params[0]);

Here as well..

> +       switch (token) {
> +       case 0:
> +               occ_reset = true;
> +               /*
> +                * powernv_cpufreq_throttle_check() is called in
> +                * target() callback which can detect the throttle state
> +                * for governors like ondemand.
> +                * But static governors will not call target() often thus
> +                * report throttling here.
> +                */

Now, do I understand correctly that this notifier will be called as
soon as we switch throttling state ?

If yes, then do we still need the throttle_check() routine you added
earlier ? Maybe not.

> +               if (!throttled) {
> +                       throttled = true;
> +                       pr_crit("CPU Frequency is throttled\n");
> +               }
> +               pr_info("OCC in Reset\n");
> +               break;
> +       case 1:
> +               pr_info("OCC is Loaded\n");
> +               break;
> +       case 2:
> +               chip_id = be64_to_cpu(occ_msg->params[1]);
> +               reason = be64_to_cpu(occ_msg->params[2]);

Blank line here.

> +               if (occ_reset) {
> +                       occ_reset = false;
> +                       throttled = false;
> +                       pr_info("OCC is Active\n");
> +                       /* Sanity check for static governors */
> +                       powernv_cpufreq_throttle_check(smp_processor_id());
> +               } else if (reason) {
> +                       throttled = true;
> +                       pr_info("Pmax reduced due to %s on chip %x\n",
> +                                       throttle_reason[reason], (int)chip_id);
> +               } else {
> +                       throttled = false;
> +                       pr_info("%s on chip %x\n",
> +                                       throttle_reason[reason], (int)chip_id);
> +               }

Run checkpatch with --strict option, and you will see some warnings.

> +               break;
> +       }
> +       return 0;
> +}
> +
> +static struct notifier_block powernv_cpufreq_opal_nb = {
> +       .notifier_call  = powernv_cpufreq_occ_msg,
> +       .next           = NULL,
> +       .priority       = 0,
> +};
> +
>  static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
>  {
>         struct powernv_smp_call_data freq_data;
> @@ -430,6 +497,7 @@ static int __init powernv_cpufreq_init(void)
>         }
>
>         register_reboot_notifier(&powernv_cpufreq_reboot_nb);
> +       opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
>         return cpufreq_register_driver(&powernv_cpufreq_driver);
>  }
>  module_init(powernv_cpufreq_init);
> --
> 1.9.3
>
Shilpasri G Bhat April 28, 2015, 5:36 a.m. UTC | #3
Hi Viresh,

On 04/27/2015 10:02 AM, Viresh Kumar wrote:
> On 22 April 2015 at 22:34, Shilpasri G Bhat
> <shilpa.bhat@linux.vnet.ibm.com> wrote:
>> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> 
>> +static char throttle_reason[6][50] = { "No throttling",
> 
> Don't need to mention 6 here.
> 
> And the max length you need right now is 27, so maybe s/50/30 ?
> 
> Also, start 'No Throttling' in a new line, like below.

Will do.
> 
>> +                                       "Power Cap",
>> +                                       "Processor Over Temperature",
>> +                                       "Power Supply Failure",
>> +                                       "OverCurrent",
> 
> s/OverCurrent/Over Current/ ?

Okay.
> 
>> +                                       "OCC Reset"
>> +                                    };
>> +
>> +static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
>> +               unsigned long msg_type, void *msg)
>> +{
>> +       struct opal_msg *occ_msg = msg;
>> +       uint64_t token;
>> +       uint64_t chip_id, reason;
>> +
>> +       if (msg_type != OPAL_MSG_OCC)
>> +               return 0;
> 
> Blank line here.

Okay
> 
>> +       token = be64_to_cpu(occ_msg->params[0]);
> 
> Here as well..
> 
>> +       switch (token) {
>> +       case 0:
>> +               occ_reset = true;
>> +               /*
>> +                * powernv_cpufreq_throttle_check() is called in
>> +                * target() callback which can detect the throttle state
>> +                * for governors like ondemand.
>> +                * But static governors will not call target() often thus
>> +                * report throttling here.
>> +                */
> 
> Now, do I understand correctly that this notifier will be called as
> soon as we switch throttling state ?
> 
> If yes, then do we still need the throttle_check() routine you added
> earlier ? Maybe not.

We cannot remove throttle_check() routine for the following reasons:
1) To report old firmware bugs which do not restore frequency control to host
after an OCC reset.
2) In BMC based boxes if OCC crashes currently firmware will not send 'reset'
and 'load' messages, in such cases throttle_check() will be sufficient to
monitor a throttled state caused by 'reset'.
3) Throttle reporting in old firmwares which do not have this notification.

> 
>> +               if (!throttled) {
>> +                       throttled = true;
>> +                       pr_crit("CPU Frequency is throttled\n");
>> +               }
>> +               pr_info("OCC in Reset\n");
>> +               break;
>> +       case 1:
>> +               pr_info("OCC is Loaded\n");
>> +               break;
>> +       case 2:
>> +               chip_id = be64_to_cpu(occ_msg->params[1]);
>> +               reason = be64_to_cpu(occ_msg->params[2]);
> 
> Blank line here.

Okay
> 
>> +               if (occ_reset) {
>> +                       occ_reset = false;
>> +                       throttled = false;
>> +                       pr_info("OCC is Active\n");
>> +                       /* Sanity check for static governors */
>> +                       powernv_cpufreq_throttle_check(smp_processor_id());
>> +               } else if (reason) {
>> +                       throttled = true;
>> +                       pr_info("Pmax reduced due to %s on chip %x\n",
>> +                                       throttle_reason[reason], (int)chip_id);
>> +               } else {
>> +                       throttled = false;
>> +                       pr_info("%s on chip %x\n",
>> +                                       throttle_reason[reason], (int)chip_id);
>> +               }
> 
> Run checkpatch with --strict option, and you will see some warnings.

Okay will do.

Thanks and Regards,
Shilpa
Shilpasri G Bhat April 28, 2015, 5:40 a.m. UTC | #4
Hi Preeti,

On 04/23/2015 05:28 PM, Preeti U Murthy wrote:
> Hi Shilpa,
> 
> On 04/22/2015 10:34 PM, Shilpasri G Bhat wrote:
>> OCC is an On-Chip-Controller which takes care of power and thermal
>> safety of the chip. During runtime due to power failure or
>> overtemperature the OCC may throttle the frequencies of the CPUs to
>> remain within the power budget.
>>
>> We want the cpufreq driver to be aware of such situations to be able
>> to report it to the user. We register to opal_message_notifier to
>> receive OCC messages from opal.
>>
>> powernv_cpufreq_throttle_check() reports any frequency throttling and
>> this patch will report the reason or event that caused throttling. We
>> can be throttled if OCC is reset or OCC limits Pmax due to power or
>> thermal reasons. We are also notified of unthrottling after an OCC
>> reset or if OCC restores Pmax on the chip.
>>
>> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
>> CC: "Rafael J. Wysocki" <rjw@rjwysocki.net>
>> CC: Viresh Kumar <viresh.kumar@linaro.org>
>> CC: linux-pm@vger.kernel.org
>> ---
>>  drivers/cpufreq/powernv-cpufreq.c | 70 ++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 69 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
>> index ebef0d8..5718765 100644
>> --- a/drivers/cpufreq/powernv-cpufreq.c
>> +++ b/drivers/cpufreq/powernv-cpufreq.c
>> @@ -32,6 +32,7 @@
>>  #include <asm/firmware.h>
>>  #include <asm/reg.h>
>>  #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
>> +#include <asm/opal.h>
>>
>>  #define POWERNV_MAX_PSTATES	256
>>  #define PMSR_PSAFE_ENABLE	(1UL << 30)
>> @@ -40,7 +41,7 @@
>>  #define PMSR_LP(x)		((x >> 48) & 0xFF)
>>
>>  static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
>> -static bool rebooting, throttled;
>> +static bool rebooting, throttled, occ_reset;
>>
>>  /*
>>   * Note: The set of pstates consists of contiguous integers, the
>> @@ -395,6 +396,72 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
>>  	.notifier_call = powernv_cpufreq_reboot_notifier,
>>  };
>>
>> +static char throttle_reason[6][50] = {	"No throttling",
>> +					"Power Cap",
>> +					"Processor Over Temperature",
>> +					"Power Supply Failure",
>> +					"OverCurrent",
>> +					"OCC Reset"
>> +				     };
>> +
>> +static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
>> +		unsigned long msg_type, void *msg)
>> +{
>> +	struct opal_msg *occ_msg = msg;
>> +	uint64_t token;
>> +	uint64_t chip_id, reason;
>> +
>> +	if (msg_type != OPAL_MSG_OCC)
>> +		return 0;
>> +	token = be64_to_cpu(occ_msg->params[0]);
>> +	switch (token) {
>> +	case 0:
>> +		occ_reset = true;
>> +		/*
>> +		 * powernv_cpufreq_throttle_check() is called in
>> +		 * target() callback which can detect the throttle state
>> +		 * for governors like ondemand.
>> +		 * But static governors will not call target() often thus
>> +		 * report throttling here.
>> +		 */
>> +		if (!throttled) {
>> +			throttled = true;
>> +			pr_crit("CPU Frequency is throttled\n");
>> +		}
>> +		pr_info("OCC in Reset\n");
>> +		break;
>> +	case 1:
>> +		pr_info("OCC is Loaded\n");
>> +		break;
>> +	case 2:
> 
> You may want to replace the numbers with macros. Like
> OCC_RESET,OCC_LOAD, OCC_THROTTLE for better readability.

Okay will do.

> 
>> +		chip_id = be64_to_cpu(occ_msg->params[1]);
>> +		reason = be64_to_cpu(occ_msg->params[2]);
>> +		if (occ_reset) {
>> +			occ_reset = false;
>> +			throttled = false;
>> +			pr_info("OCC is Active\n");
>> +			/* Sanity check for static governors */
>> +			powernv_cpufreq_throttle_check(smp_processor_id());
>> +		} else if (reason) {
>> +			throttled = true;
>> +			pr_info("Pmax reduced due to %s on chip %x\n",
>> +					throttle_reason[reason], (int)chip_id);
>> +		} else {
>> +			throttled = false;
>> +			pr_info("%s on chip %x\n",
>> +					throttle_reason[reason], (int)chip_id);
> 
> Don't you need a powernv_cpufreq_throttle_check() here?  Or is it ok to
> rely on the OCC notification for unthrottle ?

Yes we need to check. Fixing this in v2.

Thanks and Regards,
Shilpa
diff mbox

Patch

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index ebef0d8..5718765 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -32,6 +32,7 @@ 
 #include <asm/firmware.h>
 #include <asm/reg.h>
 #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
+#include <asm/opal.h>
 
 #define POWERNV_MAX_PSTATES	256
 #define PMSR_PSAFE_ENABLE	(1UL << 30)
@@ -40,7 +41,7 @@ 
 #define PMSR_LP(x)		((x >> 48) & 0xFF)
 
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
-static bool rebooting, throttled;
+static bool rebooting, throttled, occ_reset;
 
 /*
  * Note: The set of pstates consists of contiguous integers, the
@@ -395,6 +396,72 @@  static struct notifier_block powernv_cpufreq_reboot_nb = {
 	.notifier_call = powernv_cpufreq_reboot_notifier,
 };
 
+static char throttle_reason[6][50] = {	"No throttling",
+					"Power Cap",
+					"Processor Over Temperature",
+					"Power Supply Failure",
+					"OverCurrent",
+					"OCC Reset"
+				     };
+
+static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
+		unsigned long msg_type, void *msg)
+{
+	struct opal_msg *occ_msg = msg;
+	uint64_t token;
+	uint64_t chip_id, reason;
+
+	if (msg_type != OPAL_MSG_OCC)
+		return 0;
+	token = be64_to_cpu(occ_msg->params[0]);
+	switch (token) {
+	case 0:
+		occ_reset = true;
+		/*
+		 * powernv_cpufreq_throttle_check() is called in
+		 * target() callback which can detect the throttle state
+		 * for governors like ondemand.
+		 * But static governors will not call target() often thus
+		 * report throttling here.
+		 */
+		if (!throttled) {
+			throttled = true;
+			pr_crit("CPU Frequency is throttled\n");
+		}
+		pr_info("OCC in Reset\n");
+		break;
+	case 1:
+		pr_info("OCC is Loaded\n");
+		break;
+	case 2:
+		chip_id = be64_to_cpu(occ_msg->params[1]);
+		reason = be64_to_cpu(occ_msg->params[2]);
+		if (occ_reset) {
+			occ_reset = false;
+			throttled = false;
+			pr_info("OCC is Active\n");
+			/* Sanity check for static governors */
+			powernv_cpufreq_throttle_check(smp_processor_id());
+		} else if (reason) {
+			throttled = true;
+			pr_info("Pmax reduced due to %s on chip %x\n",
+					throttle_reason[reason], (int)chip_id);
+		} else {
+			throttled = false;
+			pr_info("%s on chip %x\n",
+					throttle_reason[reason], (int)chip_id);
+		}
+		break;
+	}
+	return 0;
+}
+
+static struct notifier_block powernv_cpufreq_opal_nb = {
+	.notifier_call	= powernv_cpufreq_occ_msg,
+	.next		= NULL,
+	.priority	= 0,
+};
+
 static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 {
 	struct powernv_smp_call_data freq_data;
@@ -430,6 +497,7 @@  static int __init powernv_cpufreq_init(void)
 	}
 
 	register_reboot_notifier(&powernv_cpufreq_reboot_nb);
+	opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
 	return cpufreq_register_driver(&powernv_cpufreq_driver);
 }
 module_init(powernv_cpufreq_init);