Patchwork [v2,1/6] powernv: cpufreq driver for powernv platform

login
register
mail settings
Submitter Gautham R. Shenoy
Date March 10, 2014, 11:10 a.m.
Message ID <1394449861-8688-2-git-send-email-ego@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/328531/
State Superseded
Headers show

Comments

Gautham R. Shenoy - March 10, 2014, 11:10 a.m.
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>

Backend driver to dynamically set voltage and frequency on
IBM POWER non-virtualized platforms.  Power management SPRs
are used to set the required PState.

This driver works in conjunction with cpufreq governors
like 'ondemand' to provide a demand based frequency and
voltage setting on IBM POWER non-virtualized platforms.

PState table is obtained from OPAL v3 firmware through device
tree.

powernv_cpufreq back-end driver would parse the relevant device-tree
nodes and initialise the cpufreq subsystem on powernv platform.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/reg.h         |   4 +
 arch/powerpc/platforms/powernv/Kconfig |   1 +
 drivers/cpufreq/Kconfig                |   1 +
 drivers/cpufreq/Kconfig.powerpc        |  13 ++
 drivers/cpufreq/Makefile               |   1 +
 drivers/cpufreq/powernv-cpufreq.c      | 277 +++++++++++++++++++++++++++++++++
 6 files changed, 297 insertions(+)
 create mode 100644 drivers/cpufreq/powernv-cpufreq.c

Patch

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 90c06ec..84f92ca 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -271,6 +271,10 @@ 
 #define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
 #define SPRN_IC		0x350	/* Virtual Instruction Count */
 #define SPRN_VTB	0x351	/* Virtual Time Base */
+#define SPRN_PMICR	0x354   /* Power Management Idle Control Reg */
+#define SPRN_PMSR	0x355   /* Power Management Status Reg */
+#define SPRN_PMCR	0x374	/* Power Management Control Register */
+
 /* HFSCR and FSCR bit numbers are the same */
 #define FSCR_TAR_LG	8	/* Enable Target Address Register */
 #define FSCR_EBB_LG	7	/* Enable Event Based Branching */
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 895e8a2..1fe12b1 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -11,6 +11,7 @@  config PPC_POWERNV
 	select PPC_UDBG_16550
 	select PPC_SCOM
 	select ARCH_RANDOM
+	select CPU_FREQ
 	default y
 
 config PPC_POWERNV_RTAS
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 4b029c0..4ba1632 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -48,6 +48,7 @@  config CPU_FREQ_STAT_DETAILS
 choice
 	prompt "Default CPUFreq governor"
 	default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ
+	default CPU_FREQ_DEFAULT_GOV_ONDEMAND if POWERNV_CPUFREQ
 	default CPU_FREQ_DEFAULT_GOV_PERFORMANCE
 	help
 	  This option sets which CPUFreq governor shall be loaded at
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index ca0021a..93f8689 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -54,3 +54,16 @@  config PPC_PASEMI_CPUFREQ
 	help
 	  This adds the support for frequency switching on PA Semi
 	  PWRficient processors.
+
+config POWERNV_CPUFREQ
+       tristate "CPU frequency scaling for IBM POWERNV platform"
+       depends on PPC_POWERNV
+       select CPU_FREQ_GOV_PERFORMANCE
+       select CPU_FREQ_GOV_POWERSAVE
+       select CPU_FREQ_GOV_USERSPACE
+       select CPU_FREQ_GOV_ONDEMAND
+       select CPU_FREQ_GOV_CONSERVATIVE
+       default y
+       help
+	 This adds support for CPU frequency switching on IBM POWERNV
+	 platform
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 7494565..0dbb963 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -86,6 +86,7 @@  obj-$(CONFIG_PPC_CORENET_CPUFREQ)   += ppc-corenet-cpufreq.o
 obj-$(CONFIG_CPU_FREQ_PMAC)		+= pmac32-cpufreq.o
 obj-$(CONFIG_CPU_FREQ_PMAC64)		+= pmac64-cpufreq.o
 obj-$(CONFIG_PPC_PASEMI_CPUFREQ)	+= pasemi-cpufreq.o
+obj-$(CONFIG_POWERNV_CPUFREQ)		+= powernv-cpufreq.o
 
 ##################################################################################
 # Other platform drivers
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
new file mode 100644
index 0000000..ab1551f
--- /dev/null
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -0,0 +1,277 @@ 
+/*
+ * POWERNV cpufreq driver for the IBM POWER processors
+ *
+ * (C) Copyright IBM 2014
+ *
+ * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt)	"powernv-cpufreq: " fmt
+
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/of.h>
+#include <asm/cputhreads.h>
+
+/* FIXME: Make this per-core */
+static DEFINE_MUTEX(freq_switch_mutex);
+
+#define POWERNV_MAX_PSTATES	256
+
+static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
+static int powernv_pstate_ids[POWERNV_MAX_PSTATES+1];
+
+/*
+ * Initialize the freq table based on data obtained
+ * from the firmware passed via device-tree
+ */
+
+static int init_powernv_pstates(void)
+{
+	struct device_node *power_mgt;
+	int nr_pstates = 0;
+	int pstate_min, pstate_max, pstate_nominal;
+	const __be32 *pstate_ids, *pstate_freqs;
+	int i;
+	u32 len_ids, len_freqs;
+
+	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!power_mgt) {
+		pr_warn("power-mgt node not found\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) {
+		pr_warn("ibm,pstate-min node not found\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) {
+		pr_warn("ibm,pstate-max node not found\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32(power_mgt, "ibm,pstate-nominal",
+				 &pstate_nominal)) {
+		pr_warn("ibm,pstate-nominal not found\n");
+		return -ENODEV;
+	}
+	pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
+		pstate_nominal, pstate_max);
+
+	pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
+	if (!pstate_ids) {
+		pr_warn("ibm,pstate-ids not found\n");
+		return -ENODEV;
+	}
+
+	pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz",
+				      &len_freqs);
+	if (!pstate_freqs) {
+		pr_warn("ibm,pstate-frequencies-mhz not found\n");
+		return -ENODEV;
+	}
+
+	WARN_ON(len_ids != len_freqs);
+	nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
+	WARN_ON(!nr_pstates);
+
+	pr_debug("NR PStates %d\n", nr_pstates);
+	for (i = 0; i < nr_pstates; i++) {
+		u32 id = be32_to_cpu(pstate_ids[i]);
+		u32 freq = be32_to_cpu(pstate_freqs[i]);
+
+		pr_debug("PState id %d freq %d MHz\n", id, freq);
+		powernv_freqs[i].driver_data = i;
+		powernv_freqs[i].frequency = freq * 1000; /* kHz */
+		powernv_pstate_ids[i] = id;
+	}
+	/* End of list marker entry */
+	powernv_freqs[i].driver_data = 0;
+	powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
+
+	/* Print frequency table */
+	for (i = 0; powernv_freqs[i].frequency != CPUFREQ_TABLE_END; i++)
+		pr_debug("%d: %d\n", i, powernv_freqs[i].frequency);
+
+	return 0;
+}
+
+static struct freq_attr *powernv_cpu_freq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+/* Helper routines */
+
+/* Access helpers to power mgt SPR */
+
+static inline unsigned long get_pmspr(unsigned long sprn)
+{
+	switch (sprn) {
+	case SPRN_PMCR:
+		return mfspr(SPRN_PMCR);
+
+	case SPRN_PMICR:
+		return mfspr(SPRN_PMICR);
+
+	case SPRN_PMSR:
+		return mfspr(SPRN_PMSR);
+	}
+	BUG();
+}
+
+static inline void set_pmspr(unsigned long sprn, unsigned long val)
+{
+	switch (sprn) {
+	case SPRN_PMCR:
+		mtspr(SPRN_PMCR, val);
+		return;
+
+	case SPRN_PMICR:
+		mtspr(SPRN_PMICR, val);
+		return;
+
+	case SPRN_PMSR:
+		mtspr(SPRN_PMSR, val);
+		return;
+	}
+	BUG();
+}
+
+static void set_pstate(void *pstate)
+{
+	unsigned long val;
+	unsigned long pstate_ul = *(unsigned long *) pstate;
+
+	val = get_pmspr(SPRN_PMCR);
+	val = val & 0x0000ffffffffffffULL;
+	/* Set both global(bits 56..63) and local(bits 48..55) PStates */
+	val = val | (pstate_ul << 56) | (pstate_ul << 48);
+	pr_debug("Setting cpu %d pmcr to %016lX\n", smp_processor_id(), val);
+	set_pmspr(SPRN_PMCR, val);
+}
+
+static int powernv_set_freq(cpumask_var_t cpus, unsigned int new_index)
+{
+	unsigned long val = (unsigned long) powernv_pstate_ids[new_index];
+
+	/*
+	 * Use smp_call_function to send IPI and execute the
+	 * mtspr on target cpu.  We could do that without IPI
+	 * if current CPU is within policy->cpus (core)
+	 */
+
+	val = val & 0xFF;
+	smp_call_function_any(cpus, set_pstate, &val, 1);
+	return 0;
+}
+
+static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	int base, i;
+
+#ifdef CONFIG_SMP
+	base = cpu_first_thread_sibling(policy->cpu);
+
+	for (i = 0; i < threads_per_core; i++)
+		cpumask_set_cpu(base + i, policy->cpus);
+#endif
+	policy->cpuinfo.transition_latency = 25000;
+
+	policy->cur = powernv_freqs[0].frequency;
+	cpufreq_frequency_table_get_attr(powernv_freqs, policy->cpu);
+	return cpufreq_frequency_table_cpuinfo(policy, powernv_freqs);
+}
+
+static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static int powernv_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, powernv_freqs);
+}
+
+static int powernv_cpufreq_target(struct cpufreq_policy *policy,
+			      unsigned int target_freq,
+			      unsigned int relation)
+{
+	int rc;
+	struct cpufreq_freqs freqs;
+	unsigned int new_index;
+
+	cpufreq_frequency_table_target(policy, powernv_freqs, target_freq,
+				       relation, &new_index);
+
+	freqs.old = policy->cur;
+	freqs.new = powernv_freqs[new_index].frequency;
+	freqs.cpu = policy->cpu;
+
+	mutex_lock(&freq_switch_mutex);
+	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
+
+	pr_debug("setting frequency for cpu %d to %d kHz index %d pstate %d",
+		 policy->cpu,
+		 powernv_freqs[new_index].frequency,
+		 new_index,
+		 powernv_pstate_ids[new_index]);
+
+	rc = powernv_set_freq(policy->cpus, new_index);
+
+	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
+	mutex_unlock(&freq_switch_mutex);
+
+	return rc;
+}
+
+static struct cpufreq_driver powernv_cpufreq_driver = {
+	.verify		= powernv_cpufreq_verify,
+	.target		= powernv_cpufreq_target,
+	.init		= powernv_cpufreq_cpu_init,
+	.exit		= powernv_cpufreq_cpu_exit,
+	.name		= "powernv-cpufreq",
+	.flags		= CPUFREQ_CONST_LOOPS,
+	.attr		= powernv_cpu_freq_attr,
+};
+
+static int __init powernv_cpufreq_init(void)
+{
+	int rc = 0;
+
+	/* Discover pstates from device tree and init */
+
+	rc = init_powernv_pstates();
+
+	if (rc) {
+		pr_info("powernv-cpufreq disabled\n");
+		return rc;
+	}
+
+	rc = cpufreq_register_driver(&powernv_cpufreq_driver);
+	return rc;
+}
+
+static void __exit powernv_cpufreq_exit(void)
+{
+	cpufreq_unregister_driver(&powernv_cpufreq_driver);
+}
+
+module_init(powernv_cpufreq_init);
+module_exit(powernv_cpufreq_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>");