[RFC,4/4] cpu: initial power management implementation for POWER9

Message ID 20170813135959.23863-5-npiggin@gmail.com
State Superseded
Headers show

Commit Message

Nicholas Piggin Aug. 13, 2017, 1:59 p.m.
This is an RFC patch only at the moment; it should be split into at
least 2 patches, and it has not been tested on real hardware yet.

The idea is to add basic stop support to POWER9 for OPAL's idle loops.
To do this, we split the "pm enable" into two parts, enablement for
IPI facility, and enablement for sreset vector wakeups. POWER9 can use
the EC=ESL=0 (lite) stop when sreset is not installed. It can also use
global doorbell for IPIs before the interrupt controller is set up, so
there are some differences from P8.

The EC=ESL=1 state that is enabled when we have a sreset wakeup is
currently just level 3 which is like nap. It should allow thread
switch but not full state loss and core power down.
---
 asm/head.S          |  70 +++++++++++++------
 core/cpu.c          | 198 +++++++++++++++++++++++++++++++++++++++++++++-------
 core/fast-reboot.c  |   3 +-
 core/init.c         |  12 +++-
 hdata/cpu-common.c  |  12 ----
 hw/slw.c            |   2 +-
 include/cpu.h       |   8 ++-
 include/processor.h |  44 ++++++++++++
 include/skiboot.h   |   4 +-
 9 files changed, 286 insertions(+), 67 deletions(-)

Patch

diff --git a/asm/head.S b/asm/head.S
index badb567c..d6b58be9 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -28,6 +28,8 @@ 
 #define PPC_INST_SLEEP		.long 0x4c0003a4
 #define PPC_INST_RVWINKLE	.long 0x4c0003e4
 
+#define PPC_INST_STOP		.long 0x4c0002e4
+
 #define GET_STACK(stack_reg,pir_reg)				\
 	sldi	stack_reg,pir_reg,STACK_SHIFT;			\
 	addis	stack_reg,stack_reg,CPU_STACKS_OFFSET@ha;	\
@@ -471,27 +473,7 @@  call_relocate:
        .long 0xa6037b7d; /* mtsrr1 r11                         */ \
        .long 0x2400004c  /* rfid                               */
 
-.global enter_pm_state
-enter_pm_state:
-	/* Before entering map or rvwinkle, we create a stack frame
-	 * and save our non-volatile registers.
-	 *
-	 * We also save these SPRs:
-	 *
-	 *  - HSPRG0	in GPR0 slot
-	 *  - HSPRG1	in GPR1 slot
-	 *
-	 *  - xxx TODO: HIDs
-	 *  - TODO: Mask MSR:ME during the process
-	 *
-	 * On entry, r3 indicates:
-	 *
-	 *    0 = nap
-	 *    1 = rvwinkle
-	 */
-	mflr	%r0
-	std	%r0,16(%r1)
-	stdu	%r1,-STACK_FRAMESIZE(%r1)
+pm_save_regs:
 	SAVE_GPR(2,%r1)
 	SAVE_GPR(14,%r1)
 	SAVE_GPR(15,%r1)
@@ -519,6 +501,31 @@  enter_pm_state:
 	stw	%r5,STACK_XER(%r1)
 	std	%r6,STACK_GPR0(%r1)
 	std	%r7,STACK_GPR1(%r1)
+	blr
+
+.global enter_p8_pm_state
+enter_p8_pm_state:
+	/* Before entering nap or rvwinkle, we create a stack frame
+	 * and save our non-volatile registers.
+	 *
+	 * We also save these SPRs:
+	 *
+	 *  - HSPRG0	in GPR0 slot
+	 *  - HSPRG1	in GPR1 slot
+	 *
+	 *  - xxx TODO: HIDs
+	 *  - TODO: Mask MSR:ME during the process
+	 *
+	 * On entry, r3 indicates:
+	 *
+	 *    0 = nap
+	 *    1 = rvwinkle
+	 */
+	mflr	%r0
+	std	%r0,16(%r1)
+	stdu	%r1,-STACK_FRAMESIZE(%r1)
+
+	bl	pm_save_regs
 
 	/* Save stack pointer in struct cpu_thread */
 	std	%r1,CPUTHREAD_SAVE_R1(%r13)
@@ -543,6 +550,27 @@  enter_pm_state:
 	PPC_INST_RVWINKLE
 	b	.
 
+.global enter_p9_pm_lite_state
+enter_p9_pm_lite_state:
+	mtspr	SPR_PSSCR,%r3	/* r3 = PSSCR value passed by the C caller */
+	PPC_INST_STOP	/* stop, emitted as a raw opcode word */
+	blr	/* execution continues here after stop; return to the caller */
+
+.global enter_p9_pm_state
+enter_p9_pm_state:
+	mflr	%r0	/* same prologue as enter_p8_pm_state */
+	std	%r0,16(%r1)
+	stdu	%r1,-STACK_FRAMESIZE(%r1)
+
+	bl	pm_save_regs	/* store GPRs/CR/XER/HSPRGs into the new frame */
+
+	/* Save stack pointer in struct cpu_thread */
+	std	%r1,CPUTHREAD_SAVE_R1(%r13)
+
+	mtspr	SPR_PSSCR,%r3	/* r3 = PSSCR value passed by the C caller */
+	PPC_INST_STOP	/* stop, emitted as a raw opcode word */
+	b	.	/* no return path here; presumably wakeup is via 0x100 - confirm */
+
 /* This is a little piece of code that is copied down to
  * 0x100 for handling power management wakeups
  */
diff --git a/core/cpu.c b/core/cpu.c
index 2a95dff6..927ef45e 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -52,6 +52,8 @@  static bool hile_supported;
 static bool radix_supported;
 static unsigned long hid0_hile;
 static unsigned long hid0_attn;
+static bool sreset_enabled;
+static bool ipi_enabled;
 static bool pm_enabled;
 static bool current_hile_mode;
 static bool current_radix_mode;
@@ -92,8 +94,12 @@  static void cpu_wake(struct cpu_thread *cpu)
 	if (!cpu->in_idle)
 		return;
 
-	/* Poke IPI */
-	icp_kick_cpu(cpu);
+	if (proc_gen == proc_gen_p8) {
+		/* Poke IPI */
+		icp_kick_cpu(cpu);
+	} else if (proc_gen == proc_gen_p9) {
+		p9_dbell_send(cpu->pir);
+	}
 }
 
 static struct cpu_thread *cpu_find_job_target(void)
@@ -317,6 +323,7 @@  static void cpu_idle_p8(enum cpu_wake_cause wake_on)
 		if (cpu_check_jobs(cpu) || !pm_enabled)
 			goto skip_sleep;
 
+		/* Set up wakeup cause in LPCR: EE (for IPI) */
 		lpcr |= SPR_LPCR_P8_PECE2;
 		mtspr(SPR_LPCR, lpcr);
 
@@ -331,12 +338,13 @@  static void cpu_idle_p8(enum cpu_wake_cause wake_on)
 		if (!pm_enabled)
 			goto skip_sleep;
 
+		/* EE and DEC */
 		lpcr |= SPR_LPCR_P8_PECE2 | SPR_LPCR_P8_PECE3;
 		mtspr(SPR_LPCR, lpcr);
 	}
 
 	/* Enter nap */
-	enter_pm_state(false);
+	enter_p8_pm_state(false);
 
 skip_sleep:
 	/* Restore */
@@ -346,33 +354,67 @@  skip_sleep:
 	reset_cpu_icp();
 }
 
-void cpu_set_pm_enable(bool enabled)
+static void cpu_idle_p9(enum cpu_wake_cause wake_on)
 {
-	struct cpu_thread *cpu;
-
-	prlog(PR_INFO, "CPU: %sing power management\n",
-	      enabled ? "enabl" : "disabl");
+	uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P9_PECE;
+	uint64_t psscr;
+	struct cpu_thread *cpu = this_cpu();
 
-	if (proc_gen != proc_gen_p8)
+	if (!pm_enabled) {
+		prlog_once(PR_DEBUG, "cpu_idle_p9 called pm disabled\n");
 		return;
+	}
 
-	/* Public P8 Mambo has broken NAP */
-	if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
-		return;
+	msgclr(); /* flush pending messages */
 
-	pm_enabled = enabled;
+	/* Synchronize with wakers */
+	if (wake_on == cpu_wake_on_job) {
+		/* Mark ourselves in idle so other CPUs know to send an IPI */
+		cpu->in_idle = true;
+		sync();
 
-	if (enabled)
-		return;
+		/* Check for jobs again */
+		if (cpu_check_jobs(cpu) || !pm_enabled)
+			goto skip_sleep;
 
-	/* If disabling, take everybody out of PM */
-	sync();
-	for_each_available_cpu(cpu) {
-		while (cpu->in_sleep || cpu->in_idle) {
-			icp_kick_cpu(cpu);
-			cpu_relax();
-		}
+		/* HV DBELL for IPI */
+		lpcr |= SPR_LPCR_P9_PECEL1;
+	} else {
+		/* Mark ourselves sleeping so cpu_pm_disable knows to
+		 * send an IPI
+		 */
+		cpu->in_sleep = true;
+		sync();
+
+		/* Check if PM got disabled */
+		if (!pm_enabled)
+			goto skip_sleep;
+
+		/* HV DBELL and DEC */
+		lpcr |= SPR_LPCR_P9_PECEL1 | SPR_LPCR_P9_PECEL3;
+		mtspr(SPR_LPCR, lpcr);
 	}
+
+	mtspr(SPR_LPCR, lpcr);
+
+	if (sreset_enabled) {
+		/* stop with EC=1 (sreset) and ESL=1 (enable thread switch). */
+		/* PSSCR SD=0 ESL=1 EC=1 PSLL=0 TR=3 MTL=0 RL=3 */
+		psscr = PPC_BIT(42) | PPC_BIT(43) |
+			PPC_BITMASK(54, 55) | PPC_BITMASK(62,63);
+		enter_p9_pm_state(psscr);
+	} else {
+		/* stop with EC=0 (resumes) which does not require sreset. */
+		/* PSSCR SD=0 ESL=0 EC=0 PSLL=0 TR=3 MTL=0 RL=3 */
+		psscr = PPC_BITMASK(54, 55) | PPC_BITMASK(62,63);
+		enter_p9_pm_lite_state(psscr);
+	}
+
+skip_sleep:
+	/* Restore */
+	cpu->in_idle = false;
+	cpu->in_sleep = false;
+	p9_dbell_receive();
 }
 
 static void cpu_idle_pm(enum cpu_wake_cause wake_on)
@@ -381,6 +423,9 @@  static void cpu_idle_pm(enum cpu_wake_cause wake_on)
 	case proc_gen_p8:
 		cpu_idle_p8(wake_on);
 		break;
+	case proc_gen_p9:
+		cpu_idle_p9(wake_on);
+		break;
 	default:
 		prlog_once(PR_DEBUG, "cpu_idle_pm called with bad processor type\n");
 		break;
@@ -396,8 +441,11 @@  void cpu_idle_job(void)
 
 		smt_lowest();
 		/* Check for jobs again */
-		while (!cpu_check_jobs(cpu))
+		while (!cpu_check_jobs(cpu)) {
+			if (pm_enabled)
+				break;
 			barrier();
+		}
 		smt_medium();
 	}
 }
@@ -409,6 +457,7 @@  void cpu_idle_delay(unsigned long delay)
 	unsigned long min_pm = usecs_to_tb(10);
 
 	if (pm_enabled && delay > min_pm) {
+pm:
 		for (;;) {
 			if (delay >= 0x7fffffff)
 				delay = 0x7fffffff;
@@ -419,17 +468,114 @@  void cpu_idle_delay(unsigned long delay)
 			now = mftb();
 			if (tb_compare(now, end) == TB_AAFTERB)
 				break;
-
 			delay = end - now;
+			if (!(pm_enabled && delay > min_pm))
+				goto no_pm;
 		}
 	} else {
+no_pm:
 		smt_lowest();
-		while (tb_compare(mftb(), end) != TB_AAFTERB)
-			barrier();
+		for (;;) {
+			now = mftb();
+			if (tb_compare(now, end) == TB_AAFTERB)
+				break;
+			delay = end - now;
+			if (pm_enabled && delay > min_pm)
+				goto pm;
+		}
 		smt_medium();
 	}
 }
 
+static void cpu_pm_disable(void)
+{
+	struct cpu_thread *cpu;
+
+	pm_enabled = false;
+	sync(); /* pairs with the sync() idle threads do before re-checking pm_enabled */
+
+	if (proc_gen == proc_gen_p8) {
+		for_each_available_cpu(cpu) {
+			while (cpu->in_sleep || cpu->in_idle) {
+				icp_kick_cpu(cpu); /* keep kicking until both flags clear */
+				cpu_relax();
+			}
+		}
+	} else if (proc_gen == proc_gen_p9) {
+		for_each_available_cpu(cpu) {
+			if (cpu->in_sleep || cpu->in_idle)
+				p9_dbell_send(cpu->pir); /* sent once; not repeated below */
+		}
+
+		smt_lowest();
+		for_each_available_cpu(cpu) {
+			while (cpu->in_sleep || cpu->in_idle)
+				barrier(); /* busy-wait for the thread to clear both flags */
+		}
+		smt_medium();
+	}
+}
+
+void cpu_set_sreset_enable(bool enabled)
+{
+	if (sreset_enabled == enabled) /* already in the requested state */
+		return;
+
+	if (proc_gen == proc_gen_p8) {
+		/* Public P8 Mambo has broken NAP */
+		if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+			return;
+
+		sreset_enabled = enabled;
+		sync();
+
+		if (!enabled) {
+			cpu_pm_disable();
+		} else {
+			if (ipi_enabled) /* P8 PM needs both sreset and IPI */
+				pm_enabled = true;
+		}
+	} else if (proc_gen == proc_gen_p9) {
+		sreset_enabled = enabled;
+		sync();
+		/*
+		 * Kick everybody out of PM so they can adjust the PM
+		 * mode they are using (EC=0/1).
+		 */
+		cpu_pm_disable();
+		if (ipi_enabled) /* on P9, IPI alone is enough to turn PM back on */
+			pm_enabled = true;
+	}
+}
+
+void cpu_set_ipi_enable(bool enabled)
+{
+	if (ipi_enabled == enabled) /* already in the requested state */
+		return;
+
+	if (proc_gen == proc_gen_p8) {
+		ipi_enabled = enabled;
+		sync();
+		if (!enabled) {
+			cpu_pm_disable();
+		} else {
+			if (sreset_enabled) /* P8 PM needs both IPI and sreset */
+				pm_enabled = true;
+		}
+	} else if (proc_gen == proc_gen_p9) {
+		/* DD1 has global doorbell msgsync missing */
+		uint32_t version = mfspr(SPR_PVR);
+		if ((PVR_VERS_MAJ(version) == 1) && is_power9n(version))
+			return; /* ipi_enabled is left unchanged */
+		ipi_enabled = enabled;
+		sync();
+		if (!enabled)
+			cpu_pm_disable();
+		else
+			pm_enabled = true; /* no sreset check; cpu_idle_p9 picks the stop mode */
+	}
+}
+
 void cpu_process_local_jobs(void)
 {
 	struct cpu_thread *cpu = first_available_cpu();
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 7bfc06de..8af5c590 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -564,7 +564,8 @@  void __noreturn fast_reboot_entry(void)
 	cpu_fast_reboot_complete();
 
 	/* We can now do NAP mode */
-	cpu_set_pm_enable(true);
+	cpu_set_sreset_enable(true);
+	cpu_set_ipi_enable(true);
 
 	/* Start preloading kernel and ramdisk */
 	start_preload_kernel();
diff --git a/core/init.c b/core/init.c
index 01fe12c9..dbdcc465 100644
--- a/core/init.c
+++ b/core/init.c
@@ -380,7 +380,7 @@  static bool load_kernel(void)
 		 * by our vectors.
 		 */
 		if (kernel_entry < 0x2000) {
-			cpu_set_pm_enable(false);
+			cpu_set_sreset_enable(false);
 			memcpy(NULL, old_vectors, 0x2000);
 			sync_icache();
 		}
@@ -543,7 +543,8 @@  void __noreturn load_and_boot_kernel(bool is_reboot)
 	mem_dump_free();
 
 	/* Take processours out of nap */
-	cpu_set_pm_enable(false);
+	cpu_set_sreset_enable(false);
+	cpu_set_ipi_enable(false);
 
 	/* Dump the selected console */
 	stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL);
@@ -723,6 +724,7 @@  void setup_reset_vector(void)
 	while(src < &reset_patch_end)
 		*(dst++) = *(src++);
 	sync_icache();
+	cpu_set_sreset_enable(true);
 }
 
 void copy_exception_vectors(void)
@@ -918,12 +920,16 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 
 	/* Initialize the rest of the cpu thread structs */
 	init_all_cpus();
+	if (proc_gen == proc_gen_p9)
+		cpu_set_ipi_enable(true);
 
 	/* Allocate our split trace buffers now. Depends add_opal_node() */
 	init_trace_buffers();
 
 	/* On P7/P8, get the ICPs and make sure they are in a sane state */
 	init_interrupts();
+	if (proc_gen == proc_gen_p7 || proc_gen == proc_gen_p8)
+		cpu_set_ipi_enable(true);
 
 	/* On P9, initialize XIVE */
 	init_xive();
@@ -949,7 +955,7 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	setup_reset_vector();
 
 	/* We can now do NAP mode */
-	cpu_set_pm_enable(true);
+	cpu_set_sreset_enable(true);
 
 	/*
 	 * Synchronize time bases. Thi resets all the TB values to a small
diff --git a/hdata/cpu-common.c b/hdata/cpu-common.c
index a1a93121..f40d08b3 100644
--- a/hdata/cpu-common.c
+++ b/hdata/cpu-common.c
@@ -21,18 +21,6 @@ 
 
 #include "hdata.h"
 
-static bool is_power9n(uint32_t version)
-{
-	/*
-	 * Bit 13 tells us:
-	 *   0 = Scale out (aka Nimbus)
-	 *   1 = Scale up  (aka Cumulus)
-	 */
-	if ((version >> 13) & 1)
-		return false;
-	return true;
-}
-
 struct dt_node * add_core_common(struct dt_node *cpus,
 				 const struct sppcia_cpu_cache *cache,
 				 const struct sppaca_cpu_timebase *tb,
diff --git a/hw/slw.c b/hw/slw.c
index c0ab9dea..6452e93a 100644
--- a/hw/slw.c
+++ b/hw/slw.c
@@ -82,7 +82,7 @@  static void slw_do_rvwinkle(void *data)
 	/* Tell that we got it */
 	cpu->state = cpu_state_rvwinkle;
 
-	enter_pm_state(1);
+	enter_p8_pm_state(1);
 
 	/* Restore SPRs */
 	init_shared_sprs();
diff --git a/include/cpu.h b/include/cpu.h
index f08940f7..0b873178 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -256,8 +256,12 @@  extern void cpu_process_jobs(void);
 extern void cpu_process_local_jobs(void);
 /* Check if there's any job pending */
 bool cpu_check_jobs(struct cpu_thread *cpu);
-/* Enable/disable PM */
-void cpu_set_pm_enable(bool pm_enabled);
+
+/* OPAL sreset vector in place at 0x100 */
+void cpu_set_sreset_enable(bool sreset_enabled);
+
+/* IPI for PM modes is enabled */
+void cpu_set_ipi_enable(bool ipi_enabled);
 
 static inline void cpu_give_self_os(void)
 {
diff --git a/include/processor.h b/include/processor.h
index 2e1ac37d..11992248 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -77,6 +77,7 @@ 
 #define SPR_HMER	0x150	/* Hypervisor Maintenance Exception */
 #define SPR_HMEER	0x151	/* HMER interrupt enable mask */
 #define SPR_AMOR	0x15d
+#define SPR_PSSCR	0x357   /* RW: Stop status and control (ISA 3) */
 #define SPR_TSCR	0x399
 #define SPR_HID0	0x3f0
 #define SPR_HID1	0x3f1
@@ -85,6 +86,7 @@ 
 #define SPR_HID5	0x3f6
 #define SPR_PIR		0x3ff	/* RO: Processor Identification */
 
+
 /* Bits in LPCR */
 
 /* Powersave Exit Cause Enable is different for P7 and P8 */
@@ -99,6 +101,14 @@ 
 #define SPR_LPCR_P8_PECE2	PPC_BIT(49)   /* Wake on external interrupts */
 #define SPR_LPCR_P8_PECE3	PPC_BIT(50)   /* Wake on decrementer */
 #define SPR_LPCR_P8_PECE4	PPC_BIT(51)   /* Wake on MCs, HMIs, etc... */
+
+#define SPR_LPCR_P9_PECE	(PPC_BITMASK(47,51) | PPC_BITMASK(17,17))
+#define SPR_LPCR_P9_PECEU0	PPC_BIT(17)   /* Wake on HVI */
+#define SPR_LPCR_P9_PECEL0	PPC_BIT(47)   /* Wake on priv doorbell */
+#define SPR_LPCR_P9_PECEL1	PPC_BIT(48)   /* Wake on hv doorbell */
+#define SPR_LPCR_P9_PECEL2	PPC_BIT(49)   /* Wake on external interrupts */
+#define SPR_LPCR_P9_PECEL3	PPC_BIT(50)   /* Wake on decrementer */
+#define SPR_LPCR_P9_PECEL4	PPC_BIT(51)   /* Wake on MCs, HMIs, etc... */
 #define SPR_LPCR_P9_LD		PPC_BIT(46)   /* Large decrementer mode bit */
 
 
@@ -206,6 +216,19 @@ 
 
 #include <compiler.h>
 #include <stdint.h>
+#include <stdbool.h>
+
+static inline bool is_power9n(uint32_t version)
+{
+	/*
+	 * Bit 13 of the PVR value (counting from the LSB) tells us:
+	 *   0 = Scale out (aka Nimbus)
+	 *   1 = Scale up  (aka Cumulus)
+	 */
+	if ((version >> 13) & 1)
+		return false;
+	return true;
+}
 
 /*
  * SMT priority
@@ -294,6 +317,27 @@  static inline void sync_icache(void)
 	asm volatile("sync; icbi 0,%0; sync; isync" : : "r" (0) : "memory");
 }
 
+/*
+ * Doorbells
+ */
+static inline void msgclr(void)
+{
+	uint64_t rb = (0x05 << (63-36)); /* same RB value p9_dbell_receive() clears */
+	asm volatile("msgclr %0" : : "r"(rb)); /* clear only: no msgsync/lwsync here */
+}
+
+static inline void p9_dbell_receive(void)
+{
+	uint64_t rb = (0x05 << (63-36)); /* same message type p9_dbell_send() uses */
+	/* msgclr ; msgsync (raw opcode) ; lwsync -- "memory" fences the compiler too */
+	asm volatile("msgclr %0 ; .long 0x7c0006ec ; lwsync" : : "r"(rb) : "memory");
+}
+
+static inline void p9_dbell_send(uint32_t pir)
+{
+	uint64_t rb = (0x05 << (63-36)) | pir; /* message type 5 | target thread PIR */
+	asm volatile("sync ; msgsnd %0" : : "r"(rb) : "memory"); /* keep prior stores before the sync */
+}
 
 /*
  * Byteswap load/stores
diff --git a/include/skiboot.h b/include/skiboot.h
index 4b7d5197..2ed18397 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -301,7 +301,9 @@  extern void fast_sleep_exit(void);
 extern void fake_rtc_init(void);
 
 /* Assembly in head.S */
-extern void enter_pm_state(bool winkle);
+extern void enter_p8_pm_state(bool winkle);
+extern void enter_p9_pm_state(uint64_t psscr);
+extern void enter_p9_pm_lite_state(uint64_t psscr);
 extern uint32_t reset_patch_start;
 extern uint32_t reset_patch_end;