[1/9] Add basic P9 fused core support

Message ID 20180927044849.28322-2-benh@kernel.crashing.org
State Under Review
Headers show
Series
  • Initial "big cores" support for POWER9
Related show

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied

Commit Message

Benjamin Herrenschmidt Sept. 27, 2018, 4:48 a.m.
From: Ryan Grimm <grimm@linux.vnet.ibm.com>

P9 cores can be configured into fused core mode where two core chiplets
function as an 8-threaded, single core.  So, bump four to eight in boot_entry
when in fused core mode and cpu_thread_count in init_boot_cpu.

The HID, AMOR, TSCR, RPR require the first active thread on that core chiplet
to load the copy for that core chiplet.  So, send thread 1 of a fused core to
init_shared_sprs in boot_entry.

The code checks for fused core mode in the core thread state register and puts a
field in struct cpu_thread.  This flag is checked when updating the HID and in
XIVE code when setting the special bar.

For XSCOM, the core ID is the non-fused EX.  So, create macros to arrange the
bits.  It's fairly verbose but somewhat readable.

This was tested on a P9 ZZ with 16 fused cores and ran HTX for over 24 hours.

Signed-off-by: Ryan Grimm <grimm@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
---
 asm/head.S         | 22 ++++++++++++++++++++--
 core/chip.c        | 15 +++++++++++----
 core/cpu.c         | 39 ++++++++++++++++++++++++++++++++++-----
 core/fast-reboot.c |  2 +-
 hw/xive.c          |  2 +-
 include/chip.h     | 31 +++++++++++++++++++++++++++++++
 include/cpu.h      |  6 ++++++
 include/xscom.h    |  3 +++
 8 files changed, 107 insertions(+), 13 deletions(-)

Patch

diff --git a/asm/head.S b/asm/head.S
index 803fbf1a..5aad6ca6 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -259,6 +259,7 @@  boot_offset:
  *   r28 :  PVR
  *   r27 :  DTB pointer (or NULL)
  *   r26 :  PIR thread mask
+ *   r25 :  P9 fused core flag
  */
 .global boot_entry
 boot_entry:
@@ -277,11 +278,19 @@  boot_entry:
 	cmpwi	cr0,%r3,PVR_TYPE_P8NVL
 	beq	2f
 	cmpwi	cr0,%r3,PVR_TYPE_P9
-	beq 	1f
+	beq	3f
 	attn		/* Unsupported CPU type... what do we do ? */
 	b 	.	/* loop here, just in case attn is disabled */
 
-	/* P8 -> 8 threads */
+	/* Check for fused core and set flag */
+3:
+	li	%r3, 0x1e0
+	mtspr   SPR_SPRC, %r3
+	mfspr	%r3, SPR_SPRD
+	andi.	%r25, %r3, 1
+	beq 1f
+
+	/* P8 or P9 fused -> 8 threads */
 2:	li	%r26,7
 
 	/* Get our reloc offset into r30 */
@@ -303,6 +312,15 @@  boot_entry:
 	LOAD_IMM64(%r3, (MSR_HV | MSR_SF))
 	mtmsrd	%r3,0
 
+	/* If fused, t1 is primary chiplet and must init shared sprs */
+	andi.	%r3,%r25,1
+	beq	not_fused
+
+	mfspr	%r31,SPR_PIR
+	andi.	%r3,%r31,1
+	bnel	init_shared_sprs
+
+not_fused:
 	/* Check our PIR, avoid threads */
 	mfspr	%r31,SPR_PIR
 	and.	%r0,%r31,%r26
diff --git a/core/chip.c b/core/chip.c
index 65263253..2b9b6ef9 100644
--- a/core/chip.c
+++ b/core/chip.c
@@ -20,6 +20,7 @@ 
 #include <console.h>
 #include <device.h>
 #include <timebase.h>
+#include <cpu.h>
 
 static struct proc_chip *chips[MAX_CHIPS];
 enum proc_chip_quirks proc_chip_quirks;
@@ -37,7 +38,10 @@  uint32_t pir_to_chip_id(uint32_t pir)
 uint32_t pir_to_core_id(uint32_t pir)
 {
 	if (proc_gen == proc_gen_p9)
-		return P9_PIR2COREID(pir);
+		if (this_cpu()->is_fused_core)
+			return P9_PIRFUSED2NORMALCOREID(pir);
+		else
+			return P9_PIR2COREID(pir);
 	else if (proc_gen == proc_gen_p8)
 		return P8_PIR2COREID(pir);
 	else
@@ -46,9 +50,12 @@  uint32_t pir_to_core_id(uint32_t pir)
 
 uint32_t pir_to_thread_id(uint32_t pir)
 {
-	if (proc_gen == proc_gen_p9)
-		return P9_PIR2THREADID(pir);
-	else if (proc_gen == proc_gen_p8)
+	if (proc_gen == proc_gen_p9) {
+		if (this_cpu()->is_fused_core)
+			return P9_PIR2FUSEDTHREADID(pir);
+		else
+			return P9_PIR2THREADID(pir);
+	} else if (proc_gen == proc_gen_p8)
 		return P8_PIR2THREADID(pir);
 	else
 		return P7_PIR2THREADID(pir);
diff --git a/core/cpu.c b/core/cpu.c
index cc5b88c5..4b7bd059 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -893,6 +893,14 @@  void cpu_disable_all_threads(struct cpu_thread *cpu)
 	/* XXX Do something to actually stop the core */
 }
 
+static int is_fused_core(void)
+{
+	uint64_t core_thread_state;	/* full SPR width, no truncation */
+	mtspr(SPR_SPRC, 0x00000000000001e0ULL);	/* 0x1e0: core thread state */
+	core_thread_state = mfspr(SPR_SPRD);
+	return (core_thread_state & PPC_BIT(63)) != 0;	/* nonzero when fused */
+}
+
 static void init_cpu_thread(struct cpu_thread *t,
 			    enum cpu_thread_state state,
 			    unsigned int pir)
@@ -912,6 +920,7 @@  static void init_cpu_thread(struct cpu_thread *t,
 #ifdef STACK_CHECK_ENABLED
 	t->stack_bot_mark = LONG_MAX;
 #endif
+	t->is_fused_core = is_fused_core();
 	assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
 }
 
@@ -1004,14 +1013,16 @@  void init_boot_cpu(void)
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	case proc_gen_p9:
-		cpu_thread_count = 4;
+		if (is_fused_core())
+			cpu_thread_count = 8;
+		else
+			cpu_thread_count = 4;
 		prlog(PR_INFO, "CPU: P9 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	default:
 		prerror("CPU: Unknown PVR, assuming 1 thread\n");
 		cpu_thread_count = 1;
-		cpu_max_pir = mfspr(SPR_PIR);
 	}
 
 	prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
@@ -1134,7 +1145,7 @@  void init_all_cpus(void)
 
 	/* Iterate all CPUs in the device-tree */
 	dt_for_each_child(cpus, cpu) {
-		unsigned int pir, server_no, chip_id;
+		unsigned int pir, server_no, chip_id, threads;
 		enum cpu_thread_state state;
 		const struct dt_property *p;
 		struct cpu_thread *t, *pt;
@@ -1162,6 +1173,14 @@  void init_all_cpus(void)
 		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
 		      " State=%d\n", pir, server_no, state);
 
+		/* Check max PIR */
+		if (cpu_max_pir < (pir + cpu_thread_count - 1)) {
+			prlog(PR_WARNING, "CPU: CPU potentially out of range"
+			      "PIR=0x%04x MAX=0x%04x !\n",
+			      pir, cpu_max_pir);
+			continue;
+		}
+
 		/* Setup thread 0 */
 		assert(pir <= cpu_max_pir);
 		t = pt = &cpu_stacks[pir].cpu;
@@ -1187,11 +1206,21 @@  void init_all_cpus(void)
 		/* Add the decrementer width property */
 		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
 
+		if (t->is_fused_core)
+			dt_add_property(t->node, "ibm,fused-core", NULL, 0);
+
 		/* Iterate threads */
 		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
 		if (!p)
 			continue;
-		for (thread = 1; thread < (p->len / 4); thread++) {
+		threads = p->len / 4;
+		if (threads > cpu_thread_count) {
+			prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x"
+			      " threads=%d max=%d\n",
+			      pir, threads, cpu_thread_count);
+			threads = cpu_thread_count;
+		}
+		for (thread = 1; thread < threads; thread++) {
 			prlog(PR_TRACE, "CPU:   secondary thread %d found\n",
 			      thread);
 			t = &cpu_stacks[pir + thread].cpu;
@@ -1377,7 +1406,7 @@  static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
 	assert(jobs);
 
 	for_each_available_cpu(cpu) {
-		if (!cpu_is_thread0(cpu))
+		if (!cpu_is_thread0(cpu) && !cpu_is_core_chiplet_primary(cpu))
 			continue;
 		if (cpu == this_cpu())
 			continue;
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 8e95d834..f3fa42e2 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -206,7 +206,7 @@  static void cleanup_cpu_state(void)
 	struct cpu_thread *cpu = this_cpu();
 
 	/* Per core cleanup */
-	if (cpu_is_thread0(cpu)) {
+	if (cpu_is_thread0(cpu) | cpu_is_core_chiplet_primary(cpu)) {
 		/* Shared SPRs whacked back to normal */
 
 		/* XXX Update the SLW copies ! Also dbl check HIDs etc... */
diff --git a/hw/xive.c b/hw/xive.c
index 515f154d..470cf5e3 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -3313,7 +3313,7 @@  static void xive_init_cpu(struct cpu_thread *c)
 	 * of a pair is present we just do the setup for each of them, which
 	 * is harmless.
 	 */
-	if (cpu_is_thread0(c))
+	if (cpu_is_thread0(c) || cpu_is_core_chiplet_primary(c))
 		xive_configure_ex_special_bar(x, c);
 
 	/* Initialize the state structure */
diff --git a/include/chip.h b/include/chip.h
index 2fb8126d..8eeea37e 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -91,6 +91,26 @@ 
  * thus we have a 6-bit core number.
  *
  * Note: XIVE Only supports 4-bit chip numbers ...
+ *
+ * Upper PIR Bits
+ * --------------
+ *
+ * Normal-Core Mode:
+ * 57:61 CoreID
+ * 62:63 ThreadID
+ *
+ * Fused-Core Mode:
+ * 57:59 FusedQuadID
+ * 60    FusedCoreID
+ * 61:63 FusedThreadID
+ *
+ * FusedCoreID 0 contains normal-core chiplet 0 and 1
+ * FusedCoreID 1 contains normal-core chiplet 2 and 3
+ *
+ * Fused cores have interleaved threads:
+ * core chiplet 0/2 = t0, t2, t4, t6
+ * core chiplet 1/3 = t1, t3, t5, t7
+ *
  */
 #define P9_PIR2GCID(pir) (((pir) >> 8) & 0x7f)
 
@@ -102,6 +122,17 @@ 
 
 #define P9_GCID2CHIPID(gcid) ((gcid) & 0x7)
 
+#define P9_PIR2FUSEDQUADID(pir) (((pir) >> 4) & 0x7)
+
+#define P9_PIR2FUSEDCOREID(pir) (((pir) >> 3) & 0x1)
+
+#define P9_PIR2FUSEDTHREADID(pir) ((pir) & 0x7)
+
+#define P9_PIRFUSED2NORMALCOREID(pir) \
+	((P9_PIR2FUSEDQUADID(pir) << 2) | \
+	 (P9_PIR2FUSEDCOREID(pir) << 1) | \
+	 (P9_PIR2FUSEDTHREADID(pir) & 1)) /* quad | fused core | odd/even chiplet */
+
 /* P9 specific ones mostly used by XIVE */
 #define P9_PIR2LOCALCPU(pir) ((pir) & 0xff)
 #define P9_PIRFROMLOCALCPU(chip, cpu)	(((chip) << 8) | (cpu))
diff --git a/include/cpu.h b/include/cpu.h
index 2fe47982..2fb5edd2 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -54,6 +54,7 @@  struct cpu_thread {
 	uint32_t			server_no;
 	uint32_t			chip_id;
 	bool				is_secondary;
+	bool				is_fused_core;
 	struct cpu_thread		*primary;
 	enum cpu_thread_state		state;
 	struct dt_node			*node;
@@ -260,6 +261,11 @@  static inline bool cpu_is_thread0(struct cpu_thread *cpu)
 	return cpu->primary == cpu;
 }
 
+static inline bool cpu_is_core_chiplet_primary(struct cpu_thread *cpu)
+{
+	return cpu->is_fused_core && cpu_get_thread_index(cpu) == 1; /* fused t1 only */
+}
+
 static inline bool cpu_is_sibling(struct cpu_thread *cpu1,
 				  struct cpu_thread *cpu2)
 {
diff --git a/include/xscom.h b/include/xscom.h
index 98532240..4e6ce92d 100644
--- a/include/xscom.h
+++ b/include/xscom.h
@@ -123,6 +123,9 @@ 
 
 /*
  * Additional useful definitions for P9
+ *
+ * Note: In all of these, the core numbering is the *small* core
+ *       number.
  */
 
 /* An EQ is a quad (also named an EP) */