diff mbox series

[v2,09/16] powernv/fadump: process architected register state data provided by firmware

Message ID 155541089317.812.14447001298006010972.stgit@hbathini.in.ibm.com (mailing list archive)
State Superseded
Headers show
Series Add FADump support on PowerNV platform | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (8c2ffd9174779014c3fe1f96d9dc3641d9175f00)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 1 checks, 295 lines checked

Commit Message

Hari Bathini April 16, 2019, 10:35 a.m. UTC
From: Hari Bathini <hbathini@linux.vnet.ibm.com>

Firmware provides architected register state data at the time of crash.
Process this data and build CPU notes to append to ELF core.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
---

Changes in v2:
* Updated reg type values according to recent OPAL changes


 arch/powerpc/include/asm/opal-api.h          |   23 +++
 arch/powerpc/kernel/fadump-common.h          |    3 
 arch/powerpc/platforms/powernv/opal-fadump.c |  187 ++++++++++++++++++++++++--
 arch/powerpc/platforms/powernv/opal-fadump.h |    4 +
 4 files changed, 206 insertions(+), 11 deletions(-)

Comments

Mahesh J Salgaonkar May 7, 2019, 2:13 p.m. UTC | #1
On 2019-04-16 16:05:06 Tue, Hari Bathini wrote:
> From: Hari Bathini <hbathini@linux.vnet.ibm.com>
> 
> Firmware provides architected register state data at the time of crash.
> Process this data and build CPU notes to append to ELF core.
> 
> Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
> ---
> 
> Changes in v2:
> * Updated reg type values according to recent OPAL changes
> 
> 
>  arch/powerpc/include/asm/opal-api.h          |   23 +++
>  arch/powerpc/kernel/fadump-common.h          |    3 
>  arch/powerpc/platforms/powernv/opal-fadump.c |  187 ++++++++++++++++++++++++--
>  arch/powerpc/platforms/powernv/opal-fadump.h |    4 +
>  4 files changed, 206 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index 75471c2..91f2735 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -976,6 +976,29 @@ struct opal_sg_list {
>   * Firmware-Assisted Dump (FADump)
>   */
>  
> +/* FADump thread header for register entries */
> +struct opal_fadump_thread_hdr {
> +	__be32  pir;
> +	/* 0x00 - 0x0F - The corresponding stop state of the core */
> +	u8      core_state;
> +	u8      reserved[3];
> +
> +	__be32	offset;	/* Offset to Register Entries array */
> +	__be32	ecnt;	/* Number of entries */
> +	__be32	esize;	/* Alloc size of each array entry in bytes */
> +	__be32	eactsz;	/* Actual size of each array entry in bytes */
> +} __packed;
> +
> +#define OPAL_REG_TYPE_GPR		0x01
> +#define OPAL_REG_TYPE_SPR		0x02
> +
> +/* FADump register entry. */
> +struct opal_fadump_reg_entry {
> +	__be32		reg_type;
> +	__be32		reg_num;
> +	__be64		reg_val;
> +};
> +
>  /* The maximum number of dump sections supported by OPAL */
>  #define OPAL_FADUMP_NR_SECTIONS			64
>  
> diff --git a/arch/powerpc/kernel/fadump-common.h b/arch/powerpc/kernel/fadump-common.h
> index ff764d4..8d47382 100644
> --- a/arch/powerpc/kernel/fadump-common.h
> +++ b/arch/powerpc/kernel/fadump-common.h
> @@ -117,6 +117,9 @@ struct fadump_memory_range {
>  
>  /* Firmware-assisted dump configuration details. */
>  struct fw_dump {
> +	unsigned long	cpu_state_destination_addr;
> +	unsigned long	cpu_state_data_version;
> +	unsigned long	cpu_state_entry_size;
>  	unsigned long	cpu_state_data_size;
>  	unsigned long	hpte_region_size;
>  	unsigned long	boot_memory_size;
> diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
> index da8480d..853f663 100644
> --- a/arch/powerpc/platforms/powernv/opal-fadump.c
> +++ b/arch/powerpc/platforms/powernv/opal-fadump.c
> @@ -94,6 +94,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
>  
>  				last_end = base + size;
>  				j++;
> +			} else if (fdm->section[i].src_type ==
> +				   OPAL_FADUMP_CPU_STATE_DATA) {
> +				fadump_conf->cpu_state_destination_addr =
> +					be64_to_cpu(fdm->section[i].dest_addr);
> +				fadump_conf->cpu_state_data_size =
> +					be64_to_cpu(fdm->section[i].dest_size);
>  			}
>  		}
>  		fadump_conf->rmr_regions_cnt = j;
> @@ -199,6 +205,75 @@ static int opal_invalidate_fadump(struct fw_dump *fadump_conf)
>  	return 0;
>  }
>  
> +static inline void fadump_set_regval_regnum(struct pt_regs *regs, u32 reg_type,
> +					    u32 reg_num, u64 reg_val)
> +{
> +	if (reg_type == OPAL_REG_TYPE_GPR) {
> +		if (reg_num < 32)
> +			regs->gpr[reg_num] = reg_val;
> +		return;
> +	}
> +
> +	switch (reg_num) {
> +	case 2000:
> +		regs->nip = reg_val;
> +		break;
> +	case 2001:
> +		regs->msr = reg_val;
> +		break;
> +	case 9:
> +		regs->ctr = reg_val;
> +		break;
> +	case 8:
> +		regs->link = reg_val;
> +		break;
> +	case 1:
> +		regs->xer = reg_val;
> +		break;
> +	case 2002:
> +		regs->ccr = reg_val;
> +		break;
> +	case 19:
> +		regs->dar = reg_val;
> +		break;
> +	case 18:
> +		regs->dsisr = reg_val;
> +		break;

Can we use the SPRN_* #defines which are already present in asm/reg.h instead of
hard-coding numbers for the switch cases? You may want to add new #defines
for NIP, MSR and CCR.

Thanks,
-Mahesh.
Segher Boessenkool May 7, 2019, 4 p.m. UTC | #2
On Tue, May 07, 2019 at 07:43:56PM +0530, Mahesh J Salgaonkar wrote:
> Can we use SPRN_* #defines which are already present in asm/reg.h instead of
> hard coding numbers for switch cases ? You may want to add new #defines
> for NIP, MSR and CCR.

But none of those three are SPRs.  Please don't pollute that namespace.
Put such defines into some fadump header, instead?


Segher
Hari Bathini May 9, 2019, 4:42 a.m. UTC | #3
On 07/05/19 9:30 PM, Segher Boessenkool wrote:
> On Tue, May 07, 2019 at 07:43:56PM +0530, Mahesh J Salgaonkar wrote:
>> Can we use SPRN_* #defines which are already present in asm/reg.h instead of
>> hard coding numbers for switch cases ? You may want to add new #defines
>> for NIP, MSR and CCR.
> But none of those three are SPRs.  Please don't pollute that namespace.
> Put such defines into some fadump header, instead?

Sure, Segher.
I will stick with adding the defines for the missing regs in a fadump header.
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 75471c2..91f2735 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -976,6 +976,29 @@  struct opal_sg_list {
  * Firmware-Assisted Dump (FADump)
  */
 
+/* FADump thread header for register entries */
+struct opal_fadump_thread_hdr {
+	__be32  pir;
+	/* 0x00 - 0x0F - The corresponding stop state of the core */
+	u8      core_state;
+	u8      reserved[3];
+
+	__be32	offset;	/* Offset to Register Entries array */
+	__be32	ecnt;	/* Number of entries */
+	__be32	esize;	/* Alloc size of each array entry in bytes */
+	__be32	eactsz;	/* Actual size of each array entry in bytes */
+} __packed;
+
+#define OPAL_REG_TYPE_GPR		0x01
+#define OPAL_REG_TYPE_SPR		0x02
+
+/* FADump register entry. */
+struct opal_fadump_reg_entry {
+	__be32		reg_type;
+	__be32		reg_num;
+	__be64		reg_val;
+};
+
 /* The maximum number of dump sections supported by OPAL */
 #define OPAL_FADUMP_NR_SECTIONS			64
 
diff --git a/arch/powerpc/kernel/fadump-common.h b/arch/powerpc/kernel/fadump-common.h
index ff764d4..8d47382 100644
--- a/arch/powerpc/kernel/fadump-common.h
+++ b/arch/powerpc/kernel/fadump-common.h
@@ -117,6 +117,9 @@  struct fadump_memory_range {
 
 /* Firmware-assisted dump configuration details. */
 struct fw_dump {
+	unsigned long	cpu_state_destination_addr;
+	unsigned long	cpu_state_data_version;
+	unsigned long	cpu_state_entry_size;
 	unsigned long	cpu_state_data_size;
 	unsigned long	hpte_region_size;
 	unsigned long	boot_memory_size;
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
index da8480d..853f663 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.c
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -94,6 +94,12 @@  static void update_fadump_config(struct fw_dump *fadump_conf,
 
 				last_end = base + size;
 				j++;
+			} else if (fdm->section[i].src_type ==
+				   OPAL_FADUMP_CPU_STATE_DATA) {
+				fadump_conf->cpu_state_destination_addr =
+					be64_to_cpu(fdm->section[i].dest_addr);
+				fadump_conf->cpu_state_data_size =
+					be64_to_cpu(fdm->section[i].dest_size);
 			}
 		}
 		fadump_conf->rmr_regions_cnt = j;
@@ -199,6 +205,75 @@  static int opal_invalidate_fadump(struct fw_dump *fadump_conf)
 	return 0;
 }
 
+static inline void fadump_set_regval_regnum(struct pt_regs *regs, u32 reg_type,
+					    u32 reg_num, u64 reg_val)
+{
+	if (reg_type == OPAL_REG_TYPE_GPR) {
+		if (reg_num < 32)
+			regs->gpr[reg_num] = reg_val;
+		return;
+	}
+
+	switch (reg_num) {
+	case 2000:
+		regs->nip = reg_val;
+		break;
+	case 2001:
+		regs->msr = reg_val;
+		break;
+	case 9:
+		regs->ctr = reg_val;
+		break;
+	case 8:
+		regs->link = reg_val;
+		break;
+	case 1:
+		regs->xer = reg_val;
+		break;
+	case 2002:
+		regs->ccr = reg_val;
+		break;
+	case 19:
+		regs->dar = reg_val;
+		break;
+	case 18:
+		regs->dsisr = reg_val;
+		break;
+	}
+}
+
+static inline void fadump_read_registers(char *bufp, unsigned int regs_cnt,
+					 unsigned int reg_entry_size,
+					 struct pt_regs *regs)
+{
+	int i;
+	struct opal_fadump_reg_entry *reg_entry;
+
+	memset(regs, 0, sizeof(struct pt_regs));
+
+	for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+		reg_entry = (struct opal_fadump_reg_entry *)bufp;
+		fadump_set_regval_regnum(regs,
+					 be32_to_cpu(reg_entry->reg_type),
+					 be32_to_cpu(reg_entry->reg_num),
+					 be64_to_cpu(reg_entry->reg_val));
+	}
+}
+
+static inline bool __init is_thread_core_inactive(u8 core_state)
+{
+	bool is_inactive = false;
+
+	/*
+	 * TODO: Check what each state value means and implement accordingly.
+	 *       Also, make sure F/W is updating this field appropriately.
+	 */
+	if (core_state == CORE_STATE_INACTIVE)
+		is_inactive = true;
+
+	return is_inactive;
+}
+
 /*
  * Read CPU state dump data and convert it into ELF notes.
  *
@@ -210,8 +285,32 @@  static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
 {
 	u32 num_cpus, *note_buf;
 	struct fadump_crash_info_header *fdh = NULL;
+	struct opal_fadump_thread_hdr *thdr;
+	unsigned long addr;
+	u32 thread_pir;
+	char *bufp;
+	struct pt_regs regs;
+	unsigned int size_of_each_thread;
+	unsigned int regs_offset, regs_cnt, reg_esize;
+	int i;
+
+	size_of_each_thread = fadump_conf->cpu_state_entry_size;
+	num_cpus = (fadump_conf->cpu_state_data_size / size_of_each_thread);
+
+	addr = fadump_conf->cpu_state_destination_addr;
+	bufp = __va(addr);
+
+	/*
+	 * Offset for register entries, entry size and registers count is
+	 * duplicated in every thread header in keeping with HDAT format.
+	 * Use these values from the first thread header.
+	 */
+	thdr = (struct opal_fadump_thread_hdr *)bufp;
+	regs_offset = (offsetof(struct opal_fadump_thread_hdr, offset) +
+		       be32_to_cpu(thdr->offset));
+	reg_esize = be32_to_cpu(thdr->esize);
+	regs_cnt  = be32_to_cpu(thdr->ecnt);
 
-	num_cpus = 1;
 	/* Allocate buffer to hold cpu crash notes. */
 	fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
 	fadump_conf->cpu_notes_buf_size =
@@ -230,10 +329,53 @@  static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
 	if (fadump_conf->fadumphdr_addr)
 		fdh = __va(fadump_conf->fadumphdr_addr);
 
-	if (fdh && (fdh->crashing_cpu != CPU_UNKNOWN)) {
-		note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
-		final_note(note_buf);
+	pr_debug("--------CPU State Data------------\n");
+	pr_debug("NumCpus     : %u\n", num_cpus);
+	pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+		 regs_offset, reg_esize, regs_cnt);
+
+	for (i = 0; i < num_cpus; i++, bufp += size_of_each_thread) {
+		thdr = (struct opal_fadump_thread_hdr *)bufp;
+
+		thread_pir = be32_to_cpu(thdr->pir);
+		pr_debug("%04d) PIR: 0x%x, core state: 0x%02x\n",
+			 (i + 1), thread_pir, thdr->core_state);
+
+		/*
+		 * Register state data of MAX cores is provided by firmware,
+		 * but some of these cores may not be active. So, while
+		 * processing register state data, check core state and
+		 * skip threads that belong to inactive cores.
+		 */
+		if (is_thread_core_inactive(thdr->core_state))
+			continue;
+
+		/*
+		 * If this is kernel initiated crash, crashing_cpu would be set
+		 * appropriately and register data of the crashing CPU saved by
+		 * crashing kernel. Add this saved register data of crashing CPU
+		 * to elf notes and populate the pt_regs for the remaining CPUs
+		 * from register state data provided by firmware.
+		 */
+		if (fdh && (fdh->crashing_cpu == thread_pir)) {
+			note_buf = fadump_regs_to_elf_notes(note_buf,
+							    &fdh->regs);
+			pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+				 fdh->crashing_cpu, fdh->regs.gpr[1],
+				 fdh->regs.nip);
+			continue;
+		}
+
+		fadump_read_registers((bufp + regs_offset), regs_cnt,
+				      reg_esize, &regs);
+
+		note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+		pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+			 thread_pir, regs.gpr[1], regs.nip);
+	}
+	final_note(note_buf);
 
+	if (fdh) {
 		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
 			 fdh->elfcorehdr_addr);
 		fadump_update_elfcore_header(fadump_conf,
@@ -258,13 +400,6 @@  static int __init opal_process_fadump(struct fw_dump *fadump_conf)
 		return -EINVAL;
 	}
 
-	/*
-	 * TODO: To build cpu notes, find a way to map PIR to logical id.
-	 *       Also, we may need different method for pseries and powernv.
-	 *       The currently booted kernel could have a different PIR to
-	 *       logical id mapping. So, try saving info of previous kernel's
-	 *       paca to get the right PIR to logical id mapping.
-	 */
 	rc = fadump_build_cpu_notes(fadump_conf);
 	if (rc)
 		return rc;
@@ -319,6 +454,14 @@  static void opal_crash_fadump(struct fadump_crash_info_header *fdh,
 {
 	int rc;
 
+	/*
+	 * Unlike on pseries platform, logical CPU number is not provided
+	 * with architected register state data. So, store the crashing
+	 * CPU's PIR instead to plug the appropriate register data for
+	 * crashing CPU in the vmcore file.
+	 */
+	fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
 	rc = opal_cec_reboot2(OPAL_REBOOT_OS_ERROR, msg);
 	if (rc == OPAL_UNSUPPORTED) {
 		pr_emerg("Reboot type %d not supported.\n",
@@ -340,6 +483,7 @@  static struct fadump_ops opal_fadump_ops = {
 int __init opal_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
 {
 	unsigned long dn;
+	const __be32 *prop;
 
 	/*
 	 * Check if Firmware-Assisted Dump is supported. if yes, check
@@ -351,6 +495,17 @@  int __init opal_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
 		return 1;
 	}
 
+	prop = of_get_flat_dt_prop(dn, "cpu-data-version", NULL);
+	if (prop)
+		fadump_conf->cpu_state_data_version = of_read_number(prop, 1);
+
+	if (fadump_conf->cpu_state_data_version != CPU_STATE_DATA_VERSION) {
+		pr_err("CPU state data format version mismatch!\n");
+		pr_err("Kernel: %u, OPAL: %lu\n", CPU_STATE_DATA_VERSION,
+		       fadump_conf->cpu_state_data_version);
+		return 1;
+	}
+
 	/*
 	 * Firmware currently supports only 32-bit value for size,
 	 * align it to 1MB size.
@@ -365,6 +520,16 @@  int __init opal_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
 		pr_info("Firmware-assisted dump is active.\n");
 		fadump_conf->dump_active = 1;
 		update_fadump_config(fadump_conf, (void *)__pa(fdm_active));
+
+		/*
+		 * These fields need not be populated while registering dump,
+		 * as destination address and size are provided by F/W.
+		 */
+		prop = of_get_flat_dt_prop(dn, "cpu-data-size", NULL);
+		if (prop) {
+			fadump_conf->cpu_state_entry_size =
+						of_read_number(prop, 1);
+		}
 	}
 
 	fadump_conf->ops		= &opal_fadump_ops;
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h
index a5eeb2c..80aee2d 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.h
+++ b/arch/powerpc/platforms/powernv/opal-fadump.h
@@ -13,6 +13,8 @@ 
 #ifndef __PPC64_OPAL_FA_DUMP_H__
 #define __PPC64_OPAL_FA_DUMP_H__
 
+#define CPU_STATE_DATA_VERSION		1
+
 #define OPAL_FADUMP_CPU_STATE_DATA	0x0000
 /* OPAL : 0x01 – 0x39 */
 #define OPAL_FADUMP_OPAL_REGION	0x0001
@@ -37,4 +39,6 @@  enum opal_fadump_section_types {
 #define OPAL_MAX_SECTIONS		(OPAL_SECTIONS + \
 					 MAX_REAL_MEM_REGIONS - 1)
 
+#define CORE_STATE_INACTIVE		(0x0F)
+
 #endif /* __PPC64_OPAL_FA_DUMP_H__ */