diff mbox series

[v3,11/15] powerpc/64s: machine check interrupt update NMI accounting

Message ID 20200407051636.648369-12-npiggin@gmail.com (mailing list archive)
State Superseded
Headers show
Series powerpc/64: machine check and system reset fixes | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (2c0ce4ff35994a7b12cc9879ced52c9e7c2e6667)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 2 warnings, 0 checks, 57 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Nicholas Piggin April 7, 2020, 5:16 a.m. UTC
machine_check_early is taken as an NMI, so nmi_enter is used there.
machine_check_exception is no longer taken as an NMI (it's invoked
via irq_work in the case a machine check hits in kernel mode), so
remove the nmi_enter from that case.

In NMI context, hash faults don't try to refill the hash table, which
can lead to crashes accessing non-pinned kernel pages. System reset
still has this potential problem.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/mce.c     |  7 +++++++
 arch/powerpc/kernel/process.c |  2 +-
 arch/powerpc/kernel/traps.c   | 13 +------------
 3 files changed, 9 insertions(+), 13 deletions(-)

Comments

Christophe Leroy April 7, 2020, 5:37 a.m. UTC | #1
Le 07/04/2020 à 07:16, Nicholas Piggin a écrit :
> machine_check_early is taken as an NMI, so nmi_enter is used there.
> machine_check_exception is no longer taken as an NMI (it's invoked
> via irq_work in the case a machine check hits in kernel mode), so
> remove the nmi_enter from that case.

Euh ... Is that also the case for PPC32 ?

AFAIK machine_check_exception() is called as an NMI on PPC32.

Christophe

> 
> In NMI context, hash faults don't try to refill the hash table, which
> can lead to crashes accessing non-pinned kernel pages. System reset
> still has this potential problem.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/kernel/mce.c     |  7 +++++++
>   arch/powerpc/kernel/process.c |  2 +-
>   arch/powerpc/kernel/traps.c   | 13 +------------
>   3 files changed, 9 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index 8077b5fb18a7..be7e3f92a7b5 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -574,6 +574,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
>   long machine_check_early(struct pt_regs *regs)
>   {
>   	long handled = 0;
> +	bool nested = in_nmi();
> +	if (!nested)
> +		nmi_enter();
>   
>   	hv_nmi_check_nonrecoverable(regs);
>   
> @@ -582,6 +585,10 @@ long machine_check_early(struct pt_regs *regs)
>   	 */
>   	if (ppc_md.machine_check_early)
>   		handled = ppc_md.machine_check_early(regs);
> +
> +	if (!nested)
> +		nmi_exit();
> +
>   	return handled;
>   }
>   
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 9c21288f8645..44410dd3029f 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1421,7 +1421,7 @@ void show_regs(struct pt_regs * regs)
>   		pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
>   #endif
>   #ifdef CONFIG_PPC64
> -	pr_cont("IRQMASK: %lx ", regs->softe);
> +	pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
>   #endif
>   #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>   	if (MSR_TM_ACTIVE(regs->msr))
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 3fca22276bb1..9f221772eb73 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -823,9 +823,6 @@ int machine_check_generic(struct pt_regs *regs)
>   void machine_check_exception(struct pt_regs *regs)
>   {
>   	int recover = 0;
> -	bool nested = in_nmi();
> -	if (!nested)
> -		nmi_enter();
>   
>   	__this_cpu_inc(irq_stat.mce_exceptions);
>   
> @@ -851,20 +848,12 @@ void machine_check_exception(struct pt_regs *regs)
>   	if (check_io_access(regs))
>   		goto bail;
>   
> -	if (!nested)
> -		nmi_exit();
> -
>   	die("Machine check", regs, SIGBUS);
>   
> +bail:
>   	/* Must die if the interrupt is not recoverable */
>   	if (!(regs->msr & MSR_RI))
>   		nmi_panic(regs, "Unrecoverable Machine check");
> -
> -	return;
> -
> -bail:
> -	if (!nested)
> -		nmi_exit();
>   }
>   
>   void SMIException(struct pt_regs *regs)
>
kernel test robot April 12, 2020, 8:46 a.m. UTC | #2
Hi Nicholas,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20200412]
[cannot apply to tip/perf/core v5.6]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64-machine-check-and-system-reset-fixes/20200407-134803
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-a001-20200412 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15,
                    from include/asm-generic/bug.h:19,
                    from arch/powerpc/include/asm/bug.h:109,
                    from include/linux/bug.h:5,
                    from arch/powerpc/include/asm/mmu.h:130,
                    from arch/powerpc/include/asm/paca.h:18,
                    from arch/powerpc/include/asm/current.h:13,
                    from include/linux/sched.h:12,
                    from arch/powerpc/kernel/process.c:14:
   arch/powerpc/kernel/process.c: In function 'show_regs':
>> arch/powerpc/kernel/process.c:1424:74: error: 'struct paca_struct' has no member named 'in_nmi'
    1424 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
         |                                                                          ^~
   include/linux/printk.h:317:26: note: in definition of macro 'pr_cont'
     317 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
         |                          ^~~~~~~~~~~
>> arch/powerpc/kernel/process.c:1424:99: error: 'struct paca_struct' has no member named 'in_mce'
    1424 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
         |                                                                                                   ^~
   include/linux/printk.h:317:26: note: in definition of macro 'pr_cont'
     317 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
         |                          ^~~~~~~~~~~

vim +1424 arch/powerpc/kernel/process.c

  1400	
  1401	void show_regs(struct pt_regs * regs)
  1402	{
  1403		int i, trap;
  1404	
  1405		show_regs_print_info(KERN_DEFAULT);
  1406	
  1407		printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
  1408		       regs->nip, regs->link, regs->ctr);
  1409		printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
  1410		       regs, regs->trap, print_tainted(), init_utsname()->release);
  1411		printk("MSR:  "REG" ", regs->msr);
  1412		print_msr_bits(regs->msr);
  1413		pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
  1414		trap = TRAP(regs);
  1415		if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
  1416			pr_cont("CFAR: "REG" ", regs->orig_gpr3);
  1417		if (trap == 0x200 || trap == 0x300 || trap == 0x600)
  1418	#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
  1419			pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
  1420	#else
  1421			pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
  1422	#endif
  1423	#ifdef CONFIG_PPC64
> 1424		pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
  1425	#endif
  1426	#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  1427		if (MSR_TM_ACTIVE(regs->msr))
  1428			pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
  1429	#endif
  1430	
  1431		for (i = 0;  i < 32;  i++) {
  1432			if ((i % REGS_PER_LINE) == 0)
  1433				pr_cont("\nGPR%02d: ", i);
  1434			pr_cont(REG " ", regs->gpr[i]);
  1435			if (i == LAST_VOLATILE && !FULL_REGS(regs))
  1436				break;
  1437		}
  1438		pr_cont("\n");
  1439	#ifdef CONFIG_KALLSYMS
  1440		/*
  1441		 * Lookup NIP late so we have the best change of getting the
  1442		 * above info out without failing
  1443		 */
  1444		printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
  1445		printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
  1446	#endif
  1447		show_stack(current, (unsigned long *) regs->gpr[1]);
  1448		if (!user_mode(regs))
  1449			show_instructions(regs);
  1450	}
  1451	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Nicholas Piggin May 6, 2020, 3:50 a.m. UTC | #3
Excerpts from Christophe Leroy's message of April 7, 2020 3:37 pm:
> 
> 
> Le 07/04/2020 à 07:16, Nicholas Piggin a écrit :
>> machine_check_early is taken as an NMI, so nmi_enter is used there.
>> machine_check_exception is no longer taken as an NMI (it's invoked
>> via irq_work in the case a machine check hits in kernel mode), so
>> remove the nmi_enter from that case.
> 
> Euh ... Is that also the case for PPC32 ?
> 
> AFAIK machine_check_exception() is called as an NMI on PPC32.

Sorry I missed your comment.  You're right, I'll make this change
depend on 64S. Thanks for reviewing them.

Thanks,
Nick
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 8077b5fb18a7..be7e3f92a7b5 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -574,6 +574,9 @@  EXPORT_SYMBOL_GPL(machine_check_print_event_info);
 long machine_check_early(struct pt_regs *regs)
 {
 	long handled = 0;
+	bool nested = in_nmi();
+	if (!nested)
+		nmi_enter();
 
 	hv_nmi_check_nonrecoverable(regs);
 
@@ -582,6 +585,10 @@  long machine_check_early(struct pt_regs *regs)
 	 */
 	if (ppc_md.machine_check_early)
 		handled = ppc_md.machine_check_early(regs);
+
+	if (!nested)
+		nmi_exit();
+
 	return handled;
 }
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9c21288f8645..44410dd3029f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1421,7 +1421,7 @@  void show_regs(struct pt_regs * regs)
 		pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
 #endif
 #ifdef CONFIG_PPC64
-	pr_cont("IRQMASK: %lx ", regs->softe);
+	pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (MSR_TM_ACTIVE(regs->msr))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3fca22276bb1..9f221772eb73 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -823,9 +823,6 @@  int machine_check_generic(struct pt_regs *regs)
 void machine_check_exception(struct pt_regs *regs)
 {
 	int recover = 0;
-	bool nested = in_nmi();
-	if (!nested)
-		nmi_enter();
 
 	__this_cpu_inc(irq_stat.mce_exceptions);
 
@@ -851,20 +848,12 @@  void machine_check_exception(struct pt_regs *regs)
 	if (check_io_access(regs))
 		goto bail;
 
-	if (!nested)
-		nmi_exit();
-
 	die("Machine check", regs, SIGBUS);
 
+bail:
 	/* Must die if the interrupt is not recoverable */
 	if (!(regs->msr & MSR_RI))
 		nmi_panic(regs, "Unrecoverable Machine check");
-
-	return;
-
-bail:
-	if (!nested)
-		nmi_exit();
 }
 
 void SMIException(struct pt_regs *regs)