Message ID | 153051042206.30541.2156877677180900261.stgit@jupiter.in.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc/pseries: Machine check handler improvements. | expand |
On Mon, 02 Jul 2018 11:17:06 +0530 Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > On pseries, as of today system crashes if we get a machine check > exceptions due to SLB errors. These are soft errors and can be fixed by > flushing the SLBs so the kernel can continue to function instead of > system crash. We do this in real mode before turning on MMU. Otherwise > we would run into nested machine checks. This patch now fetches the > rtas error log in real mode and flushes the SLBs on SLB errors. > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > --- > arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 > arch/powerpc/include/asm/machdep.h | 1 > arch/powerpc/kernel/exceptions-64s.S | 42 +++++++++++++++++++++ > arch/powerpc/kernel/mce.c | 16 +++++++- > arch/powerpc/mm/slb.c | 6 +++ > arch/powerpc/platforms/powernv/opal.c | 1 > arch/powerpc/platforms/pseries/pseries.h | 1 > arch/powerpc/platforms/pseries/ras.c | 51 +++++++++++++++++++++++++ > arch/powerpc/platforms/pseries/setup.c | 1 > 9 files changed, 116 insertions(+), 4 deletions(-) > > +TRAMP_REAL_BEGIN(machine_check_pSeries_early) > +BEGIN_FTR_SECTION > + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) > + mr r10,r1 /* Save r1 */ > + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ > + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ > + mfspr r11,SPRN_SRR0 /* Save SRR0 */ > + mfspr r12,SPRN_SRR1 /* Save SRR1 */ > + EXCEPTION_PROLOG_COMMON_1() > + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) > + EXCEPTION_PROLOG_COMMON_3(0x200) > + addi r3,r1,STACK_FRAME_OVERHEAD > + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */ Is there any reason you can't use the existing machine_check_powernv_early code to do all this? 
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index efdd16a79075..221271c96a57 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs) > { > long handled = 0; > > - __this_cpu_inc(irq_stat.mce_exceptions); > + /* > + * For pSeries we count mce when we go into virtual mode machine > + * check handler. Hence skip it. Also, We can't access per cpu > + * variables in real mode for LPAR. > + */ > + if (early_cpu_has_feature(CPU_FTR_HVMODE)) > + __this_cpu_inc(irq_stat.mce_exceptions); > > - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > + /* > + * See if platform is capable of handling machine check. > + * Otherwise fallthrough and allow CPU to handle this machine check. > + */ > + if (ppc_md.machine_check_early) > + handled = ppc_md.machine_check_early(regs); > + else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > handled = cur_cpu_spec->machine_check_early(regs); Would be good to add a powernv ppc_md handler which does the cur_cpu_spec->machine_check_early() call now that other platforms are calling this code. Because those aren't valid as a fallback call, but specific to powernv. > diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c > index 48fbb41af5d1..ed548d40a9e1 100644 > --- a/arch/powerpc/platforms/powernv/opal.c > +++ b/arch/powerpc/platforms/powernv/opal.c > @@ -417,7 +417,6 @@ static int opal_recover_mce(struct pt_regs *regs, > > if (!(regs->msr & MSR_RI)) { > /* If MSR_RI isn't set, we cannot recover */ > - pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); What's the reason for this change? 
> recovered = 0; > } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { > /* Platform corrected itself */ > diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h > index 60db2ee511fb..3611db5dd583 100644 > --- a/arch/powerpc/platforms/pseries/pseries.h > +++ b/arch/powerpc/platforms/pseries/pseries.h > @@ -24,6 +24,7 @@ struct pt_regs; > > extern int pSeries_system_reset_exception(struct pt_regs *regs); > extern int pSeries_machine_check_exception(struct pt_regs *regs); > +extern int pSeries_machine_check_realmode(struct pt_regs *regs); > > #ifdef CONFIG_SMP > extern void smp_init_pseries(void); > diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c > index 851ce326874a..9aa7885e0148 100644 > --- a/arch/powerpc/platforms/pseries/ras.c > +++ b/arch/powerpc/platforms/pseries/ras.c > @@ -427,6 +427,35 @@ int pSeries_system_reset_exception(struct pt_regs *regs) > return 0; /* need to perform reset */ > } > > +static int mce_handle_error(struct rtas_error_log *errp) > +{ > + struct pseries_errorlog *pseries_log; > + struct pseries_mc_errorlog *mce_log; > + int disposition = rtas_error_disposition(errp); > + uint8_t error_type; > + > + if (!rtas_error_extended(errp)) > + goto out; > + > + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); > + if (pseries_log == NULL) > + goto out; > + > + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; > + error_type = rtas_mc_error_type(mce_log); > + > + if ((disposition == RTAS_DISP_NOT_RECOVERED) && > + (error_type == PSERIES_MC_ERROR_TYPE_SLB)) { > + /* Store the old slb content someplace. */ > + slb_flush_and_rebolt_realmode(); > + disposition = RTAS_DISP_FULLY_RECOVERED; > + rtas_set_disposition_recovered(errp); > + } > + > +out: > + return disposition; > +} > + > /* > * Process MCE rtas errlog event. 
> */ > @@ -503,11 +532,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs) > struct rtas_error_log *errp; > > if (fwnmi_active) { > - errp = fwnmi_get_errinfo(regs); > fwnmi_release_errinfo(); Should the fwnmi_release_errinfo be done in the realmode path as well now, or is there some reason to leave it here? > + errp = fwnmi_get_errlog(); > if (errp && recover_mce(regs, errp)) > return 1; > } > > return 0; > } > + > +int pSeries_machine_check_realmode(struct pt_regs *regs) > +{ > + struct rtas_error_log *errp; > + int disposition; > + > + if (fwnmi_active) { > + errp = fwnmi_get_errinfo(regs); > + /* > + * Call to fwnmi_release_errinfo() in real mode causes kernel > + * to panic. Hence we will call it as soon as we go into > + * virtual mode. > + */ > + disposition = mce_handle_error(errp); > + if (disposition == RTAS_DISP_FULLY_RECOVERED) > + return 1; > + } > + > + return 0; > +} > diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c > index 60a067a6e743..249b02bc5c41 100644 > --- a/arch/powerpc/platforms/pseries/setup.c > +++ b/arch/powerpc/platforms/pseries/setup.c > @@ -999,6 +999,7 @@ define_machine(pseries) { > .calibrate_decr = generic_calibrate_decr, > .progress = rtas_progress, > .system_reset_exception = pSeries_system_reset_exception, > + .machine_check_early = pSeries_machine_check_realmode, > .machine_check_exception = pSeries_machine_check_exception, > #ifdef CONFIG_KEXEC_CORE > .machine_kexec = pSeries_machine_kexec, >
On 07/03/2018 03:38 AM, Nicholas Piggin wrote: > On Mon, 02 Jul 2018 11:17:06 +0530 > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: > >> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> >> >> On pseries, as of today system crashes if we get a machine check >> exceptions due to SLB errors. These are soft errors and can be fixed by >> flushing the SLBs so the kernel can continue to function instead of >> system crash. We do this in real mode before turning on MMU. Otherwise >> we would run into nested machine checks. This patch now fetches the >> rtas error log in real mode and flushes the SLBs on SLB errors. >> >> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> >> --- >> arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 >> arch/powerpc/include/asm/machdep.h | 1 >> arch/powerpc/kernel/exceptions-64s.S | 42 +++++++++++++++++++++ >> arch/powerpc/kernel/mce.c | 16 +++++++- >> arch/powerpc/mm/slb.c | 6 +++ >> arch/powerpc/platforms/powernv/opal.c | 1 >> arch/powerpc/platforms/pseries/pseries.h | 1 >> arch/powerpc/platforms/pseries/ras.c | 51 +++++++++++++++++++++++++ >> arch/powerpc/platforms/pseries/setup.c | 1 >> 9 files changed, 116 insertions(+), 4 deletions(-) >> > > >> +TRAMP_REAL_BEGIN(machine_check_pSeries_early) >> +BEGIN_FTR_SECTION >> + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) >> + mr r10,r1 /* Save r1 */ >> + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ >> + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ >> + mfspr r11,SPRN_SRR0 /* Save SRR0 */ >> + mfspr r12,SPRN_SRR1 /* Save SRR1 */ >> + EXCEPTION_PROLOG_COMMON_1() >> + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) >> + EXCEPTION_PROLOG_COMMON_3(0x200) >> + addi r3,r1,STACK_FRAME_OVERHEAD >> + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */ > > Is there any reason you can't use the existing > machine_check_powernv_early code to do all this? I did think about that :-). 
But the machine_check_powernv_early code does bit of extra stuff which isn't required in pseries like touching ME bit in MSR and lots of checks that are done in machine_check_handle_early() before going to virtual mode. But on second look I see that we can bypass all that with HVMODE FTR section. Will rename machine_check_powernv_early to machine_check_common_early and reuse it. > >> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c >> index efdd16a79075..221271c96a57 100644 >> --- a/arch/powerpc/kernel/mce.c >> +++ b/arch/powerpc/kernel/mce.c >> @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs) >> { >> long handled = 0; >> >> - __this_cpu_inc(irq_stat.mce_exceptions); >> + /* >> + * For pSeries we count mce when we go into virtual mode machine >> + * check handler. Hence skip it. Also, We can't access per cpu >> + * variables in real mode for LPAR. >> + */ >> + if (early_cpu_has_feature(CPU_FTR_HVMODE)) >> + __this_cpu_inc(irq_stat.mce_exceptions); >> >> - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) >> + /* >> + * See if platform is capable of handling machine check. >> + * Otherwise fallthrough and allow CPU to handle this machine check. >> + */ >> + if (ppc_md.machine_check_early) >> + handled = ppc_md.machine_check_early(regs); >> + else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) >> handled = cur_cpu_spec->machine_check_early(regs); > > Would be good to add a powernv ppc_md handler which does the > cur_cpu_spec->machine_check_early() call now that other platforms are > calling this code. Because those aren't valid as a fallback call, but > specific to powernv. 
> >> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c >> index 48fbb41af5d1..ed548d40a9e1 100644 >> --- a/arch/powerpc/platforms/powernv/opal.c >> +++ b/arch/powerpc/platforms/powernv/opal.c >> @@ -417,7 +417,6 @@ static int opal_recover_mce(struct pt_regs *regs, >> >> if (!(regs->msr & MSR_RI)) { >> /* If MSR_RI isn't set, we cannot recover */ >> - pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); > > What's the reason for this change? Err... This is by mistake.. My bad. Thanks for catching this. Will remove this hunk in next revision. We need a similar print for pSeries in ras.c. > >> recovered = 0; >> } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { >> /* Platform corrected itself */ >> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h >> index 60db2ee511fb..3611db5dd583 100644 >> --- a/arch/powerpc/platforms/pseries/pseries.h >> +++ b/arch/powerpc/platforms/pseries/pseries.h >> @@ -24,6 +24,7 @@ struct pt_regs; >> >> extern int pSeries_system_reset_exception(struct pt_regs *regs); >> extern int pSeries_machine_check_exception(struct pt_regs *regs); >> +extern int pSeries_machine_check_realmode(struct pt_regs *regs); >> >> #ifdef CONFIG_SMP >> extern void smp_init_pseries(void); >> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c >> index 851ce326874a..9aa7885e0148 100644 >> --- a/arch/powerpc/platforms/pseries/ras.c >> +++ b/arch/powerpc/platforms/pseries/ras.c >> @@ -427,6 +427,35 @@ int pSeries_system_reset_exception(struct pt_regs *regs) >> return 0; /* need to perform reset */ >> } >> >> +static int mce_handle_error(struct rtas_error_log *errp) >> +{ >> + struct pseries_errorlog *pseries_log; >> + struct pseries_mc_errorlog *mce_log; >> + int disposition = rtas_error_disposition(errp); >> + uint8_t error_type; >> + >> + if (!rtas_error_extended(errp)) >> + goto out; >> + >> + pseries_log = 
get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); >> + if (pseries_log == NULL) >> + goto out; >> + >> + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; >> + error_type = rtas_mc_error_type(mce_log); >> + >> + if ((disposition == RTAS_DISP_NOT_RECOVERED) && >> + (error_type == PSERIES_MC_ERROR_TYPE_SLB)) { >> + /* Store the old slb content someplace. */ >> + slb_flush_and_rebolt_realmode(); >> + disposition = RTAS_DISP_FULLY_RECOVERED; >> + rtas_set_disposition_recovered(errp); >> + } >> + >> +out: >> + return disposition; >> +} >> + >> /* >> * Process MCE rtas errlog event. >> */ >> @@ -503,11 +532,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs) >> struct rtas_error_log *errp; >> >> if (fwnmi_active) { >> - errp = fwnmi_get_errinfo(regs); >> fwnmi_release_errinfo(); > > Should the fwnmi_release_errinfo be done in the realmode path as well > now, or is there some reason to leave it here? In real mode calling fwnmi_release_errinfo() causes kernel panic. Couldn't debug further to find out why. So decided to keep it in virtual mode. I have mentioned that in comment below in pSeries_machine_check_realmode(). > >> + errp = fwnmi_get_errlog(); >> if (errp && recover_mce(regs, errp)) >> return 1; >> } >> >> return 0; >> } >> + >> +int pSeries_machine_check_realmode(struct pt_regs *regs) >> +{ >> + struct rtas_error_log *errp; >> + int disposition; >> + >> + if (fwnmi_active) { >> + errp = fwnmi_get_errinfo(regs); >> + /* >> + * Call to fwnmi_release_errinfo() in real mode causes kernel >> + * to panic. Hence we will call it as soon as we go into >> + * virtual mode. 
>> + */ >> + disposition = mce_handle_error(errp); >> + if (disposition == RTAS_DISP_FULLY_RECOVERED) >> + return 1; >> + } >> + >> + return 0; >> +} >> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c >> index 60a067a6e743..249b02bc5c41 100644 >> --- a/arch/powerpc/platforms/pseries/setup.c >> +++ b/arch/powerpc/platforms/pseries/setup.c >> @@ -999,6 +999,7 @@ define_machine(pseries) { >> .calibrate_decr = generic_calibrate_decr, >> .progress = rtas_progress, >> .system_reset_exception = pSeries_system_reset_exception, >> + .machine_check_early = pSeries_machine_check_realmode, >> .machine_check_exception = pSeries_machine_check_exception, >> #ifdef CONFIG_KEXEC_CORE >> .machine_kexec = pSeries_machine_kexec, >> > Thanks for your review.
On Tue, 3 Jul 2018 08:08:14 +1000 Nicholas Piggin <npiggin@gmail.com> wrote: > On Mon, 02 Jul 2018 11:17:06 +0530 > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: > > > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > > > On pseries, as of today system crashes if we get a machine check > > exceptions due to SLB errors. These are soft errors and can be > > fixed by flushing the SLBs so the kernel can continue to function > > instead of system crash. We do this in real mode before turning on > > MMU. Otherwise we would run into nested machine checks. This patch > > now fetches the rtas error log in real mode and flushes the SLBs on > > SLB errors. > > > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > --- > > arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 > > arch/powerpc/include/asm/machdep.h | 1 > > arch/powerpc/kernel/exceptions-64s.S | 42 > > +++++++++++++++++++++ arch/powerpc/kernel/mce.c > > | 16 +++++++- arch/powerpc/mm/slb.c | > > 6 +++ arch/powerpc/platforms/powernv/opal.c | 1 > > arch/powerpc/platforms/pseries/pseries.h | 1 > > arch/powerpc/platforms/pseries/ras.c | 51 > > +++++++++++++++++++++++++ > > arch/powerpc/platforms/pseries/setup.c | 1 9 files > > changed, 116 insertions(+), 4 deletions(-) > > > > +TRAMP_REAL_BEGIN(machine_check_pSeries_early) > > +BEGIN_FTR_SECTION > > + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) > > + mr r10,r1 /* Save r1 */ > > + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency > > stack */ > > + subi r1,r1,INT_FRAME_SIZE /* alloc stack > > frame */ > > + mfspr r11,SPRN_SRR0 /* Save SRR0 */ > > + mfspr r12,SPRN_SRR1 /* Save SRR1 */ > > + EXCEPTION_PROLOG_COMMON_1() > > + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) > > + EXCEPTION_PROLOG_COMMON_3(0x200) > > + addi r3,r1,STACK_FRAME_OVERHEAD > > + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call > > ABI */ > > Is there any reason you can't use the existing > machine_check_powernv_early code to do all this? 
Code sharing is nice, but if we envision this going to stable kernels, butchering the existing handler is going to be a nightmare. The code is quite a bit different between kernel versions. This code, as is, requires the bit that introduces EXCEPTION_PROLOG_COMMON_1 and then should work on Linux 3.14+. Thanks, Michal
Michal Suchánek <msuchanek@suse.de> writes: > On Tue, 3 Jul 2018 08:08:14 +1000 > Nicholas Piggin <npiggin@gmail.com> wrote: >> On Mon, 02 Jul 2018 11:17:06 +0530 >> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: >> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> >> > >> > On pseries, as of today system crashes if we get a machine check >> > exceptions due to SLB errors. These are soft errors and can be >> > fixed by flushing the SLBs so the kernel can continue to function >> > instead of system crash. We do this in real mode before turning on >> > MMU. Otherwise we would run into nested machine checks. This patch >> > now fetches the rtas error log in real mode and flushes the SLBs on >> > SLB errors. >> > >> > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> >> > --- >> > arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 >> > arch/powerpc/include/asm/machdep.h | 1 >> > arch/powerpc/kernel/exceptions-64s.S | 42 >> > +++++++++++++++++++++ arch/powerpc/kernel/mce.c >> > | 16 +++++++- arch/powerpc/mm/slb.c | >> > 6 +++ arch/powerpc/platforms/powernv/opal.c | 1 >> > arch/powerpc/platforms/pseries/pseries.h | 1 >> > arch/powerpc/platforms/pseries/ras.c | 51 >> > +++++++++++++++++++++++++ >> > arch/powerpc/platforms/pseries/setup.c | 1 9 files >> > changed, 116 insertions(+), 4 deletions(-) >> >> >> > +TRAMP_REAL_BEGIN(machine_check_pSeries_early) >> > +BEGIN_FTR_SECTION >> > + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) >> > + mr r10,r1 /* Save r1 */ >> > + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency >> > stack */ >> > + subi r1,r1,INT_FRAME_SIZE /* alloc stack >> > frame */ >> > + mfspr r11,SPRN_SRR0 /* Save SRR0 */ >> > + mfspr r12,SPRN_SRR1 /* Save SRR1 */ >> > + EXCEPTION_PROLOG_COMMON_1() >> > + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) >> > + EXCEPTION_PROLOG_COMMON_3(0x200) >> > + addi r3,r1,STACK_FRAME_OVERHEAD >> > + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call >> > ABI */ >> >> Is there any reason you can't use the existing 
>> machine_check_powernv_early code to do all this? > > Code sharing is nice but if we envision this going to stable kernels > butchering the existing handler is going to be a nightmare. The code is > quite a bit different between kernel versions. I'm not sure if we'll send it to stable kernels. But we obviously will back port it to some distros :) So if sharing the code is a significant impediment to that, then I'm happy if we don't share code initially. That could be done as a follow-up to this series. cheers
On Tue, 3 Jul 2018 08:08:14 +1000 "Nicholas Piggin" <npiggin@gmail.com> wrote: > On Mon, 02 Jul 2018 11:17:06 +0530 > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: > > > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > > > On pseries, as of today system crashes if we get a machine check > > exceptions due to SLB errors. These are soft errors and can be > > fixed by flushing the SLBs so the kernel can continue to function > > instead of system crash. We do this in real mode before turning on > > MMU. Otherwise we would run into nested machine checks. This patch > > now fetches the rtas error log in real mode and flushes the SLBs on > > SLB errors. > > > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > --- > > arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 > > arch/powerpc/include/asm/machdep.h | 1 > > arch/powerpc/kernel/exceptions-64s.S | 42 > > +++++++++++++++++++++ arch/powerpc/kernel/mce.c > > | 16 +++++++- arch/powerpc/mm/slb.c | > > 6 +++ arch/powerpc/platforms/powernv/opal.c | 1 > > arch/powerpc/platforms/pseries/pseries.h | 1 > > arch/powerpc/platforms/pseries/ras.c | 51 > > +++++++++++++++++++++++++ > > arch/powerpc/platforms/pseries/setup.c | 1 9 files > > changed, 116 insertions(+), 4 deletions(-) > > > > +TRAMP_REAL_BEGIN(machine_check_pSeries_early) > > +BEGIN_FTR_SECTION > > + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) > > + mr r10,r1 /* Save r1 */ > > + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency > > stack */ > > + subi r1,r1,INT_FRAME_SIZE /* alloc stack > > frame */ > > + mfspr r11,SPRN_SRR0 /* Save SRR0 */ > > + mfspr r12,SPRN_SRR1 /* Save SRR1 */ > > + EXCEPTION_PROLOG_COMMON_1() > > + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) > > + EXCEPTION_PROLOG_COMMON_3(0x200) > > + addi r3,r1,STACK_FRAME_OVERHEAD > > + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call > > ABI */ > > Is there any reason you can't use the existing > machine_check_powernv_early code to do all this? 
> > > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > > index efdd16a79075..221271c96a57 100644 > > --- a/arch/powerpc/kernel/mce.c > > +++ b/arch/powerpc/kernel/mce.c > > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs) > > { > > long handled = 0; > > > > - __this_cpu_inc(irq_stat.mce_exceptions); > > + /* > > + * For pSeries we count mce when we go into virtual mode > > machine > > + * check handler. Hence skip it. Also, We can't access per > > cpu > > + * variables in real mode for LPAR. > > + */ > > + if (early_cpu_has_feature(CPU_FTR_HVMODE)) > > + __this_cpu_inc(irq_stat.mce_exceptions); > > > > - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > > + /* > > + * See if platform is capable of handling machine check. > > + * Otherwise fallthrough and allow CPU to handle this > > machine check. > > + */ > > + if (ppc_md.machine_check_early) > > + handled = ppc_md.machine_check_early(regs); > > + else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > > handled = > > cur_cpu_spec->machine_check_early(regs); > > Would be good to add a powernv ppc_md handler which does the > cur_cpu_spec->machine_check_early() call now that other platforms are > calling this code. Because those aren't valid as a fallback call, but > specific to powernv. > Something like this (untested)? Subject: [PATCH] powerpc/powernv: define platform MCE handler. --- arch/powerpc/kernel/mce.c | 3 --- arch/powerpc/platforms/powernv/setup.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 221271c96a57..ae17d8aa60c4 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -498,12 +498,9 @@ long machine_check_early(struct pt_regs *regs) /* * See if platform is capable of handling machine check. - * Otherwise fallthrough and allow CPU to handle this machine check. 
*/ if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); - else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) - handled = cur_cpu_spec->machine_check_early(regs); return handled; } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index f96df0a25d05..b74c93bc2e55 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -431,6 +431,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu) return ret_freq; } +static long pnv_machine_check_early(struct pt_regs *regs) +{ + long handled = 0; + + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) + handled = cur_cpu_spec->machine_check_early(regs); + + return handled; +} + define_machine(powernv) { .name = "PowerNV", .probe = pnv_probe, @@ -442,6 +452,7 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = NULL, .calibrate_decr = generic_calibrate_decr, + .machine_check_early = pnv_machine_check_early, #ifdef CONFIG_KEXEC_CORE .kexec_cpu_down = pnv_kexec_cpu_down, #endif
Michal Suchánek <msuchanek@suse.de> writes: > On Tue, 3 Jul 2018 08:08:14 +1000 > "Nicholas Piggin" <npiggin@gmail.com> wrote: >> On Mon, 02 Jul 2018 11:17:06 +0530 >> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: >> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> >> > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c >> > index efdd16a79075..221271c96a57 100644 >> > --- a/arch/powerpc/kernel/mce.c >> > +++ b/arch/powerpc/kernel/mce.c >> > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs) >> > { >> > long handled = 0; >> > >> > - __this_cpu_inc(irq_stat.mce_exceptions); >> > + /* >> > + * For pSeries we count mce when we go into virtual mode >> > machine >> > + * check handler. Hence skip it. Also, We can't access per >> > cpu >> > + * variables in real mode for LPAR. >> > + */ >> > + if (early_cpu_has_feature(CPU_FTR_HVMODE)) >> > + __this_cpu_inc(irq_stat.mce_exceptions); >> > >> > - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) >> > + /* >> > + * See if platform is capable of handling machine check. >> > + * Otherwise fallthrough and allow CPU to handle this >> > machine check. >> > + */ >> > + if (ppc_md.machine_check_early) >> > + handled = ppc_md.machine_check_early(regs); >> > + else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) >> > handled = >> > cur_cpu_spec->machine_check_early(regs); >> >> Would be good to add a powernv ppc_md handler which does the >> cur_cpu_spec->machine_check_early() call now that other platforms are >> calling this code. Because those aren't valid as a fallback call, but >> specific to powernv. >> > > Something like this (untested)? > > Subject: [PATCH] powerpc/powernv: define platform MCE handler. LGTM. cheers
On Thu, 12 Jul 2018 15:41:13 +0200 Michal Suchánek <msuchanek@suse.de> wrote: > On Tue, 3 Jul 2018 08:08:14 +1000 > "Nicholas Piggin" <npiggin@gmail.com> wrote: > > > On Mon, 02 Jul 2018 11:17:06 +0530 > > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: > > > > > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > > > > > On pseries, as of today system crashes if we get a machine check > > > exceptions due to SLB errors. These are soft errors and can be > > > fixed by flushing the SLBs so the kernel can continue to function > > > instead of system crash. We do this in real mode before turning on > > > MMU. Otherwise we would run into nested machine checks. This patch > > > now fetches the rtas error log in real mode and flushes the SLBs on > > > SLB errors. > > > > > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > > --- > > > arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 > > > arch/powerpc/include/asm/machdep.h | 1 > > > arch/powerpc/kernel/exceptions-64s.S | 42 > > > +++++++++++++++++++++ arch/powerpc/kernel/mce.c > > > | 16 +++++++- arch/powerpc/mm/slb.c | > > > 6 +++ arch/powerpc/platforms/powernv/opal.c | 1 > > > arch/powerpc/platforms/pseries/pseries.h | 1 > > > arch/powerpc/platforms/pseries/ras.c | 51 > > > +++++++++++++++++++++++++ > > > arch/powerpc/platforms/pseries/setup.c | 1 9 files > > > changed, 116 insertions(+), 4 deletions(-) > > > > > > > +TRAMP_REAL_BEGIN(machine_check_pSeries_early) > > > +BEGIN_FTR_SECTION > > > + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) > > > + mr r10,r1 /* Save r1 */ > > > + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency > > > stack */ > > > + subi r1,r1,INT_FRAME_SIZE /* alloc stack > > > frame */ > > > + mfspr r11,SPRN_SRR0 /* Save SRR0 */ > > > + mfspr r12,SPRN_SRR1 /* Save SRR1 */ > > > + EXCEPTION_PROLOG_COMMON_1() > > > + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) > > > + EXCEPTION_PROLOG_COMMON_3(0x200) > > > + addi r3,r1,STACK_FRAME_OVERHEAD > > > + 
BRANCH_LINK_TO_FAR(machine_check_early) /* Function call > > > ABI */ > > > > Is there any reason you can't use the existing > > machine_check_powernv_early code to do all this? > > > > > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > > > index efdd16a79075..221271c96a57 100644 > > > --- a/arch/powerpc/kernel/mce.c > > > +++ b/arch/powerpc/kernel/mce.c > > > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs) > > > { > > > long handled = 0; > > > > > > - __this_cpu_inc(irq_stat.mce_exceptions); > > > + /* > > > + * For pSeries we count mce when we go into virtual mode > > > machine > > > + * check handler. Hence skip it. Also, We can't access per > > > cpu > > > + * variables in real mode for LPAR. > > > + */ > > > + if (early_cpu_has_feature(CPU_FTR_HVMODE)) > > > + __this_cpu_inc(irq_stat.mce_exceptions); > > > > > > - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > > > + /* > > > + * See if platform is capable of handling machine check. > > > + * Otherwise fallthrough and allow CPU to handle this > > > machine check. > > > + */ > > > + if (ppc_md.machine_check_early) > > > + handled = ppc_md.machine_check_early(regs); > > > + else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > > > handled = > > > cur_cpu_spec->machine_check_early(regs); > > > > Would be good to add a powernv ppc_md handler which does the > > cur_cpu_spec->machine_check_early() call now that other platforms are > > calling this code. Because those aren't valid as a fallback call, but > > specific to powernv. > > > > Something like this (untested)? Sorry, some emails fell through the cracks. Yes exactly like this would be good. If you can add a quick changelog and SOB, and Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Thanks, Nick > > Subject: [PATCH] powerpc/powernv: define platform MCE handler. 
> > --- > arch/powerpc/kernel/mce.c | 3 --- > arch/powerpc/platforms/powernv/setup.c | 11 +++++++++++ > 2 files changed, 11 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index 221271c96a57..ae17d8aa60c4 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -498,12 +498,9 @@ long machine_check_early(struct pt_regs *regs) > > /* > * See if platform is capable of handling machine check. > - * Otherwise fallthrough and allow CPU to handle this machine check. > */ > if (ppc_md.machine_check_early) > handled = ppc_md.machine_check_early(regs); > - else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > - handled = cur_cpu_spec->machine_check_early(regs); > return handled; > } > > diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c > index f96df0a25d05..b74c93bc2e55 100644 > --- a/arch/powerpc/platforms/powernv/setup.c > +++ b/arch/powerpc/platforms/powernv/setup.c > @@ -431,6 +431,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu) > return ret_freq; > } > > +static long pnv_machine_check_early(struct pt_regs *regs) > +{ > + long handled = 0; > + > + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) > + handled = cur_cpu_spec->machine_check_early(regs); > + > + return handled; > +} > + > define_machine(powernv) { > .name = "PowerNV", > .probe = pnv_probe, > @@ -442,6 +452,7 @@ define_machine(powernv) { > .machine_shutdown = pnv_shutdown, > .power_save = NULL, > .calibrate_decr = generic_calibrate_decr, > + .machine_check_early = pnv_machine_check_early, > #ifdef CONFIG_KEXEC_CORE > .kexec_cpu_down = pnv_kexec_cpu_down, > #endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 50ed64fba4ae..cc00a7088cf3 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -487,6 +487,7 @@ extern void hpte_init_native(void); extern void slb_initialize(void); extern void slb_flush_and_rebolt(void); +extern void slb_flush_and_rebolt_realmode(void); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index ffe7c71e1132..fe447e0d4140 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -108,6 +108,7 @@ struct machdep_calls { /* Early exception handlers called in realmode */ int (*hmi_exception_early)(struct pt_regs *regs); + int (*machine_check_early)(struct pt_regs *regs); /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f283958129f2..0038596b7906 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -332,6 +332,9 @@ TRAMP_REAL_BEGIN(machine_check_pSeries) machine_check_fwnmi: SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) +BEGIN_FTR_SECTION + b machine_check_pSeries_early +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) /* @@ -343,6 +346,45 @@ machine_check_pSeries_0: TRAMP_KVM_SKIP(PACA_EXMC, 0x200) +TRAMP_REAL_BEGIN(machine_check_pSeries_early) +BEGIN_FTR_SECTION + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) + mr r10,r1 /* Save r1 */ + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + mfspr r11,SPRN_SRR0 /* Save SRR0 */ + mfspr r12,SPRN_SRR1 /* Save SRR1 */ + EXCEPTION_PROLOG_COMMON_1() + 
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) + EXCEPTION_PROLOG_COMMON_3(0x200) + addi r3,r1,STACK_FRAME_OVERHEAD + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */ + + /* Move original SRR0 and SRR1 into the respective regs */ + ld r9,_MSR(r1) + mtspr SPRN_SRR1,r9 + ld r3,_NIP(r1) + mtspr SPRN_SRR0,r3 + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlr r9 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_GPR(10, r1) + ld r11,_CCR(r1) + mtcr r11 + REST_GPR(11, r1) + REST_2GPRS(12, r1) + /* restore original r1. */ + ld r1,GPR1(r1) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0(PACA_EXMC) + b machine_check_pSeries_0 +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) + EXC_COMMON_BEGIN(machine_check_common) /* * Machine check is different because we use a different diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index efdd16a79075..221271c96a57 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __this_cpu_inc(irq_stat.mce_exceptions); + /* + * For pSeries we count mce when we go into virtual mode machine + * check handler. Hence skip it. Also, We can't access per cpu + * variables in real mode for LPAR. + */ + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + __this_cpu_inc(irq_stat.mce_exceptions); - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) + /* + * See if platform is capable of handling machine check. + * Otherwise fallthrough and allow CPU to handle this machine check. 
+ */ + if (ppc_md.machine_check_early) + handled = ppc_md.machine_check_early(regs); + else if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; } diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 66577cc66dc9..5b1813b98358 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -145,6 +145,12 @@ void slb_flush_and_rebolt(void) get_paca()->slb_cache_ptr = 0; } +void slb_flush_and_rebolt_realmode(void) +{ + __slb_flush_and_rebolt(); + get_paca()->slb_cache_ptr = 0; +} + void slb_vmalloc_update(void) { unsigned long vflags; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 48fbb41af5d1..ed548d40a9e1 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -417,7 +417,6 @@ static int opal_recover_mce(struct pt_regs *regs, if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ - pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 60db2ee511fb..3611db5dd583 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -24,6 +24,7 @@ struct pt_regs; extern int pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); +extern int pSeries_machine_check_realmode(struct pt_regs *regs); #ifdef CONFIG_SMP extern void smp_init_pseries(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 851ce326874a..9aa7885e0148 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -427,6 +427,35 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset 
*/ } +static int mce_handle_error(struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log; + int disposition = rtas_error_disposition(errp); + uint8_t error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (pseries_log == NULL) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = rtas_mc_error_type(mce_log); + + if ((disposition == RTAS_DISP_NOT_RECOVERED) && + (error_type == PSERIES_MC_ERROR_TYPE_SLB)) { + /* Store the old slb content someplace. */ + slb_flush_and_rebolt_realmode(); + disposition = RTAS_DISP_FULLY_RECOVERED; + rtas_set_disposition_recovered(errp); + } + +out: + return disposition; +} + /* * Process MCE rtas errlog event. */ @@ -503,11 +532,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs) struct rtas_error_log *errp; if (fwnmi_active) { - errp = fwnmi_get_errinfo(regs); fwnmi_release_errinfo(); + errp = fwnmi_get_errlog(); if (errp && recover_mce(regs, errp)) return 1; } return 0; } + +int pSeries_machine_check_realmode(struct pt_regs *regs) +{ + struct rtas_error_log *errp; + int disposition; + + if (fwnmi_active) { + errp = fwnmi_get_errinfo(regs); + /* + * Call to fwnmi_release_errinfo() in real mode causes kernel + * to panic. Hence we will call it as soon as we go into + * virtual mode. 
+ */ + disposition = mce_handle_error(errp); + if (disposition == RTAS_DISP_FULLY_RECOVERED) + return 1; + } + + return 0; +} diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 60a067a6e743..249b02bc5c41 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -999,6 +999,7 @@ define_machine(pseries) { .calibrate_decr = generic_calibrate_decr, .progress = rtas_progress, .system_reset_exception = pSeries_system_reset_exception, + .machine_check_early = pSeries_machine_check_realmode, .machine_check_exception = pSeries_machine_check_exception, #ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec,