diff mbox series

[v5,5/7] powerpc/pseries: flush SLB contents on SLB MCE errors.

Message ID 153051042206.30541.2156877677180900261.stgit@jupiter.in.ibm.com (mailing list archive)
State Superseded
Headers show
Series powerpc/pseries: Machine check handler improvements. | expand

Commit Message

Mahesh J Salgaonkar July 2, 2018, 5:47 a.m. UTC
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

On pseries, the system currently crashes if we get a machine check
exception due to SLB errors. These are soft errors and can be fixed by
flushing the SLBs, so the kernel can continue to function instead of
crashing. We do this in real mode before turning on the MMU; otherwise
we would run into nested machine checks. This patch now fetches the
RTAS error log in real mode and flushes the SLBs on SLB errors.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
 arch/powerpc/include/asm/machdep.h            |    1 
 arch/powerpc/kernel/exceptions-64s.S          |   42 +++++++++++++++++++++
 arch/powerpc/kernel/mce.c                     |   16 +++++++-
 arch/powerpc/mm/slb.c                         |    6 +++
 arch/powerpc/platforms/powernv/opal.c         |    1 
 arch/powerpc/platforms/pseries/pseries.h      |    1 
 arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/setup.c        |    1 
 9 files changed, 116 insertions(+), 4 deletions(-)

Comments

Nicholas Piggin July 2, 2018, 10:08 p.m. UTC | #1
On Mon, 02 Jul 2018 11:17:06 +0530
Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> 
> On pseries, as of today system crashes if we get a machine check
> exceptions due to SLB errors. These are soft errors and can be fixed by
> flushing the SLBs so the kernel can continue to function instead of
> system crash. We do this in real mode before turning on MMU. Otherwise
> we would run into nested machine checks. This patch now fetches the
> rtas error log in real mode and flushes the SLBs on SLB errors.
> 
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
>  arch/powerpc/include/asm/machdep.h            |    1 
>  arch/powerpc/kernel/exceptions-64s.S          |   42 +++++++++++++++++++++
>  arch/powerpc/kernel/mce.c                     |   16 +++++++-
>  arch/powerpc/mm/slb.c                         |    6 +++
>  arch/powerpc/platforms/powernv/opal.c         |    1 
>  arch/powerpc/platforms/pseries/pseries.h      |    1 
>  arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++++++++++++++++
>  arch/powerpc/platforms/pseries/setup.c        |    1 
>  9 files changed, 116 insertions(+), 4 deletions(-)
> 


> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> +BEGIN_FTR_SECTION
> +	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> +	mr	r10,r1			/* Save r1 */
> +	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
> +	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
> +	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
> +	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
> +	EXCEPTION_PROLOG_COMMON_1()
> +	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> +	EXCEPTION_PROLOG_COMMON_3(0x200)
> +	addi	r3,r1,STACK_FRAME_OVERHEAD
> +	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */

Is there any reason you can't use the existing
machine_check_powernv_early code to do all this?

> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index efdd16a79075..221271c96a57 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
>  {
>  	long handled = 0;
>  
> -	__this_cpu_inc(irq_stat.mce_exceptions);
> +	/*
> +	 * For pSeries we count mce when we go into virtual mode machine
> +	 * check handler. Hence skip it. Also, We can't access per cpu
> +	 * variables in real mode for LPAR.
> +	 */
> +	if (early_cpu_has_feature(CPU_FTR_HVMODE))
> +		__this_cpu_inc(irq_stat.mce_exceptions);
>  
> -	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> +	/*
> +	 * See if platform is capable of handling machine check.
> +	 * Otherwise fallthrough and allow CPU to handle this machine check.
> +	 */
> +	if (ppc_md.machine_check_early)
> +		handled = ppc_md.machine_check_early(regs);
> +	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
>  		handled = cur_cpu_spec->machine_check_early(regs);

Would be good to add a powernv ppc_md handler which does the
cur_cpu_spec->machine_check_early() call now that other platforms are
calling this code. Because those aren't valid as a fallback call, but
specific to powernv.

> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
> index 48fbb41af5d1..ed548d40a9e1 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -417,7 +417,6 @@ static int opal_recover_mce(struct pt_regs *regs,
>  
>  	if (!(regs->msr & MSR_RI)) {
>  		/* If MSR_RI isn't set, we cannot recover */
> -		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");

What's the reason for this change?

>  		recovered = 0;
>  	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
>  		/* Platform corrected itself */
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 60db2ee511fb..3611db5dd583 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -24,6 +24,7 @@ struct pt_regs;
>  
>  extern int pSeries_system_reset_exception(struct pt_regs *regs);
>  extern int pSeries_machine_check_exception(struct pt_regs *regs);
> +extern int pSeries_machine_check_realmode(struct pt_regs *regs);
>  
>  #ifdef CONFIG_SMP
>  extern void smp_init_pseries(void);
> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
> index 851ce326874a..9aa7885e0148 100644
> --- a/arch/powerpc/platforms/pseries/ras.c
> +++ b/arch/powerpc/platforms/pseries/ras.c
> @@ -427,6 +427,35 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
>  	return 0; /* need to perform reset */
>  }
>  
> +static int mce_handle_error(struct rtas_error_log *errp)
> +{
> +	struct pseries_errorlog *pseries_log;
> +	struct pseries_mc_errorlog *mce_log;
> +	int disposition = rtas_error_disposition(errp);
> +	uint8_t error_type;
> +
> +	if (!rtas_error_extended(errp))
> +		goto out;
> +
> +	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
> +	if (pseries_log == NULL)
> +		goto out;
> +
> +	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
> +	error_type = rtas_mc_error_type(mce_log);
> +
> +	if ((disposition == RTAS_DISP_NOT_RECOVERED) &&
> +			(error_type == PSERIES_MC_ERROR_TYPE_SLB)) {
> +		/* Store the old slb content someplace. */
> +		slb_flush_and_rebolt_realmode();
> +		disposition = RTAS_DISP_FULLY_RECOVERED;
> +		rtas_set_disposition_recovered(errp);
> +	}
> +
> +out:
> +	return disposition;
> +}
> +
>  /*
>   * Process MCE rtas errlog event.
>   */
> @@ -503,11 +532,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs)
>  	struct rtas_error_log *errp;
>  
>  	if (fwnmi_active) {
> -		errp = fwnmi_get_errinfo(regs);
>  		fwnmi_release_errinfo();

Should the fwnmi_release_errinfo be done in the realmode path as well
now, or is there some reason to leave it here?

> +		errp = fwnmi_get_errlog();
>  		if (errp && recover_mce(regs, errp))
>  			return 1;
>  	}
>  
>  	return 0;
>  }
> +
> +int pSeries_machine_check_realmode(struct pt_regs *regs)
> +{
> +	struct rtas_error_log *errp;
> +	int disposition;
> +
> +	if (fwnmi_active) {
> +		errp = fwnmi_get_errinfo(regs);
> +		/*
> +		 * Call to fwnmi_release_errinfo() in real mode causes kernel
> +		 * to panic. Hence we will call it as soon as we go into
> +		 * virtual mode.
> +		 */
> +		disposition = mce_handle_error(errp);
> +		if (disposition == RTAS_DISP_FULLY_RECOVERED)
> +			return 1;
> +	}
> +
> +	return 0;
> +}
> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
> index 60a067a6e743..249b02bc5c41 100644
> --- a/arch/powerpc/platforms/pseries/setup.c
> +++ b/arch/powerpc/platforms/pseries/setup.c
> @@ -999,6 +999,7 @@ define_machine(pseries) {
>  	.calibrate_decr		= generic_calibrate_decr,
>  	.progress		= rtas_progress,
>  	.system_reset_exception = pSeries_system_reset_exception,
> +	.machine_check_early	= pSeries_machine_check_realmode,
>  	.machine_check_exception = pSeries_machine_check_exception,
>  #ifdef CONFIG_KEXEC_CORE
>  	.machine_kexec          = pSeries_machine_kexec,
>
Mahesh J Salgaonkar July 3, 2018, 7:20 a.m. UTC | #2
On 07/03/2018 03:38 AM, Nicholas Piggin wrote:
> On Mon, 02 Jul 2018 11:17:06 +0530
> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
>> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>>
>> On pseries, as of today system crashes if we get a machine check
>> exceptions due to SLB errors. These are soft errors and can be fixed by
>> flushing the SLBs so the kernel can continue to function instead of
>> system crash. We do this in real mode before turning on MMU. Otherwise
>> we would run into nested machine checks. This patch now fetches the
>> rtas error log in real mode and flushes the SLBs on SLB errors.
>>
>> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
>>  arch/powerpc/include/asm/machdep.h            |    1 
>>  arch/powerpc/kernel/exceptions-64s.S          |   42 +++++++++++++++++++++
>>  arch/powerpc/kernel/mce.c                     |   16 +++++++-
>>  arch/powerpc/mm/slb.c                         |    6 +++
>>  arch/powerpc/platforms/powernv/opal.c         |    1 
>>  arch/powerpc/platforms/pseries/pseries.h      |    1 
>>  arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++++++++++++++++
>>  arch/powerpc/platforms/pseries/setup.c        |    1 
>>  9 files changed, 116 insertions(+), 4 deletions(-)
>>
> 
> 
>> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
>> +BEGIN_FTR_SECTION
>> +	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>> +	mr	r10,r1			/* Save r1 */
>> +	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
>> +	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
>> +	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
>> +	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
>> +	EXCEPTION_PROLOG_COMMON_1()
>> +	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>> +	EXCEPTION_PROLOG_COMMON_3(0x200)
>> +	addi	r3,r1,STACK_FRAME_OVERHEAD
>> +	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
> 
> Is there any reason you can't use the existing
> machine_check_powernv_early code to do all this?

I did think about that :-). But the machine_check_powernv_early code
does a bit of extra work that isn't required on pseries, like touching
the ME bit in the MSR and the many checks that are done in
machine_check_handle_early() before going to virtual mode. But on second
look I see that we can bypass all that with an HVMODE FTR section. Will
rename machine_check_powernv_early to machine_check_common_early and
reuse it.

> 
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index efdd16a79075..221271c96a57 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
>>  {
>>  	long handled = 0;
>>  
>> -	__this_cpu_inc(irq_stat.mce_exceptions);
>> +	/*
>> +	 * For pSeries we count mce when we go into virtual mode machine
>> +	 * check handler. Hence skip it. Also, We can't access per cpu
>> +	 * variables in real mode for LPAR.
>> +	 */
>> +	if (early_cpu_has_feature(CPU_FTR_HVMODE))
>> +		__this_cpu_inc(irq_stat.mce_exceptions);
>>  
>> -	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
>> +	/*
>> +	 * See if platform is capable of handling machine check.
>> +	 * Otherwise fallthrough and allow CPU to handle this machine check.
>> +	 */
>> +	if (ppc_md.machine_check_early)
>> +		handled = ppc_md.machine_check_early(regs);
>> +	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
>>  		handled = cur_cpu_spec->machine_check_early(regs);
> 
> Would be good to add a powernv ppc_md handler which does the
> cur_cpu_spec->machine_check_early() call now that other platforms are
> calling this code. Because those aren't valid as a fallback call, but
> specific to powernv.
> 
>> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
>> index 48fbb41af5d1..ed548d40a9e1 100644
>> --- a/arch/powerpc/platforms/powernv/opal.c
>> +++ b/arch/powerpc/platforms/powernv/opal.c
>> @@ -417,7 +417,6 @@ static int opal_recover_mce(struct pt_regs *regs,
>>  
>>  	if (!(regs->msr & MSR_RI)) {
>>  		/* If MSR_RI isn't set, we cannot recover */
>> -		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
> 
> What's the reason for this change?

Err... This was a mistake. My bad. Thanks for catching this. Will
remove this hunk in the next revision. We need a similar print for
pSeries in ras.c.

> 
>>  		recovered = 0;
>>  	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
>>  		/* Platform corrected itself */
>> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
>> index 60db2ee511fb..3611db5dd583 100644
>> --- a/arch/powerpc/platforms/pseries/pseries.h
>> +++ b/arch/powerpc/platforms/pseries/pseries.h
>> @@ -24,6 +24,7 @@ struct pt_regs;
>>  
>>  extern int pSeries_system_reset_exception(struct pt_regs *regs);
>>  extern int pSeries_machine_check_exception(struct pt_regs *regs);
>> +extern int pSeries_machine_check_realmode(struct pt_regs *regs);
>>  
>>  #ifdef CONFIG_SMP
>>  extern void smp_init_pseries(void);
>> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
>> index 851ce326874a..9aa7885e0148 100644
>> --- a/arch/powerpc/platforms/pseries/ras.c
>> +++ b/arch/powerpc/platforms/pseries/ras.c
>> @@ -427,6 +427,35 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
>>  	return 0; /* need to perform reset */
>>  }
>>  
>> +static int mce_handle_error(struct rtas_error_log *errp)
>> +{
>> +	struct pseries_errorlog *pseries_log;
>> +	struct pseries_mc_errorlog *mce_log;
>> +	int disposition = rtas_error_disposition(errp);
>> +	uint8_t error_type;
>> +
>> +	if (!rtas_error_extended(errp))
>> +		goto out;
>> +
>> +	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
>> +	if (pseries_log == NULL)
>> +		goto out;
>> +
>> +	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
>> +	error_type = rtas_mc_error_type(mce_log);
>> +
>> +	if ((disposition == RTAS_DISP_NOT_RECOVERED) &&
>> +			(error_type == PSERIES_MC_ERROR_TYPE_SLB)) {
>> +		/* Store the old slb content someplace. */
>> +		slb_flush_and_rebolt_realmode();
>> +		disposition = RTAS_DISP_FULLY_RECOVERED;
>> +		rtas_set_disposition_recovered(errp);
>> +	}
>> +
>> +out:
>> +	return disposition;
>> +}
>> +
>>  /*
>>   * Process MCE rtas errlog event.
>>   */
>> @@ -503,11 +532,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs)
>>  	struct rtas_error_log *errp;
>>  
>>  	if (fwnmi_active) {
>> -		errp = fwnmi_get_errinfo(regs);
>>  		fwnmi_release_errinfo();
> 
> Should the fwnmi_release_errinfo be done in the realmode path as well
> now, or is there some reason to leave it here?

Calling fwnmi_release_errinfo() in real mode causes a kernel panic.
I couldn't debug further to find out why, so I decided to keep it in
virtual mode. I have mentioned that in the comment below, in
pSeries_machine_check_realmode().

> 
>> +		errp = fwnmi_get_errlog();
>>  		if (errp && recover_mce(regs, errp))
>>  			return 1;
>>  	}
>>  
>>  	return 0;
>>  }
>> +
>> +int pSeries_machine_check_realmode(struct pt_regs *regs)
>> +{
>> +	struct rtas_error_log *errp;
>> +	int disposition;
>> +
>> +	if (fwnmi_active) {
>> +		errp = fwnmi_get_errinfo(regs);
>> +		/*
>> +		 * Call to fwnmi_release_errinfo() in real mode causes kernel
>> +		 * to panic. Hence we will call it as soon as we go into
>> +		 * virtual mode.
>> +		 */
>> +		disposition = mce_handle_error(errp);
>> +		if (disposition == RTAS_DISP_FULLY_RECOVERED)
>> +			return 1;
>> +	}
>> +
>> +	return 0;
>> +}
>> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
>> index 60a067a6e743..249b02bc5c41 100644
>> --- a/arch/powerpc/platforms/pseries/setup.c
>> +++ b/arch/powerpc/platforms/pseries/setup.c
>> @@ -999,6 +999,7 @@ define_machine(pseries) {
>>  	.calibrate_decr		= generic_calibrate_decr,
>>  	.progress		= rtas_progress,
>>  	.system_reset_exception = pSeries_system_reset_exception,
>> +	.machine_check_early	= pSeries_machine_check_realmode,
>>  	.machine_check_exception = pSeries_machine_check_exception,
>>  #ifdef CONFIG_KEXEC_CORE
>>  	.machine_kexec          = pSeries_machine_kexec,
>>
> 

Thanks for your review.
Michal Suchánek July 3, 2018, 10:37 a.m. UTC | #3
On Tue, 3 Jul 2018 08:08:14 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> On Mon, 02 Jul 2018 11:17:06 +0530
> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> > 
> > On pseries, as of today system crashes if we get a machine check
> > exceptions due to SLB errors. These are soft errors and can be
> > fixed by flushing the SLBs so the kernel can continue to function
> > instead of system crash. We do this in real mode before turning on
> > MMU. Otherwise we would run into nested machine checks. This patch
> > now fetches the rtas error log in real mode and flushes the SLBs on
> > SLB errors.
> > 
> > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> > ---
> >  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
> >  arch/powerpc/include/asm/machdep.h            |    1 
> >  arch/powerpc/kernel/exceptions-64s.S          |   42 +++++++++++++++++++++
> >  arch/powerpc/kernel/mce.c                     |   16 +++++++-
> >  arch/powerpc/mm/slb.c                         |    6 +++
> >  arch/powerpc/platforms/powernv/opal.c         |    1 
> >  arch/powerpc/platforms/pseries/pseries.h      |    1 
> >  arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++++++++++++++++
> >  arch/powerpc/platforms/pseries/setup.c        |    1 
> >  9 files changed, 116 insertions(+), 4 deletions(-)
> 
> 
> > +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> > +BEGIN_FTR_SECTION
> > +	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> > +	mr	r10,r1			/* Save r1 */
> > +	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency
> > stack */
> > +	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack
> > frame		*/
> > +	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
> > +	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
> > +	EXCEPTION_PROLOG_COMMON_1()
> > +	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> > +	EXCEPTION_PROLOG_COMMON_3(0x200)
> > +	addi	r3,r1,STACK_FRAME_OVERHEAD
> > +	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call
> > ABI */  
> 
> Is there any reason you can't use the existing
> machine_check_powernv_early code to do all this?

Code sharing is nice, but if we envision this going to stable kernels,
butchering the existing handler is going to be a nightmare. The code is
quite a bit different between kernel versions.

This code as is requires the bit that introduces
EXCEPTION_PROLOG_COMMON_1 and then should work on Linux 3.14+

Thanks

Michal
Michael Ellerman July 4, 2018, 1:15 p.m. UTC | #4
Michal Suchánek <msuchanek@suse.de> writes:
> On Tue, 3 Jul 2018 08:08:14 +1000
> Nicholas Piggin <npiggin@gmail.com> wrote:
>> On Mon, 02 Jul 2018 11:17:06 +0530
>> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
>> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>> > 
>> > On pseries, as of today system crashes if we get a machine check
>> > exceptions due to SLB errors. These are soft errors and can be
>> > fixed by flushing the SLBs so the kernel can continue to function
>> > instead of system crash. We do this in real mode before turning on
>> > MMU. Otherwise we would run into nested machine checks. This patch
>> > now fetches the rtas error log in real mode and flushes the SLBs on
>> > SLB errors.
>> > 
>> > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>> > ---
>> >  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
>> >  arch/powerpc/include/asm/machdep.h            |    1 
>> >  arch/powerpc/kernel/exceptions-64s.S          |   42 +++++++++++++++++++++
>> >  arch/powerpc/kernel/mce.c                     |   16 +++++++-
>> >  arch/powerpc/mm/slb.c                         |    6 +++
>> >  arch/powerpc/platforms/powernv/opal.c         |    1 
>> >  arch/powerpc/platforms/pseries/pseries.h      |    1 
>> >  arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++++++++++++++++
>> >  arch/powerpc/platforms/pseries/setup.c        |    1 
>> >  9 files changed, 116 insertions(+), 4 deletions(-)
>> 
>> 
>> > +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
>> > +BEGIN_FTR_SECTION
>> > +	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>> > +	mr	r10,r1			/* Save r1 */
>> > +	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency
>> > stack */
>> > +	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack
>> > frame		*/
>> > +	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
>> > +	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
>> > +	EXCEPTION_PROLOG_COMMON_1()
>> > +	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>> > +	EXCEPTION_PROLOG_COMMON_3(0x200)
>> > +	addi	r3,r1,STACK_FRAME_OVERHEAD
>> > +	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call
>> > ABI */  
>> 
>> Is there any reason you can't use the existing
>> machine_check_powernv_early code to do all this?
>
> Code sharing is nice but if we envision this going to stable kernels
> butchering the existing handler is going to be a nightmare. The code is
> quite a bit different between kernel versions.

I'm not sure if we'll send it to stable kernels. But we obviously will
back port it to some distros :)

So if sharing the code is a significant impediment to that, then I'm
happy if we don't share code initially. That could be done as a
follow-up to this series.

cheers
Michal Suchánek July 12, 2018, 1:41 p.m. UTC | #5
On Tue, 3 Jul 2018 08:08:14 +1000
"Nicholas Piggin" <npiggin@gmail.com> wrote:

> On Mon, 02 Jul 2018 11:17:06 +0530
> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> > 
> > On pseries, as of today system crashes if we get a machine check
> > exceptions due to SLB errors. These are soft errors and can be
> > fixed by flushing the SLBs so the kernel can continue to function
> > instead of system crash. We do this in real mode before turning on
> > MMU. Otherwise we would run into nested machine checks. This patch
> > now fetches the rtas error log in real mode and flushes the SLBs on
> > SLB errors.
> > 
> > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> > ---
> >  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
> >  arch/powerpc/include/asm/machdep.h            |    1 
> >  arch/powerpc/kernel/exceptions-64s.S          |   42 +++++++++++++++++++++
> >  arch/powerpc/kernel/mce.c                     |   16 +++++++-
> >  arch/powerpc/mm/slb.c                         |    6 +++
> >  arch/powerpc/platforms/powernv/opal.c         |    1 
> >  arch/powerpc/platforms/pseries/pseries.h      |    1 
> >  arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++++++++++++++++
> >  arch/powerpc/platforms/pseries/setup.c        |    1 
> >  9 files changed, 116 insertions(+), 4 deletions(-)
> 
> 
> > +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> > +BEGIN_FTR_SECTION
> > +	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> > +	mr	r10,r1			/* Save r1 */
> > +	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency
> > stack */
> > +	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack
> > frame		*/
> > +	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
> > +	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
> > +	EXCEPTION_PROLOG_COMMON_1()
> > +	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> > +	EXCEPTION_PROLOG_COMMON_3(0x200)
> > +	addi	r3,r1,STACK_FRAME_OVERHEAD
> > +	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call
> > ABI */  
> 
> Is there any reason you can't use the existing
> machine_check_powernv_early code to do all this?
> 
> > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> > index efdd16a79075..221271c96a57 100644
> > --- a/arch/powerpc/kernel/mce.c
> > +++ b/arch/powerpc/kernel/mce.c
> > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
> >  {
> >  	long handled = 0;
> >  
> > -	__this_cpu_inc(irq_stat.mce_exceptions);
> > +	/*
> > +	 * For pSeries we count mce when we go into virtual mode
> > machine
> > +	 * check handler. Hence skip it. Also, We can't access per
> > cpu
> > +	 * variables in real mode for LPAR.
> > +	 */
> > +	if (early_cpu_has_feature(CPU_FTR_HVMODE))
> > +		__this_cpu_inc(irq_stat.mce_exceptions);
> >  
> > -	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> > +	/*
> > +	 * See if platform is capable of handling machine check.
> > +	 * Otherwise fallthrough and allow CPU to handle this
> > machine check.
> > +	 */
> > +	if (ppc_md.machine_check_early)
> > +		handled = ppc_md.machine_check_early(regs);
> > +	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> >  		handled =
> > cur_cpu_spec->machine_check_early(regs);  
> 
> Would be good to add a powernv ppc_md handler which does the
> cur_cpu_spec->machine_check_early() call now that other platforms are
> calling this code. Because those aren't valid as a fallback call, but
> specific to powernv.
> 

Something like this (untested)?

Subject: [PATCH] powerpc/powernv: define platform MCE handler.

---
 arch/powerpc/kernel/mce.c              |  3 ---
 arch/powerpc/platforms/powernv/setup.c | 11 +++++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 221271c96a57..ae17d8aa60c4 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -498,12 +498,9 @@ long machine_check_early(struct pt_regs *regs)
 
 	/*
 	 * See if platform is capable of handling machine check.
-	 * Otherwise fallthrough and allow CPU to handle this machine check.
 	 */
 	if (ppc_md.machine_check_early)
 		handled = ppc_md.machine_check_early(regs);
-	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
-		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;
 }
 
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index f96df0a25d05..b74c93bc2e55 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -431,6 +431,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
 	return ret_freq;
 }
 
+static long pnv_machine_check_early(struct pt_regs *regs)
+{
+	long handled = 0;
+
+	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+		handled = cur_cpu_spec->machine_check_early(regs);
+
+	return handled;
+}
+
 define_machine(powernv) {
 	.name			= "PowerNV",
 	.probe			= pnv_probe,
@@ -442,6 +452,7 @@ define_machine(powernv) {
 	.machine_shutdown	= pnv_shutdown,
 	.power_save             = NULL,
 	.calibrate_decr		= generic_calibrate_decr,
+	.machine_check_early	= pnv_machine_check_early,
 #ifdef CONFIG_KEXEC_CORE
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
Michael Ellerman July 19, 2018, 1:08 p.m. UTC | #6
Michal Suchánek <msuchanek@suse.de> writes:
> On Tue, 3 Jul 2018 08:08:14 +1000
> "Nicholas Piggin" <npiggin@gmail.com> wrote:
>> On Mon, 02 Jul 2018 11:17:06 +0530
>> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
>> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>> > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> > index efdd16a79075..221271c96a57 100644
>> > --- a/arch/powerpc/kernel/mce.c
>> > +++ b/arch/powerpc/kernel/mce.c
>> > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
>> >  {
>> >  	long handled = 0;
>> >  
>> > -	__this_cpu_inc(irq_stat.mce_exceptions);
>> > +	/*
>> > +	 * For pSeries we count mce when we go into virtual mode
>> > machine
>> > +	 * check handler. Hence skip it. Also, We can't access per
>> > cpu
>> > +	 * variables in real mode for LPAR.
>> > +	 */
>> > +	if (early_cpu_has_feature(CPU_FTR_HVMODE))
>> > +		__this_cpu_inc(irq_stat.mce_exceptions);
>> >  
>> > -	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
>> > +	/*
>> > +	 * See if platform is capable of handling machine check.
>> > +	 * Otherwise fallthrough and allow CPU to handle this
>> > machine check.
>> > +	 */
>> > +	if (ppc_md.machine_check_early)
>> > +		handled = ppc_md.machine_check_early(regs);
>> > +	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
>> >  		handled =
>> > cur_cpu_spec->machine_check_early(regs);  
>> 
>> Would be good to add a powernv ppc_md handler which does the
>> cur_cpu_spec->machine_check_early() call now that other platforms are
>> calling this code. Because those aren't valid as a fallback call, but
>> specific to powernv.
>> 
>
> Something like this (untested)?
>
> Subject: [PATCH] powerpc/powernv: define platform MCE handler.

LGTM.

cheers
Nicholas Piggin Aug. 1, 2018, 5:49 a.m. UTC | #7
On Thu, 12 Jul 2018 15:41:13 +0200
Michal Suchánek <msuchanek@suse.de> wrote:

> On Tue, 3 Jul 2018 08:08:14 +1000
> "Nicholas Piggin" <npiggin@gmail.com> wrote:
> 
> > On Mon, 02 Jul 2018 11:17:06 +0530
> > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> >   
> > > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> > > 
> > > On pseries, as of today system crashes if we get a machine check
> > > exceptions due to SLB errors. These are soft errors and can be
> > > fixed by flushing the SLBs so the kernel can continue to function
> > > instead of system crash. We do this in real mode before turning on
> > > MMU. Otherwise we would run into nested machine checks. This patch
> > > now fetches the rtas error log in real mode and flushes the SLBs on
> > > SLB errors.
> > > 
> > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> > > ---
> > >  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
> > >  arch/powerpc/include/asm/machdep.h            |    1 
> > >  arch/powerpc/kernel/exceptions-64s.S          |   42 +++++++++++++++++++++
> > >  arch/powerpc/kernel/mce.c                     |   16 +++++++-
> > >  arch/powerpc/mm/slb.c                         |    6 +++
> > >  arch/powerpc/platforms/powernv/opal.c         |    1 
> > >  arch/powerpc/platforms/pseries/pseries.h      |    1 
> > >  arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++++++++++++++++
> > >  arch/powerpc/platforms/pseries/setup.c        |    1 
> > >  9 files changed, 116 insertions(+), 4 deletions(-)
> > 
> >   
> > > +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> > > +BEGIN_FTR_SECTION
> > > +	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> > > +	mr	r10,r1			/* Save r1 */
> > > +	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency
> > > stack */
> > > +	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack
> > > frame		*/
> > > +	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
> > > +	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
> > > +	EXCEPTION_PROLOG_COMMON_1()
> > > +	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> > > +	EXCEPTION_PROLOG_COMMON_3(0x200)
> > > +	addi	r3,r1,STACK_FRAME_OVERHEAD
> > > +	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call
> > > ABI */    
> > 
> > Is there any reason you can't use the existing
> > machine_check_powernv_early code to do all this?
> >   
> > > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> > > index efdd16a79075..221271c96a57 100644
> > > --- a/arch/powerpc/kernel/mce.c
> > > +++ b/arch/powerpc/kernel/mce.c
> > > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
> > >  {
> > >  	long handled = 0;
> > >  
> > > -	__this_cpu_inc(irq_stat.mce_exceptions);
> > > +	/*
> > > +	 * For pSeries we count mce when we go into virtual mode
> > > machine
> > > +	 * check handler. Hence skip it. Also, We can't access per
> > > cpu
> > > +	 * variables in real mode for LPAR.
> > > +	 */
> > > +	if (early_cpu_has_feature(CPU_FTR_HVMODE))
> > > +		__this_cpu_inc(irq_stat.mce_exceptions);
> > >  
> > > -	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> > > +	/*
> > > +	 * See if platform is capable of handling machine check.
> > > +	 * Otherwise fallthrough and allow CPU to handle this
> > > machine check.
> > > +	 */
> > > +	if (ppc_md.machine_check_early)
> > > +		handled = ppc_md.machine_check_early(regs);
> > > +	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> > >  		handled =
> > > cur_cpu_spec->machine_check_early(regs);    
> > 
> > Would be good to add a powernv ppc_md handler which does the
> > cur_cpu_spec->machine_check_early() call now that other platforms are
> > calling this code. Because those aren't valid as a fallback call, but
> > specific to powernv.
> >   
> 
> Something like this (untested)?

Sorry, some emails fell through the cracks. Yes exactly like this would
be good. If you can add a quick changelog and SOB, and
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

Thanks,
Nick

> 
> Subject: [PATCH] powerpc/powernv: define platform MCE handler.
> 
> ---
>  arch/powerpc/kernel/mce.c              |  3 ---
>  arch/powerpc/platforms/powernv/setup.c | 11 +++++++++++
>  2 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index 221271c96a57..ae17d8aa60c4 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -498,12 +498,9 @@ long machine_check_early(struct pt_regs *regs)
>  
>  	/*
>  	 * See if platform is capable of handling machine check.
> -	 * Otherwise fallthrough and allow CPU to handle this machine check.
>  	 */
>  	if (ppc_md.machine_check_early)
>  		handled = ppc_md.machine_check_early(regs);
> -	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> -		handled = cur_cpu_spec->machine_check_early(regs);
>  	return handled;
>  }
>  
> diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
> index f96df0a25d05..b74c93bc2e55 100644
> --- a/arch/powerpc/platforms/powernv/setup.c
> +++ b/arch/powerpc/platforms/powernv/setup.c
> @@ -431,6 +431,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
>  	return ret_freq;
>  }
>  
> +static long pnv_machine_check_early(struct pt_regs *regs)
> +{
> +	long handled = 0;
> +
> +	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> +		handled = cur_cpu_spec->machine_check_early(regs);
> +
> +	return handled;
> +}
> +
>  define_machine(powernv) {
>  	.name			= "PowerNV",
>  	.probe			= pnv_probe,
> @@ -442,6 +452,7 @@ define_machine(powernv) {
>  	.machine_shutdown	= pnv_shutdown,
>  	.power_save             = NULL,
>  	.calibrate_decr		= generic_calibrate_decr,
> +	.machine_check_early	= pnv_machine_check_early,
>  #ifdef CONFIG_KEXEC_CORE
>  	.kexec_cpu_down		= pnv_kexec_cpu_down,
>  #endif
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 50ed64fba4ae..cc00a7088cf3 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -487,6 +487,7 @@  extern void hpte_init_native(void);
 
 extern void slb_initialize(void);
 extern void slb_flush_and_rebolt(void);
+extern void slb_flush_and_rebolt_realmode(void);
 
 extern void slb_vmalloc_update(void);
 extern void slb_set_size(u16 size);
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index ffe7c71e1132..fe447e0d4140 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -108,6 +108,7 @@  struct machdep_calls {
 
 	/* Early exception handlers called in realmode */
 	int		(*hmi_exception_early)(struct pt_regs *regs);
+	int		(*machine_check_early)(struct pt_regs *regs);
 
 	/* Called during machine check exception to retrive fixup address. */
 	bool		(*mce_check_early_recovery)(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f283958129f2..0038596b7906 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -332,6 +332,9 @@  TRAMP_REAL_BEGIN(machine_check_pSeries)
 machine_check_fwnmi:
 	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+	b	machine_check_pSeries_early
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 machine_check_pSeries_0:
 	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
 	/*
@@ -343,6 +346,45 @@  machine_check_pSeries_0:
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
 
+TRAMP_REAL_BEGIN(machine_check_pSeries_early)
+BEGIN_FTR_SECTION
+	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+	mr	r10,r1			/* Save r1 */
+	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
+	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
+	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
+	EXCEPTION_PROLOG_COMMON_1()
+	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+	EXCEPTION_PROLOG_COMMON_3(0x200)
+	addi	r3,r1,STACK_FRAME_OVERHEAD /* r3 = arg for machine_check_early() */
+	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
+
+	/* Move original SRR0 and SRR1 into the respective regs */
+	ld	r9,_MSR(r1)
+	mtspr	SPRN_SRR1,r9
+	ld	r3,_NIP(r1)
+	mtspr	SPRN_SRR0,r3
+	ld	r9,_CTR(r1)
+	mtctr	r9
+	ld	r9,_XER(r1)
+	mtxer	r9
+	ld	r9,_LINK(r1)
+	mtlr	r9
+	REST_GPR(0, r1)
+	REST_8GPRS(2, r1)
+	REST_GPR(10, r1)
+	ld	r11,_CCR(r1)		/* r11 is scratch; restored just below */
+	mtcr	r11
+	REST_GPR(11, r1)
+	REST_2GPRS(12, r1)
+	/* restore original r1. */
+	ld	r1,GPR1(r1)
+	SET_SCRATCH0(r13)		/* save r13 */
+	EXCEPTION_PROLOG_0(PACA_EXMC)	/* redo the machine_check_fwnmi prolog */
+	b	machine_check_pSeries_0	/* rejoin the regular 0x200 path */
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+
 EXC_COMMON_BEGIN(machine_check_common)
 	/*
 	 * Machine check is different because we use a different
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index efdd16a79075..221271c96a57 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -488,9 +488,21 @@  long machine_check_early(struct pt_regs *regs)
 {
 	long handled = 0;
 
-	__this_cpu_inc(irq_stat.mce_exceptions);
+	/*
+	 * For pSeries we count the mce when we go into the virtual mode
+	 * machine check handler, hence skip it here. Also, we can't access
+	 * per cpu variables in real mode for LPAR.
+	 */
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
+		__this_cpu_inc(irq_stat.mce_exceptions);
 
-	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+	/*
+	 * See if platform is capable of handling machine check.
+	 * Otherwise fallthrough and allow CPU to handle this machine check.
+	 */
+	if (ppc_md.machine_check_early)
+		handled = ppc_md.machine_check_early(regs);
+	else if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;
 }
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 66577cc66dc9..5b1813b98358 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -145,6 +145,12 @@  void slb_flush_and_rebolt(void)
 	get_paca()->slb_cache_ptr = 0;
 }
 
+void slb_flush_and_rebolt_realmode(void)
+{
+	__slb_flush_and_rebolt();
+	get_paca()->slb_cache_ptr = 0;	/* as in slb_flush_and_rebolt() */
+}
+
 void slb_vmalloc_update(void)
 {
 	unsigned long vflags;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 48fbb41af5d1..ed548d40a9e1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -417,7 +417,6 @@  static int opal_recover_mce(struct pt_regs *regs,
 
 	if (!(regs->msr & MSR_RI)) {
 		/* If MSR_RI isn't set, we cannot recover */
-		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 		recovered = 0;
 	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 		/* Platform corrected itself */
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 60db2ee511fb..3611db5dd583 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -24,6 +24,7 @@  struct pt_regs;
 
 extern int pSeries_system_reset_exception(struct pt_regs *regs);
 extern int pSeries_machine_check_exception(struct pt_regs *regs);
+extern int pSeries_machine_check_realmode(struct pt_regs *regs);
 
 #ifdef CONFIG_SMP
 extern void smp_init_pseries(void);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 851ce326874a..9aa7885e0148 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -427,6 +427,35 @@  int pSeries_system_reset_exception(struct pt_regs *regs)
 	return 0; /* need to perform reset */
 }
 
+static int mce_handle_error(struct rtas_error_log *errp)
+{
+	struct pseries_errorlog *pseries_log;
+	struct pseries_mc_errorlog *mce_log;
+	int disposition = rtas_error_disposition(errp);
+	uint8_t error_type;
+
+	if (!rtas_error_extended(errp))
+		goto out;
+
+	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+	if (pseries_log == NULL)
+		goto out;
+
+	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+	error_type = rtas_mc_error_type(mce_log);
+
+	if ((disposition == RTAS_DISP_NOT_RECOVERED) &&
+			(error_type == PSERIES_MC_ERROR_TYPE_SLB)) {
+		/* Store the old slb content someplace. */
+		slb_flush_and_rebolt_realmode();
+		disposition = RTAS_DISP_FULLY_RECOVERED;
+		rtas_set_disposition_recovered(errp);
+	}
+
+out:
+	return disposition;
+}
+
 /*
  * Process MCE rtas errlog event.
  */
@@ -503,11 +532,31 @@  int pSeries_machine_check_exception(struct pt_regs *regs)
 	struct rtas_error_log *errp;
 
 	if (fwnmi_active) {
-		errp = fwnmi_get_errinfo(regs);
 		fwnmi_release_errinfo();
+		errp = fwnmi_get_errlog();
 		if (errp && recover_mce(regs, errp))
 			return 1;
 	}
 
 	return 0;
 }
+
+int pSeries_machine_check_realmode(struct pt_regs *regs)
+{
+	struct rtas_error_log *errp;
+	int disposition;
+
+	if (fwnmi_active) {
+		errp = fwnmi_get_errinfo(regs);
+		/*
+		 * Call to fwnmi_release_errinfo() in real mode causes kernel
+		 * to panic. Hence we will call it as soon as we go into
+		 * virtual mode.
+		 */
+		disposition = mce_handle_error(errp);
+		if (disposition == RTAS_DISP_FULLY_RECOVERED)
+			return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 60a067a6e743..249b02bc5c41 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -999,6 +999,7 @@  define_machine(pseries) {
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= rtas_progress,
 	.system_reset_exception = pSeries_system_reset_exception,
+	.machine_check_early	= pSeries_machine_check_realmode,
 	.machine_check_exception = pSeries_machine_check_exception,
 #ifdef CONFIG_KEXEC_CORE
 	.machine_kexec          = pSeries_machine_kexec,