diff mbox series

[v8,5/5] powernv/pseries: consolidate code for mce early handling.

Message ID 153469851942.21878.15396539991879060950.stgit@jupiter.in.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series powerpc/pseries: Machine check handler improvements. | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning next/apply_patch Patch failed to apply
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Mahesh J Salgaonkar Aug. 19, 2018, 5:08 p.m. UTC
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Now that other platforms also implements real mode mce handler,
lets consolidate the code by sharing existing powernv machine check
early code. Rename machine_check_powernv_early to
machine_check_common_early and reuse the code.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S |  155 ++++++----------------------------
 1 file changed, 28 insertions(+), 127 deletions(-)

Comments

Nicholas Piggin Aug. 20, 2018, 11:34 a.m. UTC | #1
On Sun, 19 Aug 2018 22:38:39 +0530
Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> 
> Now that other platforms also implements real mode mce handler,
> lets consolidate the code by sharing existing powernv machine check
> early code. Rename machine_check_powernv_early to
> machine_check_common_early and reuse the code.
> 
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/exceptions-64s.S |  155 ++++++----------------------------
>  1 file changed, 28 insertions(+), 127 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 12f056179112..2f85a7baf026 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -243,14 +243,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
>  	SET_SCRATCH0(r13)		/* save r13 */
>  	EXCEPTION_PROLOG_0(PACA_EXMC)
>  BEGIN_FTR_SECTION
> -	b	machine_check_powernv_early
> +	b	machine_check_common_early
>  FTR_SECTION_ELSE
>  	b	machine_check_pSeries_0
>  ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>  EXC_REAL_END(machine_check, 0x200, 0x100)
>  EXC_VIRT_NONE(0x4200, 0x100)
> -TRAMP_REAL_BEGIN(machine_check_powernv_early)
> -BEGIN_FTR_SECTION
> +TRAMP_REAL_BEGIN(machine_check_common_early)
>  	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>  	/*
>  	 * Register contents:
> @@ -306,7 +305,9 @@ BEGIN_FTR_SECTION
>  	/* Save r9 through r13 from EXMC save area to stack frame. */
>  	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>  	mfmsr	r11			/* get MSR value */
> +BEGIN_FTR_SECTION
>  	ori	r11,r11,MSR_ME		/* turn on ME bit */
> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>  	ori	r11,r11,MSR_RI		/* turn on RI bit */
>  	LOAD_HANDLER(r12, machine_check_handle_early)
>  1:	mtspr	SPRN_SRR0,r12
> @@ -325,7 +326,6 @@ BEGIN_FTR_SECTION
>  	andc	r11,r11,r10		/* Turn off MSR_ME */
>  	b	1b
>  	b	.	/* prevent speculative execution */
> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>  
>  TRAMP_REAL_BEGIN(machine_check_pSeries)
>  	.globl machine_check_fwnmi
> @@ -333,7 +333,7 @@ machine_check_fwnmi:
>  	SET_SCRATCH0(r13)		/* save r13 */
>  	EXCEPTION_PROLOG_0(PACA_EXMC)
>  BEGIN_FTR_SECTION
> -	b	machine_check_pSeries_early
> +	b	machine_check_common_early
>  END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>  machine_check_pSeries_0:
>  	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
> @@ -346,103 +346,6 @@ machine_check_pSeries_0:
>  
>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>  
> -TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> -BEGIN_FTR_SECTION
> -	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> -	mr	r10,r1			/* Save r1 */
> -	lhz	r11,PACA_IN_MCE(r13)
> -	cmpwi	r11,0			/* Are we in nested machine check */
> -	bne	0f			/* Yes, we are. */
> -	/* First machine check entry */
> -	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
> -0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
> -	addi	r11,r11,1		/* increment paca->in_mce */
> -	sth	r11,PACA_IN_MCE(r13)
> -	/* Limit nested MCE to level 4 to avoid stack overflow */
> -	cmpwi	r11,MAX_MCE_DEPTH
> -	bgt	1f			/* Check if we hit limit of 4 */
> -	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
> -	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
> -	EXCEPTION_PROLOG_COMMON_1()
> -	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> -	EXCEPTION_PROLOG_COMMON_3(0x200)
> -	addi	r3,r1,STACK_FRAME_OVERHEAD
> -	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
> -	ld	r12,_MSR(r1)
> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
> -	bne	2f			/* continue in V mode if we are. */
> -
> -	/*
> -	 * At this point we are not sure about what context we come from.
> -	 * We may be in the middle of swithing stack. r1 may not be valid.
> -	 * Hence stay on emergency stack, call machine_check_exception and
> -	 * return from the interrupt.
> -	 * But before that, check if this is an un-recoverable exception.
> -	 * If yes, then stay on emergency stack and panic.
> -	 */
> -	andi.	r11,r12,MSR_RI
> -	beq	1f
> -
> -	/*
> -	 * Check if we have successfully handled/recovered from error, if not
> -	 * then stay on emergency stack and panic.
> -	 */
> -	cmpdi	r3,0		/* see if we handled MCE successfully */
> -	beq	1f		/* if !handled then panic */
> -
> -	/* Stay on emergency stack and return from interrupt. */
> -	LOAD_HANDLER(r10,mce_return)
> -	mtspr	SPRN_SRR0,r10
> -	ld	r10,PACAKMSR(r13)
> -	mtspr	SPRN_SRR1,r10
> -	RFI_TO_KERNEL
> -	b	.
> -
> -1:	LOAD_HANDLER(r10,unrecover_mce)
> -	mtspr	SPRN_SRR0,r10
> -	ld	r10,PACAKMSR(r13)
> -	/*
> -	 * We are going down. But there are chances that we might get hit by
> -	 * another MCE during panic path and we may run into unstable state
> -	 * with no way out. Hence, turn ME bit off while going down, so that
> -	 * when another MCE is hit during panic path, hypervisor will
> -	 * power cycle the lpar, instead of getting into MCE loop.
> -	 */
> -	li	r3,MSR_ME
> -	andc	r10,r10,r3		/* Turn off MSR_ME */
> -	mtspr	SPRN_SRR1,r10
> -	RFI_TO_KERNEL
> -	b	.
> -
> -	/* Move original SRR0 and SRR1 into the respective regs */
> -2:	ld	r9,_MSR(r1)
> -	mtspr	SPRN_SRR1,r9
> -	ld	r3,_NIP(r1)
> -	mtspr	SPRN_SRR0,r3
> -	ld	r9,_CTR(r1)
> -	mtctr	r9
> -	ld	r9,_XER(r1)
> -	mtxer	r9
> -	ld	r9,_LINK(r1)
> -	mtlr	r9
> -	REST_GPR(0, r1)
> -	REST_8GPRS(2, r1)
> -	REST_GPR(10, r1)
> -	ld	r11,_CCR(r1)
> -	mtcr	r11
> -	/* Decrement paca->in_mce. */
> -	lhz	r12,PACA_IN_MCE(r13)
> -	subi	r12,r12,1
> -	sth	r12,PACA_IN_MCE(r13)
> -	REST_GPR(11, r1)
> -	REST_2GPRS(12, r1)
> -	/* restore original r1. */
> -	ld	r1,GPR1(r1)
> -	SET_SCRATCH0(r13)		/* save r13 */
> -	EXCEPTION_PROLOG_0(PACA_EXMC)
> -	b	machine_check_pSeries_0
> -END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> -
>  EXC_COMMON_BEGIN(machine_check_common)
>  	/*
>  	 * Machine check is different because we use a different
> @@ -541,6 +444,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>  	bl	machine_check_early
>  	std	r3,RESULT(r1)	/* Save result */
>  	ld	r12,_MSR(r1)
> +BEGIN_FTR_SECTION
> +	b	4f
> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>  
>  #ifdef	CONFIG_PPC_P7_NAP
>  	/*
> @@ -564,10 +470,11 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>  	 */
>  	rldicl.	r11,r12,4,63		/* See if MC hit while in HV mode. */
>  	beq	5f
> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
> +4:	andi.	r11,r12,MSR_PR		/* See if coming from user. */
>  	bne	9f			/* continue in V mode if we are. */
>  
>  5:
> +BEGIN_FTR_SECTION
>  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
>  	/*
>  	 * We are coming from kernel context. Check if we are coming from
> @@ -578,6 +485,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>  	cmpwi	r11,0			/* Check if coming from guest */
>  	bne	9f			/* continue if we are. */
>  #endif
> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)

Put these inside the ifdef?


>  	/*
>  	 * At this point we are not sure about what context we come from.
>  	 * Queue up the MCE event and return from the interrupt.
> @@ -611,6 +519,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>  	cmpdi	r3,0		/* see if we handled MCE successfully */
>  
>  	beq	1b		/* if !handled then panic */
> +BEGIN_FTR_SECTION
>  	/*
>  	 * Return from MC interrupt.
>  	 * Queue up the MCE event so that we can log it later, while
> @@ -619,10 +528,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>  	bl	machine_check_queue_event
>  	MACHINE_CHECK_HANDLER_WINDUP
>  	RFI_TO_USER_OR_KERNEL
> +FTR_SECTION_ELSE
> +	/*
> +	 * pSeries: Return from MC interrupt. Before that stay on emergency
> +	 * stack and call machine_check_exception to log the MCE event.
> +	 */
> +	LOAD_HANDLER(r10,mce_return)
> +	mtspr	SPRN_SRR0,r10
> +	ld	r10,PACAKMSR(r13)
> +	mtspr	SPRN_SRR1,r10
> +	RFI_TO_KERNEL
> +	b	.
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)

Do you still need mce_return? Why can't you consolidate it as well? ...
Hmm, okay so now I look back at patch 2, I don't think you should call
machine_check_exception there. You're supposed to call
machine_check_queue_event here and it will be handled by irq work.

I think when you do that more of this code should fall out and be
consolidated. Sorry for not picking that up earlier.

Thanks,
Nick
Mahesh J Salgaonkar Aug. 23, 2018, 8:43 a.m. UTC | #2
On 08/20/2018 05:04 PM, Nicholas Piggin wrote:
> On Sun, 19 Aug 2018 22:38:39 +0530
> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
>> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>>
>> Now that other platforms also implements real mode mce handler,
>> lets consolidate the code by sharing existing powernv machine check
>> early code. Rename machine_check_powernv_early to
>> machine_check_common_early and reuse the code.
>>
>> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/kernel/exceptions-64s.S |  155 ++++++----------------------------
>>  1 file changed, 28 insertions(+), 127 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>> index 12f056179112..2f85a7baf026 100644
>> --- a/arch/powerpc/kernel/exceptions-64s.S
>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>> @@ -243,14 +243,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
>>  	SET_SCRATCH0(r13)		/* save r13 */
>>  	EXCEPTION_PROLOG_0(PACA_EXMC)
>>  BEGIN_FTR_SECTION
>> -	b	machine_check_powernv_early
>> +	b	machine_check_common_early
>>  FTR_SECTION_ELSE
>>  	b	machine_check_pSeries_0
>>  ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>>  EXC_REAL_END(machine_check, 0x200, 0x100)
>>  EXC_VIRT_NONE(0x4200, 0x100)
>> -TRAMP_REAL_BEGIN(machine_check_powernv_early)
>> -BEGIN_FTR_SECTION
>> +TRAMP_REAL_BEGIN(machine_check_common_early)
>>  	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>>  	/*
>>  	 * Register contents:
>> @@ -306,7 +305,9 @@ BEGIN_FTR_SECTION
>>  	/* Save r9 through r13 from EXMC save area to stack frame. */
>>  	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>>  	mfmsr	r11			/* get MSR value */
>> +BEGIN_FTR_SECTION
>>  	ori	r11,r11,MSR_ME		/* turn on ME bit */
>> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>>  	ori	r11,r11,MSR_RI		/* turn on RI bit */
>>  	LOAD_HANDLER(r12, machine_check_handle_early)
>>  1:	mtspr	SPRN_SRR0,r12
>> @@ -325,7 +326,6 @@ BEGIN_FTR_SECTION
>>  	andc	r11,r11,r10		/* Turn off MSR_ME */
>>  	b	1b
>>  	b	.	/* prevent speculative execution */
>> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>>  
>>  TRAMP_REAL_BEGIN(machine_check_pSeries)
>>  	.globl machine_check_fwnmi
>> @@ -333,7 +333,7 @@ machine_check_fwnmi:
>>  	SET_SCRATCH0(r13)		/* save r13 */
>>  	EXCEPTION_PROLOG_0(PACA_EXMC)
>>  BEGIN_FTR_SECTION
>> -	b	machine_check_pSeries_early
>> +	b	machine_check_common_early
>>  END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>>  machine_check_pSeries_0:
>>  	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
>> @@ -346,103 +346,6 @@ machine_check_pSeries_0:
>>  
>>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>>  
>> -TRAMP_REAL_BEGIN(machine_check_pSeries_early)
>> -BEGIN_FTR_SECTION
>> -	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>> -	mr	r10,r1			/* Save r1 */
>> -	lhz	r11,PACA_IN_MCE(r13)
>> -	cmpwi	r11,0			/* Are we in nested machine check */
>> -	bne	0f			/* Yes, we are. */
>> -	/* First machine check entry */
>> -	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
>> -0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
>> -	addi	r11,r11,1		/* increment paca->in_mce */
>> -	sth	r11,PACA_IN_MCE(r13)
>> -	/* Limit nested MCE to level 4 to avoid stack overflow */
>> -	cmpwi	r11,MAX_MCE_DEPTH
>> -	bgt	1f			/* Check if we hit limit of 4 */
>> -	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
>> -	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
>> -	EXCEPTION_PROLOG_COMMON_1()
>> -	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>> -	EXCEPTION_PROLOG_COMMON_3(0x200)
>> -	addi	r3,r1,STACK_FRAME_OVERHEAD
>> -	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
>> -	ld	r12,_MSR(r1)
>> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
>> -	bne	2f			/* continue in V mode if we are. */
>> -
>> -	/*
>> -	 * At this point we are not sure about what context we come from.
>> -	 * We may be in the middle of swithing stack. r1 may not be valid.
>> -	 * Hence stay on emergency stack, call machine_check_exception and
>> -	 * return from the interrupt.
>> -	 * But before that, check if this is an un-recoverable exception.
>> -	 * If yes, then stay on emergency stack and panic.
>> -	 */
>> -	andi.	r11,r12,MSR_RI
>> -	beq	1f
>> -
>> -	/*
>> -	 * Check if we have successfully handled/recovered from error, if not
>> -	 * then stay on emergency stack and panic.
>> -	 */
>> -	cmpdi	r3,0		/* see if we handled MCE successfully */
>> -	beq	1f		/* if !handled then panic */
>> -
>> -	/* Stay on emergency stack and return from interrupt. */
>> -	LOAD_HANDLER(r10,mce_return)
>> -	mtspr	SPRN_SRR0,r10
>> -	ld	r10,PACAKMSR(r13)
>> -	mtspr	SPRN_SRR1,r10
>> -	RFI_TO_KERNEL
>> -	b	.
>> -
>> -1:	LOAD_HANDLER(r10,unrecover_mce)
>> -	mtspr	SPRN_SRR0,r10
>> -	ld	r10,PACAKMSR(r13)
>> -	/*
>> -	 * We are going down. But there are chances that we might get hit by
>> -	 * another MCE during panic path and we may run into unstable state
>> -	 * with no way out. Hence, turn ME bit off while going down, so that
>> -	 * when another MCE is hit during panic path, hypervisor will
>> -	 * power cycle the lpar, instead of getting into MCE loop.
>> -	 */
>> -	li	r3,MSR_ME
>> -	andc	r10,r10,r3		/* Turn off MSR_ME */
>> -	mtspr	SPRN_SRR1,r10
>> -	RFI_TO_KERNEL
>> -	b	.
>> -
>> -	/* Move original SRR0 and SRR1 into the respective regs */
>> -2:	ld	r9,_MSR(r1)
>> -	mtspr	SPRN_SRR1,r9
>> -	ld	r3,_NIP(r1)
>> -	mtspr	SPRN_SRR0,r3
>> -	ld	r9,_CTR(r1)
>> -	mtctr	r9
>> -	ld	r9,_XER(r1)
>> -	mtxer	r9
>> -	ld	r9,_LINK(r1)
>> -	mtlr	r9
>> -	REST_GPR(0, r1)
>> -	REST_8GPRS(2, r1)
>> -	REST_GPR(10, r1)
>> -	ld	r11,_CCR(r1)
>> -	mtcr	r11
>> -	/* Decrement paca->in_mce. */
>> -	lhz	r12,PACA_IN_MCE(r13)
>> -	subi	r12,r12,1
>> -	sth	r12,PACA_IN_MCE(r13)
>> -	REST_GPR(11, r1)
>> -	REST_2GPRS(12, r1)
>> -	/* restore original r1. */
>> -	ld	r1,GPR1(r1)
>> -	SET_SCRATCH0(r13)		/* save r13 */
>> -	EXCEPTION_PROLOG_0(PACA_EXMC)
>> -	b	machine_check_pSeries_0
>> -END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>> -
>>  EXC_COMMON_BEGIN(machine_check_common)
>>  	/*
>>  	 * Machine check is different because we use a different
>> @@ -541,6 +444,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>  	bl	machine_check_early
>>  	std	r3,RESULT(r1)	/* Save result */
>>  	ld	r12,_MSR(r1)
>> +BEGIN_FTR_SECTION
>> +	b	4f
>> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>>  
>>  #ifdef	CONFIG_PPC_P7_NAP
>>  	/*
>> @@ -564,10 +470,11 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>  	 */
>>  	rldicl.	r11,r12,4,63		/* See if MC hit while in HV mode. */
>>  	beq	5f
>> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
>> +4:	andi.	r11,r12,MSR_PR		/* See if coming from user. */
>>  	bne	9f			/* continue in V mode if we are. */
>>  
>>  5:
>> +BEGIN_FTR_SECTION
>>  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
>>  	/*
>>  	 * We are coming from kernel context. Check if we are coming from
>> @@ -578,6 +485,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>  	cmpwi	r11,0			/* Check if coming from guest */
>>  	bne	9f			/* continue if we are. */
>>  #endif
>> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> 
> Put these inside the ifdef?
> 
> 
>>  	/*
>>  	 * At this point we are not sure about what context we come from.
>>  	 * Queue up the MCE event and return from the interrupt.
>> @@ -611,6 +519,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>  	cmpdi	r3,0		/* see if we handled MCE successfully */
>>  
>>  	beq	1b		/* if !handled then panic */
>> +BEGIN_FTR_SECTION
>>  	/*
>>  	 * Return from MC interrupt.
>>  	 * Queue up the MCE event so that we can log it later, while
>> @@ -619,10 +528,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>  	bl	machine_check_queue_event
>>  	MACHINE_CHECK_HANDLER_WINDUP
>>  	RFI_TO_USER_OR_KERNEL
>> +FTR_SECTION_ELSE
>> +	/*
>> +	 * pSeries: Return from MC interrupt. Before that stay on emergency
>> +	 * stack and call machine_check_exception to log the MCE event.
>> +	 */
>> +	LOAD_HANDLER(r10,mce_return)
>> +	mtspr	SPRN_SRR0,r10
>> +	ld	r10,PACAKMSR(r13)
>> +	mtspr	SPRN_SRR1,r10
>> +	RFI_TO_KERNEL
>> +	b	.
>> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> 
> Do you still need mce_return? Why can't you consolidate it as well? ...
> Hmm, okay so now I look back at patch 2, I don't think you should call
> machine_check_exception there. You're supposed to call
> machine_check_queue_event here and it will be handled by irq work.

machine_check_queue_event does not handle RTAS mce event. Also, we need
to call fwnmi_release_errinfo() as early as possible which is why I am
calling machine_check_exception() in mce_return path for pSeries.
Otherwise if we get another MCE before calling fwnmi_release_errinfo()
then lpar will get rebooted without any logs getting printed.

Thanks,
-Mahesh.
Nicholas Piggin Aug. 23, 2018, 9:02 a.m. UTC | #3
On Thu, 23 Aug 2018 14:13:13 +0530
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> On 08/20/2018 05:04 PM, Nicholas Piggin wrote:
> > On Sun, 19 Aug 2018 22:38:39 +0530
> > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> >   
> >> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> >>
> >> Now that other platforms also implements real mode mce handler,
> >> lets consolidate the code by sharing existing powernv machine check
> >> early code. Rename machine_check_powernv_early to
> >> machine_check_common_early and reuse the code.
> >>
> >> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> >> ---
> >>  arch/powerpc/kernel/exceptions-64s.S |  155 ++++++----------------------------
> >>  1 file changed, 28 insertions(+), 127 deletions(-)
> >>
> >> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> >> index 12f056179112..2f85a7baf026 100644
> >> --- a/arch/powerpc/kernel/exceptions-64s.S
> >> +++ b/arch/powerpc/kernel/exceptions-64s.S
> >> @@ -243,14 +243,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
> >>  	SET_SCRATCH0(r13)		/* save r13 */
> >>  	EXCEPTION_PROLOG_0(PACA_EXMC)
> >>  BEGIN_FTR_SECTION
> >> -	b	machine_check_powernv_early
> >> +	b	machine_check_common_early
> >>  FTR_SECTION_ELSE
> >>  	b	machine_check_pSeries_0
> >>  ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> >>  EXC_REAL_END(machine_check, 0x200, 0x100)
> >>  EXC_VIRT_NONE(0x4200, 0x100)
> >> -TRAMP_REAL_BEGIN(machine_check_powernv_early)
> >> -BEGIN_FTR_SECTION
> >> +TRAMP_REAL_BEGIN(machine_check_common_early)
> >>  	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> >>  	/*
> >>  	 * Register contents:
> >> @@ -306,7 +305,9 @@ BEGIN_FTR_SECTION
> >>  	/* Save r9 through r13 from EXMC save area to stack frame. */
> >>  	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> >>  	mfmsr	r11			/* get MSR value */
> >> +BEGIN_FTR_SECTION
> >>  	ori	r11,r11,MSR_ME		/* turn on ME bit */
> >> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> >>  	ori	r11,r11,MSR_RI		/* turn on RI bit */
> >>  	LOAD_HANDLER(r12, machine_check_handle_early)
> >>  1:	mtspr	SPRN_SRR0,r12
> >> @@ -325,7 +326,6 @@ BEGIN_FTR_SECTION
> >>  	andc	r11,r11,r10		/* Turn off MSR_ME */
> >>  	b	1b
> >>  	b	.	/* prevent speculative execution */
> >> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> >>  
> >>  TRAMP_REAL_BEGIN(machine_check_pSeries)
> >>  	.globl machine_check_fwnmi
> >> @@ -333,7 +333,7 @@ machine_check_fwnmi:
> >>  	SET_SCRATCH0(r13)		/* save r13 */
> >>  	EXCEPTION_PROLOG_0(PACA_EXMC)
> >>  BEGIN_FTR_SECTION
> >> -	b	machine_check_pSeries_early
> >> +	b	machine_check_common_early
> >>  END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> >>  machine_check_pSeries_0:
> >>  	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
> >> @@ -346,103 +346,6 @@ machine_check_pSeries_0:
> >>  
> >>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
> >>  
> >> -TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> >> -BEGIN_FTR_SECTION
> >> -	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> >> -	mr	r10,r1			/* Save r1 */
> >> -	lhz	r11,PACA_IN_MCE(r13)
> >> -	cmpwi	r11,0			/* Are we in nested machine check */
> >> -	bne	0f			/* Yes, we are. */
> >> -	/* First machine check entry */
> >> -	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
> >> -0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
> >> -	addi	r11,r11,1		/* increment paca->in_mce */
> >> -	sth	r11,PACA_IN_MCE(r13)
> >> -	/* Limit nested MCE to level 4 to avoid stack overflow */
> >> -	cmpwi	r11,MAX_MCE_DEPTH
> >> -	bgt	1f			/* Check if we hit limit of 4 */
> >> -	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
> >> -	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
> >> -	EXCEPTION_PROLOG_COMMON_1()
> >> -	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> >> -	EXCEPTION_PROLOG_COMMON_3(0x200)
> >> -	addi	r3,r1,STACK_FRAME_OVERHEAD
> >> -	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
> >> -	ld	r12,_MSR(r1)
> >> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
> >> -	bne	2f			/* continue in V mode if we are. */
> >> -
> >> -	/*
> >> -	 * At this point we are not sure about what context we come from.
> >> -	 * We may be in the middle of swithing stack. r1 may not be valid.
> >> -	 * Hence stay on emergency stack, call machine_check_exception and
> >> -	 * return from the interrupt.
> >> -	 * But before that, check if this is an un-recoverable exception.
> >> -	 * If yes, then stay on emergency stack and panic.
> >> -	 */
> >> -	andi.	r11,r12,MSR_RI
> >> -	beq	1f
> >> -
> >> -	/*
> >> -	 * Check if we have successfully handled/recovered from error, if not
> >> -	 * then stay on emergency stack and panic.
> >> -	 */
> >> -	cmpdi	r3,0		/* see if we handled MCE successfully */
> >> -	beq	1f		/* if !handled then panic */
> >> -
> >> -	/* Stay on emergency stack and return from interrupt. */
> >> -	LOAD_HANDLER(r10,mce_return)
> >> -	mtspr	SPRN_SRR0,r10
> >> -	ld	r10,PACAKMSR(r13)
> >> -	mtspr	SPRN_SRR1,r10
> >> -	RFI_TO_KERNEL
> >> -	b	.
> >> -
> >> -1:	LOAD_HANDLER(r10,unrecover_mce)
> >> -	mtspr	SPRN_SRR0,r10
> >> -	ld	r10,PACAKMSR(r13)
> >> -	/*
> >> -	 * We are going down. But there are chances that we might get hit by
> >> -	 * another MCE during panic path and we may run into unstable state
> >> -	 * with no way out. Hence, turn ME bit off while going down, so that
> >> -	 * when another MCE is hit during panic path, hypervisor will
> >> -	 * power cycle the lpar, instead of getting into MCE loop.
> >> -	 */
> >> -	li	r3,MSR_ME
> >> -	andc	r10,r10,r3		/* Turn off MSR_ME */
> >> -	mtspr	SPRN_SRR1,r10
> >> -	RFI_TO_KERNEL
> >> -	b	.
> >> -
> >> -	/* Move original SRR0 and SRR1 into the respective regs */
> >> -2:	ld	r9,_MSR(r1)
> >> -	mtspr	SPRN_SRR1,r9
> >> -	ld	r3,_NIP(r1)
> >> -	mtspr	SPRN_SRR0,r3
> >> -	ld	r9,_CTR(r1)
> >> -	mtctr	r9
> >> -	ld	r9,_XER(r1)
> >> -	mtxer	r9
> >> -	ld	r9,_LINK(r1)
> >> -	mtlr	r9
> >> -	REST_GPR(0, r1)
> >> -	REST_8GPRS(2, r1)
> >> -	REST_GPR(10, r1)
> >> -	ld	r11,_CCR(r1)
> >> -	mtcr	r11
> >> -	/* Decrement paca->in_mce. */
> >> -	lhz	r12,PACA_IN_MCE(r13)
> >> -	subi	r12,r12,1
> >> -	sth	r12,PACA_IN_MCE(r13)
> >> -	REST_GPR(11, r1)
> >> -	REST_2GPRS(12, r1)
> >> -	/* restore original r1. */
> >> -	ld	r1,GPR1(r1)
> >> -	SET_SCRATCH0(r13)		/* save r13 */
> >> -	EXCEPTION_PROLOG_0(PACA_EXMC)
> >> -	b	machine_check_pSeries_0
> >> -END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> >> -
> >>  EXC_COMMON_BEGIN(machine_check_common)
> >>  	/*
> >>  	 * Machine check is different because we use a different
> >> @@ -541,6 +444,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
> >>  	bl	machine_check_early
> >>  	std	r3,RESULT(r1)	/* Save result */
> >>  	ld	r12,_MSR(r1)
> >> +BEGIN_FTR_SECTION
> >> +	b	4f
> >> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> >>  
> >>  #ifdef	CONFIG_PPC_P7_NAP
> >>  	/*
> >> @@ -564,10 +470,11 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
> >>  	 */
> >>  	rldicl.	r11,r12,4,63		/* See if MC hit while in HV mode. */
> >>  	beq	5f
> >> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
> >> +4:	andi.	r11,r12,MSR_PR		/* See if coming from user. */
> >>  	bne	9f			/* continue in V mode if we are. */
> >>  
> >>  5:
> >> +BEGIN_FTR_SECTION
> >>  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
> >>  	/*
> >>  	 * We are coming from kernel context. Check if we are coming from
> >> @@ -578,6 +485,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
> >>  	cmpwi	r11,0			/* Check if coming from guest */
> >>  	bne	9f			/* continue if we are. */
> >>  #endif
> >> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)  
> > 
> > Put these inside the ifdef?
> > 
> >   
> >>  	/*
> >>  	 * At this point we are not sure about what context we come from.
> >>  	 * Queue up the MCE event and return from the interrupt.
> >> @@ -611,6 +519,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
> >>  	cmpdi	r3,0		/* see if we handled MCE successfully */
> >>  
> >>  	beq	1b		/* if !handled then panic */
> >> +BEGIN_FTR_SECTION
> >>  	/*
> >>  	 * Return from MC interrupt.
> >>  	 * Queue up the MCE event so that we can log it later, while
> >> @@ -619,10 +528,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
> >>  	bl	machine_check_queue_event
> >>  	MACHINE_CHECK_HANDLER_WINDUP
> >>  	RFI_TO_USER_OR_KERNEL
> >> +FTR_SECTION_ELSE
> >> +	/*
> >> +	 * pSeries: Return from MC interrupt. Before that stay on emergency
> >> +	 * stack and call machine_check_exception to log the MCE event.
> >> +	 */
> >> +	LOAD_HANDLER(r10,mce_return)
> >> +	mtspr	SPRN_SRR0,r10
> >> +	ld	r10,PACAKMSR(r13)
> >> +	mtspr	SPRN_SRR1,r10
> >> +	RFI_TO_KERNEL
> >> +	b	.
> >> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)  
> > 
> > Do you still need mce_return? Why can't you consolidate it as well? ...
> > Hmm, okay so now I look back at patch 2, I don't think you should call
> > machine_check_exception there. You're supposed to call
> > machine_check_queue_event here and it will be handled by irq work.  
> 
> machine_check_queue_event does not handle RTAS mce event.

Yes it would need a bit of work.

> Also, we need
> to call fwnmi_release_errinfo() as early as possible which is why I am
> calling machine_check_exception() in mce_return path for pSeries.
> Otherwise if we get another MCE before calling fwnmi_release_errinfo()
> then lpar will get rebooted without any logs getting printed.

I think you can call that in your early handler, but then defer
the printing to the irq work.

Although hmm, maybe that's less of a problem now we do nmi_enter
in machine check exception so I think printk will use an NMI safe
buffer.

We have to be careful actually of soft irq state if we take a
machine check in an un-reconciled state or in the middle of
the irq replay code I'm not actually sure we do the right thing,
but that would be a bug in existing code too. And we definitely
have MSR[RI] vs DAR/DSISR bugs in existing code, sigh.

I don't know... maybe just push what you have and we'll try to do
some more fixes and cleanups on top of that.

Thanks,
Nick
Mahesh J Salgaonkar Aug. 27, 2018, 10:32 a.m. UTC | #4
On 08/23/2018 02:32 PM, Nicholas Piggin wrote:
> On Thu, 23 Aug 2018 14:13:13 +0530
> Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
>> On 08/20/2018 05:04 PM, Nicholas Piggin wrote:
>>> On Sun, 19 Aug 2018 22:38:39 +0530
>>> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
>>>   
>>>> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>>>>
>>>> Now that other platforms also implements real mode mce handler,
>>>> lets consolidate the code by sharing existing powernv machine check
>>>> early code. Rename machine_check_powernv_early to
>>>> machine_check_common_early and reuse the code.
>>>>
>>>> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>>>> ---
>>>>  arch/powerpc/kernel/exceptions-64s.S |  155 ++++++----------------------------
>>>>  1 file changed, 28 insertions(+), 127 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>>>> index 12f056179112..2f85a7baf026 100644
>>>> --- a/arch/powerpc/kernel/exceptions-64s.S
>>>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>>>> @@ -243,14 +243,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
>>>>  	SET_SCRATCH0(r13)		/* save r13 */
>>>>  	EXCEPTION_PROLOG_0(PACA_EXMC)
>>>>  BEGIN_FTR_SECTION
>>>> -	b	machine_check_powernv_early
>>>> +	b	machine_check_common_early
>>>>  FTR_SECTION_ELSE
>>>>  	b	machine_check_pSeries_0
>>>>  ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>>>>  EXC_REAL_END(machine_check, 0x200, 0x100)
>>>>  EXC_VIRT_NONE(0x4200, 0x100)
>>>> -TRAMP_REAL_BEGIN(machine_check_powernv_early)
>>>> -BEGIN_FTR_SECTION
>>>> +TRAMP_REAL_BEGIN(machine_check_common_early)
>>>>  	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>>>>  	/*
>>>>  	 * Register contents:
>>>> @@ -306,7 +305,9 @@ BEGIN_FTR_SECTION
>>>>  	/* Save r9 through r13 from EXMC save area to stack frame. */
>>>>  	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>>>>  	mfmsr	r11			/* get MSR value */
>>>> +BEGIN_FTR_SECTION
>>>>  	ori	r11,r11,MSR_ME		/* turn on ME bit */
>>>> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>>>>  	ori	r11,r11,MSR_RI		/* turn on RI bit */
>>>>  	LOAD_HANDLER(r12, machine_check_handle_early)
>>>>  1:	mtspr	SPRN_SRR0,r12
>>>> @@ -325,7 +326,6 @@ BEGIN_FTR_SECTION
>>>>  	andc	r11,r11,r10		/* Turn off MSR_ME */
>>>>  	b	1b
>>>>  	b	.	/* prevent speculative execution */
>>>> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>>>>  
>>>>  TRAMP_REAL_BEGIN(machine_check_pSeries)
>>>>  	.globl machine_check_fwnmi
>>>> @@ -333,7 +333,7 @@ machine_check_fwnmi:
>>>>  	SET_SCRATCH0(r13)		/* save r13 */
>>>>  	EXCEPTION_PROLOG_0(PACA_EXMC)
>>>>  BEGIN_FTR_SECTION
>>>> -	b	machine_check_pSeries_early
>>>> +	b	machine_check_common_early
>>>>  END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>>>>  machine_check_pSeries_0:
>>>>  	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
>>>> @@ -346,103 +346,6 @@ machine_check_pSeries_0:
>>>>  
>>>>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>>>>  
>>>> -TRAMP_REAL_BEGIN(machine_check_pSeries_early)
>>>> -BEGIN_FTR_SECTION
>>>> -	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>>>> -	mr	r10,r1			/* Save r1 */
>>>> -	lhz	r11,PACA_IN_MCE(r13)
>>>> -	cmpwi	r11,0			/* Are we in nested machine check */
>>>> -	bne	0f			/* Yes, we are. */
>>>> -	/* First machine check entry */
>>>> -	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
>>>> -0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
>>>> -	addi	r11,r11,1		/* increment paca->in_mce */
>>>> -	sth	r11,PACA_IN_MCE(r13)
>>>> -	/* Limit nested MCE to level 4 to avoid stack overflow */
>>>> -	cmpwi	r11,MAX_MCE_DEPTH
>>>> -	bgt	1f			/* Check if we hit limit of 4 */
>>>> -	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
>>>> -	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
>>>> -	EXCEPTION_PROLOG_COMMON_1()
>>>> -	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>>>> -	EXCEPTION_PROLOG_COMMON_3(0x200)
>>>> -	addi	r3,r1,STACK_FRAME_OVERHEAD
>>>> -	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
>>>> -	ld	r12,_MSR(r1)
>>>> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
>>>> -	bne	2f			/* continue in V mode if we are. */
>>>> -
>>>> -	/*
>>>> -	 * At this point we are not sure about what context we come from.
>>>> -	 * We may be in the middle of swithing stack. r1 may not be valid.
>>>> -	 * Hence stay on emergency stack, call machine_check_exception and
>>>> -	 * return from the interrupt.
>>>> -	 * But before that, check if this is an un-recoverable exception.
>>>> -	 * If yes, then stay on emergency stack and panic.
>>>> -	 */
>>>> -	andi.	r11,r12,MSR_RI
>>>> -	beq	1f
>>>> -
>>>> -	/*
>>>> -	 * Check if we have successfully handled/recovered from error, if not
>>>> -	 * then stay on emergency stack and panic.
>>>> -	 */
>>>> -	cmpdi	r3,0		/* see if we handled MCE successfully */
>>>> -	beq	1f		/* if !handled then panic */
>>>> -
>>>> -	/* Stay on emergency stack and return from interrupt. */
>>>> -	LOAD_HANDLER(r10,mce_return)
>>>> -	mtspr	SPRN_SRR0,r10
>>>> -	ld	r10,PACAKMSR(r13)
>>>> -	mtspr	SPRN_SRR1,r10
>>>> -	RFI_TO_KERNEL
>>>> -	b	.
>>>> -
>>>> -1:	LOAD_HANDLER(r10,unrecover_mce)
>>>> -	mtspr	SPRN_SRR0,r10
>>>> -	ld	r10,PACAKMSR(r13)
>>>> -	/*
>>>> -	 * We are going down. But there are chances that we might get hit by
>>>> -	 * another MCE during panic path and we may run into unstable state
>>>> -	 * with no way out. Hence, turn ME bit off while going down, so that
>>>> -	 * when another MCE is hit during panic path, hypervisor will
>>>> -	 * power cycle the lpar, instead of getting into MCE loop.
>>>> -	 */
>>>> -	li	r3,MSR_ME
>>>> -	andc	r10,r10,r3		/* Turn off MSR_ME */
>>>> -	mtspr	SPRN_SRR1,r10
>>>> -	RFI_TO_KERNEL
>>>> -	b	.
>>>> -
>>>> -	/* Move original SRR0 and SRR1 into the respective regs */
>>>> -2:	ld	r9,_MSR(r1)
>>>> -	mtspr	SPRN_SRR1,r9
>>>> -	ld	r3,_NIP(r1)
>>>> -	mtspr	SPRN_SRR0,r3
>>>> -	ld	r9,_CTR(r1)
>>>> -	mtctr	r9
>>>> -	ld	r9,_XER(r1)
>>>> -	mtxer	r9
>>>> -	ld	r9,_LINK(r1)
>>>> -	mtlr	r9
>>>> -	REST_GPR(0, r1)
>>>> -	REST_8GPRS(2, r1)
>>>> -	REST_GPR(10, r1)
>>>> -	ld	r11,_CCR(r1)
>>>> -	mtcr	r11
>>>> -	/* Decrement paca->in_mce. */
>>>> -	lhz	r12,PACA_IN_MCE(r13)
>>>> -	subi	r12,r12,1
>>>> -	sth	r12,PACA_IN_MCE(r13)
>>>> -	REST_GPR(11, r1)
>>>> -	REST_2GPRS(12, r1)
>>>> -	/* restore original r1. */
>>>> -	ld	r1,GPR1(r1)
>>>> -	SET_SCRATCH0(r13)		/* save r13 */
>>>> -	EXCEPTION_PROLOG_0(PACA_EXMC)
>>>> -	b	machine_check_pSeries_0
>>>> -END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>>>> -
>>>>  EXC_COMMON_BEGIN(machine_check_common)
>>>>  	/*
>>>>  	 * Machine check is different because we use a different
>>>> @@ -541,6 +444,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>>>  	bl	machine_check_early
>>>>  	std	r3,RESULT(r1)	/* Save result */
>>>>  	ld	r12,_MSR(r1)
>>>> +BEGIN_FTR_SECTION
>>>> +	b	4f
>>>> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>>>>  
>>>>  #ifdef	CONFIG_PPC_P7_NAP
>>>>  	/*
>>>> @@ -564,10 +470,11 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>>>  	 */
>>>>  	rldicl.	r11,r12,4,63		/* See if MC hit while in HV mode. */
>>>>  	beq	5f
>>>> -	andi.	r11,r12,MSR_PR		/* See if coming from user. */
>>>> +4:	andi.	r11,r12,MSR_PR		/* See if coming from user. */
>>>>  	bne	9f			/* continue in V mode if we are. */
>>>>  
>>>>  5:
>>>> +BEGIN_FTR_SECTION
>>>>  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
>>>>  	/*
>>>>  	 * We are coming from kernel context. Check if we are coming from
>>>> @@ -578,6 +485,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>>>  	cmpwi	r11,0			/* Check if coming from guest */
>>>>  	bne	9f			/* continue if we are. */
>>>>  #endif
>>>> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)  
>>>
>>> Put these inside the ifdef?
>>>
>>>   
>>>>  	/*
>>>>  	 * At this point we are not sure about what context we come from.
>>>>  	 * Queue up the MCE event and return from the interrupt.
>>>> @@ -611,6 +519,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>>>  	cmpdi	r3,0		/* see if we handled MCE successfully */
>>>>  
>>>>  	beq	1b		/* if !handled then panic */
>>>> +BEGIN_FTR_SECTION
>>>>  	/*
>>>>  	 * Return from MC interrupt.
>>>>  	 * Queue up the MCE event so that we can log it later, while
>>>> @@ -619,10 +528,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>>>>  	bl	machine_check_queue_event
>>>>  	MACHINE_CHECK_HANDLER_WINDUP
>>>>  	RFI_TO_USER_OR_KERNEL
>>>> +FTR_SECTION_ELSE
>>>> +	/*
>>>> +	 * pSeries: Return from MC interrupt. Before that stay on emergency
>>>> +	 * stack and call machine_check_exception to log the MCE event.
>>>> +	 */
>>>> +	LOAD_HANDLER(r10,mce_return)
>>>> +	mtspr	SPRN_SRR0,r10
>>>> +	ld	r10,PACAKMSR(r13)
>>>> +	mtspr	SPRN_SRR1,r10
>>>> +	RFI_TO_KERNEL
>>>> +	b	.
>>>> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)  
>>>
>>> Do you still need mce_return? Why can't you consolidate it as well? ...
>>> Hmm, okay so now I look back at patch 2, I don't think you should call
>>> machine_check_exception there. You're supposed to call
>>> machine_check_queue_event here and it will be handled by irq work.  
>>
>> machine_check_queue_event does not handle RTAS mce event.
> 
> Yes it would need a bit of work.
> 
>> Also, we need
>> to call fwnmi_release_errinfo() as early as possible which is why I am
>> calling machine_check_exception() in mce_return path for pSeries.
>> Otherwise if we get another MCE before calling fwnmi_release_errinfo()
>> then lpar will get rebooted without any logs getting printed.
> 
> I think you can call that in your early handler, but then defer
> the printing to the irq work.
> 
> Although hmm, maybe that's less of a problem now we do nmi_enter
> in machine check exception so I think printk will use an NMI safe
> buffer.
> 
> We have to be careful actually of soft irq state if we take a
> machine check in an un-reconciled state or in the middle of
> the irq replay code I'm not actually sure we do the right thing,
> but that would be a bug in existing code too. And we definitely
> have MSR[RI] vs DAR/DSISR bugs in existing code, sigh.
> 
> I don't know... maybe just push what you have and we'll try to do
> some more fixes and cleanups on top of that.

Sure. I will respin the next version addressing your other minor
comments. Will work on more improvements as separate change.

Thanks,
-Mahesh.
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 12f056179112..2f85a7baf026 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -243,14 +243,13 @@  EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
 	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
-	b	machine_check_powernv_early
+	b	machine_check_common_early
 FTR_SECTION_ELSE
 	b	machine_check_pSeries_0
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 EXC_REAL_END(machine_check, 0x200, 0x100)
 EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_powernv_early)
-BEGIN_FTR_SECTION
+TRAMP_REAL_BEGIN(machine_check_common_early)
 	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
 	/*
 	 * Register contents:
@@ -306,7 +305,9 @@  BEGIN_FTR_SECTION
 	/* Save r9 through r13 from EXMC save area to stack frame. */
 	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
 	mfmsr	r11			/* get MSR value */
+BEGIN_FTR_SECTION
 	ori	r11,r11,MSR_ME		/* turn on ME bit */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	ori	r11,r11,MSR_RI		/* turn on RI bit */
 	LOAD_HANDLER(r12, machine_check_handle_early)
 1:	mtspr	SPRN_SRR0,r12
@@ -325,7 +326,6 @@  BEGIN_FTR_SECTION
 	andc	r11,r11,r10		/* Turn off MSR_ME */
 	b	1b
 	b	.	/* prevent speculative execution */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
 TRAMP_REAL_BEGIN(machine_check_pSeries)
 	.globl machine_check_fwnmi
@@ -333,7 +333,7 @@  machine_check_fwnmi:
 	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
-	b	machine_check_pSeries_early
+	b	machine_check_common_early
 END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 machine_check_pSeries_0:
 	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
@@ -346,103 +346,6 @@  machine_check_pSeries_0:
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
 
-TRAMP_REAL_BEGIN(machine_check_pSeries_early)
-BEGIN_FTR_SECTION
-	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
-	mr	r10,r1			/* Save r1 */
-	lhz	r11,PACA_IN_MCE(r13)
-	cmpwi	r11,0			/* Are we in nested machine check */
-	bne	0f			/* Yes, we are. */
-	/* First machine check entry */
-	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
-0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
-	addi	r11,r11,1		/* increment paca->in_mce */
-	sth	r11,PACA_IN_MCE(r13)
-	/* Limit nested MCE to level 4 to avoid stack overflow */
-	cmpwi	r11,MAX_MCE_DEPTH
-	bgt	1f			/* Check if we hit limit of 4 */
-	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
-	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
-	EXCEPTION_PROLOG_COMMON_1()
-	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
-	EXCEPTION_PROLOG_COMMON_3(0x200)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
-	ld	r12,_MSR(r1)
-	andi.	r11,r12,MSR_PR		/* See if coming from user. */
-	bne	2f			/* continue in V mode if we are. */
-
-	/*
-	 * At this point we are not sure about what context we come from.
-	 * We may be in the middle of swithing stack. r1 may not be valid.
-	 * Hence stay on emergency stack, call machine_check_exception and
-	 * return from the interrupt.
-	 * But before that, check if this is an un-recoverable exception.
-	 * If yes, then stay on emergency stack and panic.
-	 */
-	andi.	r11,r12,MSR_RI
-	beq	1f
-
-	/*
-	 * Check if we have successfully handled/recovered from error, if not
-	 * then stay on emergency stack and panic.
-	 */
-	cmpdi	r3,0		/* see if we handled MCE successfully */
-	beq	1f		/* if !handled then panic */
-
-	/* Stay on emergency stack and return from interrupt. */
-	LOAD_HANDLER(r10,mce_return)
-	mtspr	SPRN_SRR0,r10
-	ld	r10,PACAKMSR(r13)
-	mtspr	SPRN_SRR1,r10
-	RFI_TO_KERNEL
-	b	.
-
-1:	LOAD_HANDLER(r10,unrecover_mce)
-	mtspr	SPRN_SRR0,r10
-	ld	r10,PACAKMSR(r13)
-	/*
-	 * We are going down. But there are chances that we might get hit by
-	 * another MCE during panic path and we may run into unstable state
-	 * with no way out. Hence, turn ME bit off while going down, so that
-	 * when another MCE is hit during panic path, hypervisor will
-	 * power cycle the lpar, instead of getting into MCE loop.
-	 */
-	li	r3,MSR_ME
-	andc	r10,r10,r3		/* Turn off MSR_ME */
-	mtspr	SPRN_SRR1,r10
-	RFI_TO_KERNEL
-	b	.
-
-	/* Move original SRR0 and SRR1 into the respective regs */
-2:	ld	r9,_MSR(r1)
-	mtspr	SPRN_SRR1,r9
-	ld	r3,_NIP(r1)
-	mtspr	SPRN_SRR0,r3
-	ld	r9,_CTR(r1)
-	mtctr	r9
-	ld	r9,_XER(r1)
-	mtxer	r9
-	ld	r9,_LINK(r1)
-	mtlr	r9
-	REST_GPR(0, r1)
-	REST_8GPRS(2, r1)
-	REST_GPR(10, r1)
-	ld	r11,_CCR(r1)
-	mtcr	r11
-	/* Decrement paca->in_mce. */
-	lhz	r12,PACA_IN_MCE(r13)
-	subi	r12,r12,1
-	sth	r12,PACA_IN_MCE(r13)
-	REST_GPR(11, r1)
-	REST_2GPRS(12, r1)
-	/* restore original r1. */
-	ld	r1,GPR1(r1)
-	SET_SCRATCH0(r13)		/* save r13 */
-	EXCEPTION_PROLOG_0(PACA_EXMC)
-	b	machine_check_pSeries_0
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-
 EXC_COMMON_BEGIN(machine_check_common)
 	/*
 	 * Machine check is different because we use a different
@@ -541,6 +444,9 @@  EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
+BEGIN_FTR_SECTION
+	b	4f
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
@@ -564,10 +470,11 @@  EXC_COMMON_BEGIN(machine_check_handle_early)
 	 */
 	rldicl.	r11,r12,4,63		/* See if MC hit while in HV mode. */
 	beq	5f
-	andi.	r11,r12,MSR_PR		/* See if coming from user. */
+4:	andi.	r11,r12,MSR_PR		/* See if coming from user. */
 	bne	9f			/* continue in V mode if we are. */
 
 5:
+BEGIN_FTR_SECTION
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 	/*
 	 * We are coming from kernel context. Check if we are coming from
@@ -578,6 +485,7 @@  EXC_COMMON_BEGIN(machine_check_handle_early)
 	cmpwi	r11,0			/* Check if coming from guest */
 	bne	9f			/* continue if we are. */
 #endif
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	/*
 	 * At this point we are not sure about what context we come from.
 	 * Queue up the MCE event and return from the interrupt.
@@ -611,6 +519,7 @@  EXC_COMMON_BEGIN(machine_check_handle_early)
 	cmpdi	r3,0		/* see if we handled MCE successfully */
 
 	beq	1b		/* if !handled then panic */
+BEGIN_FTR_SECTION
 	/*
 	 * Return from MC interrupt.
 	 * Queue up the MCE event so that we can log it later, while
@@ -619,10 +528,24 @@  EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_queue_event
 	MACHINE_CHECK_HANDLER_WINDUP
 	RFI_TO_USER_OR_KERNEL
+FTR_SECTION_ELSE
+	/*
+	 * pSeries: Return from MC interrupt. Before that stay on emergency
+	 * stack and call machine_check_exception to log the MCE event.
+	 */
+	LOAD_HANDLER(r10,mce_return)
+	mtspr	SPRN_SRR0,r10
+	ld	r10,PACAKMSR(r13)
+	mtspr	SPRN_SRR1,r10
+	RFI_TO_KERNEL
+	b	.
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 9:
 	/* Deliver the machine check to host kernel in V mode. */
 	MACHINE_CHECK_HANDLER_WINDUP
-	b	machine_check_pSeries
+	SET_SCRATCH0(r13)		/* save r13 */
+	EXCEPTION_PROLOG_0(PACA_EXMC)
+	b	machine_check_pSeries_0
 
 EXC_COMMON_BEGIN(unrecover_mce)
 	/* Invoke machine_check_exception to print MCE event and panic. */
@@ -640,29 +563,7 @@  EXC_COMMON_BEGIN(mce_return)
 	/* Invoke machine_check_exception to print MCE event and return. */
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	machine_check_exception
-	ld	r9,_MSR(r1)
-	mtspr	SPRN_SRR1,r9
-	ld	r3,_NIP(r1)
-	mtspr	SPRN_SRR0,r3
-	ld	r9,_CTR(r1)
-	mtctr	r9
-	ld	r9,_XER(r1)
-	mtxer	r9
-	ld	r9,_LINK(r1)
-	mtlr	r9
-	REST_GPR(0, r1)
-	REST_8GPRS(2, r1)
-	REST_GPR(10, r1)
-	ld	r11,_CCR(r1)
-	mtcr	r11
-	/* Decrement paca->in_mce. */
-	lhz	r12,PACA_IN_MCE(r13)
-	subi	r12,r12,1
-	sth	r12,PACA_IN_MCE(r13)
-	REST_GPR(11, r1)
-	REST_2GPRS(12, r1)
-	/* restore original r1. */
-	ld	r1,GPR1(r1)
+	MACHINE_CHECK_HANDLER_WINDUP
 	RFI_TO_KERNEL
 	b	.