diff mbox series

[v2,08/14] powerpc/pseries/ras: fwnmi sreset should not interlock

Message ID 20200403132622.130394-9-npiggin@gmail.com (mailing list archive)
State Superseded
Headers show
Series powerpc/64: machine check and system reset fixes | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (d0c12846a3a24cd6d68b608c866712bc7e471634)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 1 warnings, 0 checks, 71 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Nicholas Piggin April 3, 2020, 1:26 p.m. UTC
PAPR does not specify that fwnmi sreset should be interlocked, and
PowerVM (and therefore now QEMU) do not require it.

These "ibm,nmi-interlock" calls are ignored by firmware, but there
is a possibility that the sreset could have interrupted a machine
check and release the machine check's interlock too early, corrupting
it if another machine check came in.

This is an extremely rare case, but it should be fixed for clarity
and reducing the code executed in the sreset path. Firmware also
does not provide error information for the sreset case to look at, so
remove that comment.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/platforms/pseries/ras.c | 48 ++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 14 deletions(-)

Comments

Christophe Leroy April 3, 2020, 2:35 p.m. UTC | #1
Le 03/04/2020 à 15:26, Nicholas Piggin a écrit :
> PAPR does not specify that fwnmi sreset should be interlocked, and
> PowerVM (and therefore now QEMU) do not require it.
> 
> These "ibm,nmi-interlock" calls are ignored by firmware, but there
> is a possibility that the sreset could have interrupted a machine
> check and release the machine check's interlock too early, corrupting
> it if another machine check came in.
> 
> This is an extremely rare case, but it should be fixed for clarity
> and reducing the code executed in the sreset path. Firmware also
> does not provide error information for the sreset case to look at, so
> remove that comment.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/platforms/pseries/ras.c | 48 ++++++++++++++++++++--------
>   1 file changed, 34 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
> index a40598e6e525..833ae34b7fec 100644
> --- a/arch/powerpc/platforms/pseries/ras.c
> +++ b/arch/powerpc/platforms/pseries/ras.c
> @@ -406,6 +406,20 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void)
>   	return (struct rtas_error_log *)local_paca->mce_data_buf;
>   }
>   
> +static unsigned long *fwnmi_get_savep(struct pt_regs *regs)
> +{
> +	unsigned long savep_ra;
> +
> +	/* Mask top two bits */
> +	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
> +	if (!VALID_FWNMI_BUFFER(savep_ra)) {
> +		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);

Can't you use pr_err() instead ?

> +		return NULL;
> +	}
> +
> +	return __va(savep_ra);
> +}
> +
>   /*
>    * Get the error information for errors coming through the
>    * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
> @@ -423,20 +437,15 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void)
>    */
>   static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
>   {
> -	unsigned long savep_ra;
>   	unsigned long *savep;
>   	struct rtas_error_log *h;
>   
> -	/* Mask top two bits */
> -	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
> -
> -	if (!VALID_FWNMI_BUFFER(savep_ra)) {
> -		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
> +	savep = fwnmi_get_savep(regs);
> +	if (!savep)
>   		return NULL;
> -	}
>   
> -	savep = __va(savep_ra);
> -	regs->gpr[3] = be64_to_cpu(savep[0]);	/* restore original r3 */
> +	/* restore original r3 */
> +	regs->gpr[3] = be64_to_cpu(savep[0]);

Is it needed to change the location of the comment ?

>   
>   	h = (struct rtas_error_log *)&savep[1];
>   	/* Use the per cpu buffer from paca to store rtas error log */
> @@ -483,11 +492,22 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
>   #endif
>   
>   	if (fwnmi_active) {
> -		struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
> -		if (errhdr) {
> -			/* XXX Should look at FWNMI information */
> -		}
> -		fwnmi_release_errinfo();
> +		unsigned long *savep;
> +
> +		/*
> +		 * Firmware (PowerVM and KVM) saves r3 to a save area like
> +		 * machine check, which is not exactly what PAPR (2.9)
> +		 * suggests but there is no way to detect otherwise, so this
> +		 * is the interface now.
> +		 *
> +		 * System resets do not save any error log or require an
> +		 * "ibm,nmi-interlock" rtas call to release.
> +		 */
> +
> +		savep = fwnmi_get_savep(regs);
> +		/* restore original r3 */
> +		if (savep)
> +			regs->gpr[3] = be64_to_cpu(savep[0]);
>   	}
>   
>   	if (smp_handle_nmi_ipi(regs))
> 

Christophe
Nicholas Piggin April 7, 2020, 4:26 a.m. UTC | #2
Christophe Leroy's on April 4, 2020 12:35 am:
> 
> 
> Le 03/04/2020 à 15:26, Nicholas Piggin a écrit :
>> PAPR does not specify that fwnmi sreset should be interlocked, and
>> PowerVM (and therefore now QEMU) do not require it.
>> 
>> These "ibm,nmi-interlock" calls are ignored by firmware, but there
>> is a possibility that the sreset could have interrupted a machine
>> check and release the machine check's interlock too early, corrupting
>> it if another machine check came in.
>> 
>> This is an extremely rare case, but it should be fixed for clarity
>> and reducing the code executed in the sreset path. Firmware also
>> does not provide error information for the sreset case to look at, so
>> remove that comment.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/platforms/pseries/ras.c | 48 ++++++++++++++++++++--------
>>   1 file changed, 34 insertions(+), 14 deletions(-)
>> 
>> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
>> index a40598e6e525..833ae34b7fec 100644
>> --- a/arch/powerpc/platforms/pseries/ras.c
>> +++ b/arch/powerpc/platforms/pseries/ras.c
>> @@ -406,6 +406,20 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void)
>>   	return (struct rtas_error_log *)local_paca->mce_data_buf;
>>   }
>>   
>> +static unsigned long *fwnmi_get_savep(struct pt_regs *regs)
>> +{
>> +	unsigned long savep_ra;
>> +
>> +	/* Mask top two bits */
>> +	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
>> +	if (!VALID_FWNMI_BUFFER(savep_ra)) {
>> +		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
> 
> Can't you use pr_err() instead ?

I think so.

>> +		return NULL;
>> +	}
>> +
>> +	return __va(savep_ra);
>> +}
>> +
>>   /*
>>    * Get the error information for errors coming through the
>>    * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
>> @@ -423,20 +437,15 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void)
>>    */
>>   static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
>>   {
>> -	unsigned long savep_ra;
>>   	unsigned long *savep;
>>   	struct rtas_error_log *h;
>>   
>> -	/* Mask top two bits */
>> -	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
>> -
>> -	if (!VALID_FWNMI_BUFFER(savep_ra)) {
>> -		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
>> +	savep = fwnmi_get_savep(regs);
>> +	if (!savep)
>>   		return NULL;
>> -	}
>>   
>> -	savep = __va(savep_ra);
>> -	regs->gpr[3] = be64_to_cpu(savep[0]);	/* restore original r3 */
>> +	/* restore original r3 */
>> +	regs->gpr[3] = be64_to_cpu(savep[0]);
> 
> Is it needed to change the location of the comment ?

No, I originally had other changes I think. Will fix.

Thanks,
Nick
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index a40598e6e525..833ae34b7fec 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -406,6 +406,20 @@  static inline struct rtas_error_log *fwnmi_get_errlog(void)
 	return (struct rtas_error_log *)local_paca->mce_data_buf;
 }
 
+static unsigned long *fwnmi_get_savep(struct pt_regs *regs)
+{
+	unsigned long savep_ra;
+
+	/* Mask top two bits */
+	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
+	if (!VALID_FWNMI_BUFFER(savep_ra)) {
+		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
+		return NULL;
+	}
+
+	return __va(savep_ra);
+}
+
 /*
  * Get the error information for errors coming through the
  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
@@ -423,20 +437,15 @@  static inline struct rtas_error_log *fwnmi_get_errlog(void)
  */
 static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 {
-	unsigned long savep_ra;
 	unsigned long *savep;
 	struct rtas_error_log *h;
 
-	/* Mask top two bits */
-	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
-
-	if (!VALID_FWNMI_BUFFER(savep_ra)) {
-		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
+	savep = fwnmi_get_savep(regs);
+	if (!savep)
 		return NULL;
-	}
 
-	savep = __va(savep_ra);
-	regs->gpr[3] = be64_to_cpu(savep[0]);	/* restore original r3 */
+	/* restore original r3 */
+	regs->gpr[3] = be64_to_cpu(savep[0]);
 
 	h = (struct rtas_error_log *)&savep[1];
 	/* Use the per cpu buffer from paca to store rtas error log */
@@ -483,11 +492,22 @@  int pSeries_system_reset_exception(struct pt_regs *regs)
 #endif
 
 	if (fwnmi_active) {
-		struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
-		if (errhdr) {
-			/* XXX Should look at FWNMI information */
-		}
-		fwnmi_release_errinfo();
+		unsigned long *savep;
+
+		/*
+		 * Firmware (PowerVM and KVM) saves r3 to a save area like
+		 * machine check, which is not exactly what PAPR (2.9)
+		 * suggests but there is no way to detect otherwise, so this
+		 * is the interface now.
+		 *
+		 * System resets do not save any error log or require an
+		 * "ibm,nmi-interlock" rtas call to release.
+		 */
+
+		savep = fwnmi_get_savep(regs);
+		/* restore original r3 */
+		if (savep)
+			regs->gpr[3] = be64_to_cpu(savep[0]);
 	}
 
 	if (smp_handle_nmi_ipi(regs))