diff mbox

fast-reboot: disable on FSP code update or unrecoverable HMI

Message ID 1477290509-31620-1-git-send-email-stewart@linux.vnet.ibm.com
State Superseded
Headers show

Commit Message

Stewart Smith Oct. 24, 2016, 6:28 a.m. UTC
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
---
 core/fast-reboot.c      | 18 ++++++++++++++++++
 core/hmi.c              |  2 ++
 hw/fsp/fsp-codeupdate.c |  2 ++
 include/skiboot.h       |  1 +
 4 files changed, 23 insertions(+)

Comments

Mahesh J Salgaonkar Oct. 24, 2016, 6:48 a.m. UTC | #1
On 10/24/2016 11:58 AM, Stewart Smith wrote:
> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
> ---
>  core/fast-reboot.c      | 18 ++++++++++++++++++
>  core/hmi.c              |  2 ++
>  hw/fsp/fsp-codeupdate.c |  2 ++
>  include/skiboot.h       |  1 +
>  4 files changed, 23 insertions(+)
> 
> diff --git a/core/fast-reboot.c b/core/fast-reboot.c
> index 66b31824c9eb..7f861b9760cd 100644
> --- a/core/fast-reboot.c
> +++ b/core/fast-reboot.c
> @@ -281,6 +281,16 @@ static bool fast_reset_p8(void)
>  extern void *fdt;
>  extern struct lock capi_lock;
>  
> +static const char *fast_reboot_disabled = NULL;
> +static struct lock fast_reboot_disabled_lock = LOCK_UNLOCKED;
> +
> +void disable_fast_reboot(const char *reason)
> +{
> +	lock(&fast_reboot_disabled_lock);
> +	fast_reboot_disabled = reason;
> +	unlock(&fast_reboot_disabled_lock);
> +}
> +
>  void fast_reboot(void)
>  {
>  	bool success;
> @@ -298,6 +308,14 @@ void fast_reboot(void)
>  		return;
>  	}
>  
> +	lock(&fast_reboot_disabled_lock);
> +	if (fast_reboot_disabled) {
> +		prlog(PR_DEBUG, "RESET: Fast reboot disabled because %s\n",
> +		      fast_reboot_disabled);

Shouldn't we unlock and return ?

> +		return;
> +	}
> +	unlock(&fast_reboot_disabled_lock);
> +
>  	lock(&capi_lock);
>  	for_each_chip(chip) {
>  		if (chip->capp_phb3_attached_mask) {
> diff --git a/core/hmi.c b/core/hmi.c
> index 69403c6db350..6fe060dc7c56 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -836,6 +836,8 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  		}
>  	}
>  
> +	if (recover == 0)
> +		disable_fast_reboot("Unrecoverable HMI");

This only fixes platform error reported by HMI.. What about memory
errors like memory UE ?

I think we should disable fast reboot from opal_cec_reboot2() under
'OPAL_REBOOT_PLATFORM_ERROR' switch case. Linux always call
opal_cec_reboot2() on any platform errors reported to it e.g.
unrecoverable HMI or MCE. What do you think ?

Thanks,
-Mahesh.

>  	/*
>  	 * HMER bits are sticky, once set to 1 they remain set to 1 until
>  	 * they are set to 0. Reset the error source bit to 0, otherwise
> diff --git a/hw/fsp/fsp-codeupdate.c b/hw/fsp/fsp-codeupdate.c
> index 2263bf36dcc8..ae1c4b6c5a42 100644
> --- a/hw/fsp/fsp-codeupdate.c
> +++ b/hw/fsp/fsp-codeupdate.c
> @@ -1126,6 +1126,8 @@ static int64_t fsp_opal_update_flash(struct opal_sg_list *list)
>  	struct opal_sg_entry *entry;
>  	int length, num_entries, result = 0, rc = OPAL_PARAMETER;
>  
> +	disable_fast_reboot("FSP Code Update");
> +
>  	/* Ensure that the sg list honors our alignment requirements */
>  	rc = validate_sglist(list);
>  	if (rc) {
> diff --git a/include/skiboot.h b/include/skiboot.h
> index b50868d2d8c7..45f230a37635 100644
> --- a/include/skiboot.h
> +++ b/include/skiboot.h
> @@ -190,6 +190,7 @@ extern unsigned long get_symbol(unsigned long addr,
>  				char **sym, char **sym_end);
>  
>  /* Fast reboot support */
> +extern void disable_fast_reboot(const char *reason);
>  extern void fast_reboot(void);
>  extern void __noreturn __secondary_cpu_entry(void);
>  extern void __noreturn load_and_boot_kernel(bool is_reboot);
>
Stewart Smith Oct. 24, 2016, 7:46 a.m. UTC | #2
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> writes:
> On 10/24/2016 11:58 AM, Stewart Smith wrote:
>> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
>> ---
>>  core/fast-reboot.c      | 18 ++++++++++++++++++
>>  core/hmi.c              |  2 ++
>>  hw/fsp/fsp-codeupdate.c |  2 ++
>>  include/skiboot.h       |  1 +
>>  4 files changed, 23 insertions(+)
>> 
>> diff --git a/core/fast-reboot.c b/core/fast-reboot.c
>> index 66b31824c9eb..7f861b9760cd 100644
>> --- a/core/fast-reboot.c
>> +++ b/core/fast-reboot.c
>> @@ -281,6 +281,16 @@ static bool fast_reset_p8(void)
>>  extern void *fdt;
>>  extern struct lock capi_lock;
>>  
>> +static const char *fast_reboot_disabled = NULL;
>> +static struct lock fast_reboot_disabled_lock = LOCK_UNLOCKED;
>> +
>> +void disable_fast_reboot(const char *reason)
>> +{
>> +	lock(&fast_reboot_disabled_lock);
>> +	fast_reboot_disabled = reason;
>> +	unlock(&fast_reboot_disabled_lock);
>> +}
>> +
>>  void fast_reboot(void)
>>  {
>>  	bool success;
>> @@ -298,6 +308,14 @@ void fast_reboot(void)
>>  		return;
>>  	}
>>  
>> +	lock(&fast_reboot_disabled_lock);
>> +	if (fast_reboot_disabled) {
>> +		prlog(PR_DEBUG, "RESET: Fast reboot disabled because %s\n",
>> +		      fast_reboot_disabled);
>
> Shouldn't we unlock and return ?

Yes... although the slow reboot will *technically* unlock it :)

>
>> +		return;
>> +	}
>> +	unlock(&fast_reboot_disabled_lock);
>> +
>>  	lock(&capi_lock);
>>  	for_each_chip(chip) {
>>  		if (chip->capp_phb3_attached_mask) {
>> diff --git a/core/hmi.c b/core/hmi.c
>> index 69403c6db350..6fe060dc7c56 100644
>> --- a/core/hmi.c
>> +++ b/core/hmi.c
>> @@ -836,6 +836,8 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>>  		}
>>  	}
>>  
>> +	if (recover == 0)
>> +		disable_fast_reboot("Unrecoverable HMI");
>
> This only fixes platform error reported by HMI.. What about memory
> errors like memory UE ?
>
> I think we should disable fast reboot from opal_cec_reboot2() under
> 'OPAL_REBOOT_PLATFORM_ERROR' switch case. Linux always call
> opal_cec_reboot2() on any platform errors reported to it e.g.
> unrecoverable HMI or MCE. What do you think ?

I think that's a good place to do it too.

A TODO is to check FIRs themselves of course.
Andrew Donnellan Oct. 24, 2016, 8:17 a.m. UTC | #3
On 24/10/16 17:28, Stewart Smith wrote:
> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>

With the added unlock(&fast_reboot_disabled_lock):

Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

I probably won't bother with a patch to make CAPI use 
disable_fast_reboot() given I'm working on fixing that properly.
diff mbox

Patch

diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 66b31824c9eb..7f861b9760cd 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -281,6 +281,16 @@  static bool fast_reset_p8(void)
 extern void *fdt;
 extern struct lock capi_lock;
 
+static const char *fast_reboot_disabled = NULL;
+static struct lock fast_reboot_disabled_lock = LOCK_UNLOCKED;
+
+void disable_fast_reboot(const char *reason)
+{
+	lock(&fast_reboot_disabled_lock);
+	fast_reboot_disabled = reason;
+	unlock(&fast_reboot_disabled_lock);
+}
+
 void fast_reboot(void)
 {
 	bool success;
@@ -298,6 +308,14 @@  void fast_reboot(void)
 		return;
 	}
 
+	lock(&fast_reboot_disabled_lock);
+	if (fast_reboot_disabled) {
+		prlog(PR_DEBUG, "RESET: Fast reboot disabled because %s\n",
+		      fast_reboot_disabled);
+		return;
+	}
+	unlock(&fast_reboot_disabled_lock);
+
 	lock(&capi_lock);
 	for_each_chip(chip) {
 		if (chip->capp_phb3_attached_mask) {
diff --git a/core/hmi.c b/core/hmi.c
index 69403c6db350..6fe060dc7c56 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -836,6 +836,8 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 		}
 	}
 
+	if (recover == 0)
+		disable_fast_reboot("Unrecoverable HMI");
 	/*
 	 * HMER bits are sticky, once set to 1 they remain set to 1 until
 	 * they are set to 0. Reset the error source bit to 0, otherwise
diff --git a/hw/fsp/fsp-codeupdate.c b/hw/fsp/fsp-codeupdate.c
index 2263bf36dcc8..ae1c4b6c5a42 100644
--- a/hw/fsp/fsp-codeupdate.c
+++ b/hw/fsp/fsp-codeupdate.c
@@ -1126,6 +1126,8 @@  static int64_t fsp_opal_update_flash(struct opal_sg_list *list)
 	struct opal_sg_entry *entry;
 	int length, num_entries, result = 0, rc = OPAL_PARAMETER;
 
+	disable_fast_reboot("FSP Code Update");
+
 	/* Ensure that the sg list honors our alignment requirements */
 	rc = validate_sglist(list);
 	if (rc) {
diff --git a/include/skiboot.h b/include/skiboot.h
index b50868d2d8c7..45f230a37635 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -190,6 +190,7 @@  extern unsigned long get_symbol(unsigned long addr,
 				char **sym, char **sym_end);
 
 /* Fast reboot support */
+extern void disable_fast_reboot(const char *reason);
 extern void fast_reboot(void);
 extern void __noreturn __secondary_cpu_entry(void);
 extern void __noreturn load_and_boot_kernel(bool is_reboot);