Message ID | 1477290509-31620-1-git-send-email-stewart@linux.vnet.ibm.com |
---|---|
State | Superseded |
Headers | show |
On 10/24/2016 11:58 AM, Stewart Smith wrote: > Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com> > --- > core/fast-reboot.c | 18 ++++++++++++++++++ > core/hmi.c | 2 ++ > hw/fsp/fsp-codeupdate.c | 2 ++ > include/skiboot.h | 1 + > 4 files changed, 23 insertions(+) > > diff --git a/core/fast-reboot.c b/core/fast-reboot.c > index 66b31824c9eb..7f861b9760cd 100644 > --- a/core/fast-reboot.c > +++ b/core/fast-reboot.c > @@ -281,6 +281,16 @@ static bool fast_reset_p8(void) > extern void *fdt; > extern struct lock capi_lock; > > +static const char *fast_reboot_disabled = NULL; > +static struct lock fast_reboot_disabled_lock = LOCK_UNLOCKED; > + > +void disable_fast_reboot(const char *reason) > +{ > + lock(&fast_reboot_disabled_lock); > + fast_reboot_disabled = reason; > + unlock(&fast_reboot_disabled_lock); > +} > + > void fast_reboot(void) > { > bool success; > @@ -298,6 +308,14 @@ void fast_reboot(void) > return; > } > > + lock(&fast_reboot_disabled_lock); > + if (fast_reboot_disabled) { > + prlog(PR_DEBUG, "RESET: Fast reboot disabled because %s\n", > + fast_reboot_disabled); Shouldn't we unlock and return ? > + return; > + } > + unlock(&fast_reboot_disabled_lock); > + > lock(&capi_lock); > for_each_chip(chip) { > if (chip->capp_phb3_attached_mask) { > diff --git a/core/hmi.c b/core/hmi.c > index 69403c6db350..6fe060dc7c56 100644 > --- a/core/hmi.c > +++ b/core/hmi.c > @@ -836,6 +836,8 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) > } > } > > + if (recover == 0) > + disable_fast_reboot("Unrecoverable HMI"); This only fixes platform error reported by HMI.. What about memory errors like memory UE ? I think we should disable fast reboot from opal_cec_reboot2() under 'OPAL_REBOOT_PLATFORM_ERROR' switch case. Linux always call opal_cec_reboot2() on any platform errors reported to it e.g. unrecoverable HMI or MCE. What do you think ? Thanks, -Mahesh. > /* > * HMER bits are sticky, once set to 1 they remain set to 1 until > * they are set to 0. Reset the error source bit to 0, otherwise > diff --git a/hw/fsp/fsp-codeupdate.c b/hw/fsp/fsp-codeupdate.c > index 2263bf36dcc8..ae1c4b6c5a42 100644 > --- a/hw/fsp/fsp-codeupdate.c > +++ b/hw/fsp/fsp-codeupdate.c > @@ -1126,6 +1126,8 @@ static int64_t fsp_opal_update_flash(struct opal_sg_list *list) > struct opal_sg_entry *entry; > int length, num_entries, result = 0, rc = OPAL_PARAMETER; > > + disable_fast_reboot("FSP Code Update"); > + > /* Ensure that the sg list honors our alignment requirements */ > rc = validate_sglist(list); > if (rc) { > diff --git a/include/skiboot.h b/include/skiboot.h > index b50868d2d8c7..45f230a37635 100644 > --- a/include/skiboot.h > +++ b/include/skiboot.h > @@ -190,6 +190,7 @@ extern unsigned long get_symbol(unsigned long addr, > char **sym, char **sym_end); > > /* Fast reboot support */ > +extern void disable_fast_reboot(const char *reason); > extern void fast_reboot(void); > extern void __noreturn __secondary_cpu_entry(void); > extern void __noreturn load_and_boot_kernel(bool is_reboot); >
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> writes: > On 10/24/2016 11:58 AM, Stewart Smith wrote: >> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com> >> --- >> core/fast-reboot.c | 18 ++++++++++++++++++ >> core/hmi.c | 2 ++ >> hw/fsp/fsp-codeupdate.c | 2 ++ >> include/skiboot.h | 1 + >> 4 files changed, 23 insertions(+) >> >> diff --git a/core/fast-reboot.c b/core/fast-reboot.c >> index 66b31824c9eb..7f861b9760cd 100644 >> --- a/core/fast-reboot.c >> +++ b/core/fast-reboot.c >> @@ -281,6 +281,16 @@ static bool fast_reset_p8(void) >> extern void *fdt; >> extern struct lock capi_lock; >> >> +static const char *fast_reboot_disabled = NULL; >> +static struct lock fast_reboot_disabled_lock = LOCK_UNLOCKED; >> + >> +void disable_fast_reboot(const char *reason) >> +{ >> + lock(&fast_reboot_disabled_lock); >> + fast_reboot_disabled = reason; >> + unlock(&fast_reboot_disabled_lock); >> +} >> + >> void fast_reboot(void) >> { >> bool success; >> @@ -298,6 +308,14 @@ void fast_reboot(void) >> return; >> } >> >> + lock(&fast_reboot_disabled_lock); >> + if (fast_reboot_disabled) { >> + prlog(PR_DEBUG, "RESET: Fast reboot disabled because %s\n", >> + fast_reboot_disabled); > > Shouldn't we unlock and return ? Yes... although the slow reboot will *technically* unlock it :) > >> + return; >> + } >> + unlock(&fast_reboot_disabled_lock); >> + >> lock(&capi_lock); >> for_each_chip(chip) { >> if (chip->capp_phb3_attached_mask) { >> diff --git a/core/hmi.c b/core/hmi.c >> index 69403c6db350..6fe060dc7c56 100644 >> --- a/core/hmi.c >> +++ b/core/hmi.c >> @@ -836,6 +836,8 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) >> } >> } >> >> + if (recover == 0) >> + disable_fast_reboot("Unrecoverable HMI"); > > This only fixes platform error reported by HMI.. What about memory > errors like memory UE ? > > I think we should disable fast reboot from opal_cec_reboot2() under > 'OPAL_REBOOT_PLATFORM_ERROR' switch case. Linux always call > opal_cec_reboot2() on any platform errors reported to it e.g. > unrecoverable HMI or MCE. What do you think ? I think that's a good place to do it too. A TODO is to check FIRs themselves of course.
On 24/10/16 17:28, Stewart Smith wrote: > Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com> With the added unlock(&fast_reboot_disabled_lock): Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> I probably won't bother with a patch to make CAPI use disable_fast_reboot() given I'm working on fixing that properly.
diff --git a/core/fast-reboot.c b/core/fast-reboot.c index 66b31824c9eb..7f861b9760cd 100644 --- a/core/fast-reboot.c +++ b/core/fast-reboot.c @@ -281,6 +281,16 @@ static bool fast_reset_p8(void) extern void *fdt; extern struct lock capi_lock; +static const char *fast_reboot_disabled = NULL; +static struct lock fast_reboot_disabled_lock = LOCK_UNLOCKED; + +void disable_fast_reboot(const char *reason) +{ + lock(&fast_reboot_disabled_lock); + fast_reboot_disabled = reason; + unlock(&fast_reboot_disabled_lock); +} + void fast_reboot(void) { bool success; @@ -298,6 +308,14 @@ void fast_reboot(void) return; } + lock(&fast_reboot_disabled_lock); + if (fast_reboot_disabled) { + prlog(PR_DEBUG, "RESET: Fast reboot disabled because %s\n", + fast_reboot_disabled); + return; + } + unlock(&fast_reboot_disabled_lock); + lock(&capi_lock); for_each_chip(chip) { if (chip->capp_phb3_attached_mask) { diff --git a/core/hmi.c b/core/hmi.c index 69403c6db350..6fe060dc7c56 100644 --- a/core/hmi.c +++ b/core/hmi.c @@ -836,6 +836,8 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) } } + if (recover == 0) + disable_fast_reboot("Unrecoverable HMI"); /* * HMER bits are sticky, once set to 1 they remain set to 1 until * they are set to 0. Reset the error source bit to 0, otherwise diff --git a/hw/fsp/fsp-codeupdate.c b/hw/fsp/fsp-codeupdate.c index 2263bf36dcc8..ae1c4b6c5a42 100644 --- a/hw/fsp/fsp-codeupdate.c +++ b/hw/fsp/fsp-codeupdate.c @@ -1126,6 +1126,8 @@ static int64_t fsp_opal_update_flash(struct opal_sg_list *list) struct opal_sg_entry *entry; int length, num_entries, result = 0, rc = OPAL_PARAMETER; + disable_fast_reboot("FSP Code Update"); + /* Ensure that the sg list honors our alignment requirements */ rc = validate_sglist(list); if (rc) { diff --git a/include/skiboot.h b/include/skiboot.h index b50868d2d8c7..45f230a37635 100644 --- a/include/skiboot.h +++ b/include/skiboot.h @@ -190,6 +190,7 @@ extern unsigned long get_symbol(unsigned long addr, char **sym, char **sym_end); /* Fast reboot support */ +extern void disable_fast_reboot(const char *reason); extern void fast_reboot(void); extern void __noreturn __secondary_cpu_entry(void); extern void __noreturn load_and_boot_kernel(bool is_reboot);
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com> --- core/fast-reboot.c | 18 ++++++++++++++++++ core/hmi.c | 2 ++ hw/fsp/fsp-codeupdate.c | 2 ++ include/skiboot.h | 1 + 4 files changed, 23 insertions(+)