diff mbox series

[v5,2/3] platforms/astbmc: Check for SBE validation step

Message ID 20190507020153.18927-2-sam@mendozajonas.com
State Superseded
Headers show
Series [v5,1/3] include/ipmi: Fix incorrect chassis commands | expand

Commit Message

Sam Mendoza-Jonas May 7, 2019, 2:01 a.m. UTC
On some POWER8 astbmc systems an update to the SBE requires pausing at
runtime to ensure integrity of the SBE. If this is required the BMC will
set a chassis boot option IPMI flag using the OEM parameter 0x62. If
Skiboot sees this flag is set it waits until the SBE update is complete
and the flag is cleared.
Unfortunately the mystery operation that validates the SBE also leaves
it in a bad state and unable to be used for timer operations. To
workaround this the flag is checked as soon as possible (ie. when IPMI
and the console are set up), and once complete the system is rebooted.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
v5: Check resp_size in ipmi-info and check the SBE flag less frequently
v4: As explained above and in doc/bmc the operation breaks the SBE and
gets the SLW timer stuck. Move the check as early as possible and
reboot once complete to fix things.
v3: Check location moved to just before kernel booting; otherwise the
kernel image appears to be malformed somehow and we immediately reboot.
v2: IPMI response format updated.
This reflects functionality that will appear in new versions of AMI's
and SMC's BMC implementations. The format of the IPMI response has been
confirmed and testing is ongoing with both parties to verify the
behaviour.

 core/init.c                 |   8 +++
 hw/ipmi/ipmi-info.c         | 105 ++++++++++++++++++++++++++++++++++--
 include/ipmi.h              |   7 +++
 include/platform.h          |   5 ++
 platforms/astbmc/astbmc.h   |   1 +
 platforms/astbmc/common.c   |  47 ++++++++++++++++
 platforms/astbmc/garrison.c |   1 +
 platforms/astbmc/habanero.c |   1 +
 platforms/astbmc/p8dnu.c    |   1 +
 platforms/astbmc/p8dtu.c    |   2 +
 10 files changed, 175 insertions(+), 3 deletions(-)

Comments

Andrew Jeffery May 7, 2019, 2:43 a.m. UTC | #1
On Tue, 7 May 2019, at 11:32, Samuel Mendoza-Jonas wrote:
> On some POWER8 astbmc systems an update to the SBE requires pausing at
> runtime to ensure integrity of the SBE. If this is required the BMC will
> set a chassis boot option IPMI flag using the OEM parameter 0x62. If
> Skiboot sees this flag is set it waits until the SBE update is complete
> and the flag is cleared.
> Unfortunately the mystery operation that validates the SBE also leaves
> it in a bad state and unable to be used for timer operations. To
> workaround this the flag is checked as soon as possible (ie. when IPMI
> and the console are set up), and once complete the system is rebooted.
> 
> Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
> ---
> v5: Check resp_size in ipmi-info and check the SBE flag less frequently
> v4: As explained above and in doc/bmc the operation breaks the SBE and
> gets the SLW timer stuck. Move the check as early as possible and
> reboot once complete to fix things.
> v3: Check location moved to just before kernel booting; otherwise the
> kernel image appears to be malformed somehow and we immediately reboot.
> v2: IPMI response format updated.
> This reflects functionality that will appear in new versions of AMI's
> and SMC's BMC implementations. The format of the IPMI response has been
> confirmed and testing is ongoing with both parties to verify the
> behaviour.
> 
>  core/init.c                 |   8 +++
>  hw/ipmi/ipmi-info.c         | 105 ++++++++++++++++++++++++++++++++++--
>  include/ipmi.h              |   7 +++
>  include/platform.h          |   5 ++
>  platforms/astbmc/astbmc.h   |   1 +
>  platforms/astbmc/common.c   |  47 ++++++++++++++++
>  platforms/astbmc/garrison.c |   1 +
>  platforms/astbmc/habanero.c |   1 +
>  platforms/astbmc/p8dnu.c    |   1 +
>  platforms/astbmc/p8dtu.c    |   2 +
>  10 files changed, 175 insertions(+), 3 deletions(-)
> 
> diff --git a/core/init.c b/core/init.c
> index 955d299d..bca12dfc 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -1190,6 +1190,14 @@ void __noreturn __nomcount main_cpu_entry(const 
> void *fdt)
>  	/* Install the OPAL Console handlers */
>  	init_opal_console();
>  
> +	/*
> +	 * Some platforms set a flag to wait for SBE validation to be
> +	 * performed by the BMC. If this occurs it leaves the SBE in a
> +	 * bad state and the system will reboot at this point.
> +	 */
> +	if (platform.seeprom_update)
> +		platform.seeprom_update();
> +
>  	/* Init SLW related stuff, including fastsleep */
>  	slw_init();
>  
> diff --git a/hw/ipmi/ipmi-info.c b/hw/ipmi/ipmi-info.c
> index 56370037..a582fe59 100644
> --- a/hw/ipmi/ipmi-info.c
> +++ b/hw/ipmi/ipmi-info.c
> @@ -23,7 +23,7 @@
>  #include <timebase.h>
>  
>  /*
> - * Respones data from IPMI Get device ID command (As defined in
> + * Response data from IPMI Get device ID command (As defined in
>   * Section 20.1 Get Device ID Command - IPMI standard spec).
>   */
>  struct ipmi_dev_id {
> @@ -39,9 +39,27 @@ struct ipmi_dev_id {
>  };
>  static struct ipmi_dev_id *ipmi_dev_id;
>  
> +/*
> + * Response data from IPMI Chassis Get System Boot Option (As defined in
> + * Section 28.13 Get System Boot Options Command - IPMI standard spec).
> + */
> +struct ipmi_sys_boot_opt {
> +	uint8_t param_version;
> +	uint8_t param_valid;
> +	/*
> +	 * Fields for OEM parameter 0x62. This parameter does not follow
> +	 * the normal layout and just has a single byte to signal if it
> +	 * is active or not.
> +	 */
> +	uint8_t flag_set;
> +};
> +static struct ipmi_sys_boot_opt *ipmi_sys_boot_opt;
> +
>  /* Got response from BMC? */
>  static bool bmc_info_waiting = false;
>  static bool bmc_info_valid = false;
> +static bool bmc_boot_opt_waiting = false;
> +static bool bmc_boot_opt_valid = false;
>  
>  /* This will free ipmi_dev_id structure */
>  void ipmi_dt_add_bmc_info(void)
> @@ -79,8 +97,14 @@ static void ipmi_get_bmc_info_resp(struct ipmi_msg *msg)
>  		return;
>  	}
>  
> -	bmc_info_valid = true;
> -	memcpy(ipmi_dev_id, msg->data, msg->resp_size);
> +	/* ipmi_dev_id has optional fields */
> +	if (msg->resp_size <= sizeof(struct ipmi_dev_id)) {
> +		bmc_info_valid = true;
> +		memcpy(ipmi_dev_id, msg->data, msg->resp_size);
> +	} else {
> +		prlog(PR_WARNING, "IPMI: IPMI_BMC_GET_DEVICE_ID unexpected response size\n");
> +	}
> +

Probably should have been a separate patch?

The changes look okay otherwise.

Reviewed-by: Andrew Jeffery <andrew@aj.id.au>

>  	ipmi_free_msg(msg);
>  }
>  
> @@ -110,3 +134,78 @@ int ipmi_get_bmc_info_request(void)
>  	bmc_info_waiting = true;
>  	return rc;
>  }
> +
> +/* This will free ipmi_sys_boot_opt structure */
> +int ipmi_chassis_check_sbe_validation(void)
> +{
> +	int rc = -1;
> +
> +	while (bmc_boot_opt_waiting)
> +		time_wait_ms(5);
> +
> +	if (!bmc_boot_opt_valid)
> +		return -1;
> +
> +	if ((ipmi_sys_boot_opt->param_valid & 0x8) != 0)
> +		goto out;
> +	if (ipmi_sys_boot_opt->param_valid != 0x62)
> +		goto out;
> +
> +	rc = ipmi_sys_boot_opt->flag_set;
> +
> +out:
> +	free(ipmi_sys_boot_opt);
> +	return rc;
> +}
> +
> +static void ipmi_get_chassis_boot_opt_resp(struct ipmi_msg *msg)
> +{
> +	bmc_boot_opt_waiting = false;
> +
> +	if (msg->cc != IPMI_CC_NO_ERROR) {
> +		prlog(PR_INFO, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT cmd returned error"
> +		      " [rc : 0x%x]\n", msg->data[0]);
> +		return;
> +	}
> +
> +	if (msg->resp_size == sizeof(struct ipmi_sys_boot_opt)) {
> +		bmc_boot_opt_valid = true;
> +		memcpy(ipmi_sys_boot_opt, msg->data, msg->resp_size);
> +	} else {
> +		prlog(PR_WARNING, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT unexpected 
> response size\n");
> +	}
> +
> +	ipmi_free_msg(msg);
> +}
> +
> +int ipmi_get_chassis_boot_opt_request(void)
> +{
> +	int rc;
> +	struct ipmi_msg *msg;
> +	uint8_t req[] = {
> +		0x62, /* OEM parameter (SBE Validation on astbmc) */
> +		0x00, /* no set selector */
> +		0x00, /* no block selector */
> +	};
> +
> +	ipmi_sys_boot_opt = zalloc(sizeof(struct ipmi_sys_boot_opt));
> +	assert(ipmi_sys_boot_opt);
> +
> +	msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_CHASSIS_GET_BOOT_OPT,
> +			 ipmi_get_chassis_boot_opt_resp, NULL, req,
> +			 sizeof(req), sizeof(struct ipmi_sys_boot_opt));
> +	if (!msg)
> +		return OPAL_NO_MEM;
> +
> +	msg->error = ipmi_get_chassis_boot_opt_resp;
> +	prlog(PR_INFO, "IPMI: Requesting IPMI_CHASSIS_GET_BOOT_OPT\n");
> +	rc = ipmi_queue_msg(msg);
> +	if (rc) {
> +		prlog(PR_ERR, "IPMI: Failed to queue IPMI_CHASSIS_GET_BOOT_OPT\n");
> +		ipmi_free_msg(msg);
> +		return rc;
> +	}
> +
> +	bmc_boot_opt_waiting = true;
> +	return rc;
> +}
> diff --git a/include/ipmi.h b/include/ipmi.h
> index da85c4b0..ec9f3c49 100644
> --- a/include/ipmi.h
> +++ b/include/ipmi.h
> @@ -109,6 +109,7 @@
>  #define IPMI_GET_SEL_TIME		IPMI_CODE(IPMI_NETFN_STORAGE, 0x48)
>  #define IPMI_SET_SEL_TIME		IPMI_CODE(IPMI_NETFN_STORAGE, 0x49)
>  #define IPMI_CHASSIS_CONTROL		IPMI_CODE(IPMI_NETFN_CHASSIS, 0x02)
> +#define IPMI_CHASSIS_GET_BOOT_OPT	IPMI_CODE(IPMI_NETFN_CHASSIS, 0x09)
>  #define IPMI_BMC_GET_DEVICE_ID		IPMI_CODE(IPMI_NETFN_APP, 0x01)
>  #define IPMI_SET_POWER_STATE		IPMI_CODE(IPMI_NETFN_APP, 0x06)
>  #define IPMI_GET_POWER_STATE		IPMI_CODE(IPMI_NETFN_APP, 0x07)
> @@ -291,4 +292,10 @@ extern int ipmi_get_bmc_info_request(void);
>  /* Add BMC firmware info to device tree */
>  extern void ipmi_dt_add_bmc_info(void);
>  
> +/* Get BMC Boot Options info (specifically OEM param 0x62) */
> +int ipmi_get_chassis_boot_opt_request(void);
> +
> +/* Get OEM Boot Option 0x62 for SBE validation flag */
> +int ipmi_chassis_check_sbe_validation(void);
> +
>  #endif
> diff --git a/include/platform.h b/include/platform.h
> index f63c24a3..4f8627a3 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -232,6 +232,11 @@ struct platform {
>  	 * OPAL terminate
>  	 */
>  	void __attribute__((noreturn)) (*terminate)(const char *msg);
> +
> +	/*
> +	 * SEEPROM update routine
> +	 */
> +	void		(*seeprom_update)(void);
>  };
>  
>  extern struct platform __platforms_start;
> diff --git a/platforms/astbmc/astbmc.h b/platforms/astbmc/astbmc.h
> index fe358b74..c302b607 100644
> --- a/platforms/astbmc/astbmc.h
> +++ b/platforms/astbmc/astbmc.h
> @@ -103,6 +103,7 @@ extern void astbmc_ext_irq_serirq_cpld(unsigned int 
> chip_id);
>  extern int pnor_init(void);
>  extern void check_all_slot_table(void);
>  extern void astbmc_exit(void);
> +extern void astbmc_seeprom_update(void);
>  
>  extern void slot_table_init(const struct slot_table_entry *top_table);
>  extern void slot_table_get_slot_info(struct phb *phb, struct pci_device * pd);
> diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
> index faa73e2f..413613da 100644
> --- a/platforms/astbmc/common.c
> +++ b/platforms/astbmc/common.c
> @@ -26,6 +26,7 @@
>  #include <bt.h>
>  #include <errorlog.h>
>  #include <lpc.h>
> +#include <timebase.h>
>  
>  #include "astbmc.h"
>  
> @@ -168,6 +169,52 @@ int64_t astbmc_ipmi_reboot(void)
>  	return ipmi_chassis_control(IPMI_CHASSIS_HARD_RESET);
>  }
>  
> +void astbmc_seeprom_update(void)
> +{
> +	int flag_set, counter;
> +
> +	ipmi_get_chassis_boot_opt_request();
> +
> +	flag_set = ipmi_chassis_check_sbe_validation();
> +
> +	if (flag_set <= 0) {
> +		prlog(PR_DEBUG, "SBE validation flag unset or invalid\n");
> +		return;
> +	}
> +
> +	/*
> +	 * Flag is set, wait until SBE validation is complete and the flag
> +	 * has been reset.
> +	 */
> +	prlog(PR_WARNING, "SBE validation required, waiting for completion\n");
> +	prlog(PR_WARNING, "System will be powered off if validation fails\n");
> +	counter = 0;
> +
> +	while (flag_set > 0) {
> +		time_wait_ms(10000);
> +		if (++counter >= 3) {
> +			/* Let the user know we're alive every 30s */
> +			prlog(PR_WARNING, "waiting for completion..\n");
> +			counter = 0;
> +		}
> +
> +		ipmi_get_chassis_boot_opt_request();
> +		flag_set = ipmi_chassis_check_sbe_validation();
> +	}
> +
> +	/*
> +	 * The SBE validation can (will) leave the SBE in a bad state,
> +	 * preventing timers from working properly. Reboot so that we
> +	 * can boot normally with everything intact.
> +	 */
> +	prlog(PR_WARNING, "SBE validation complete, rebooting\n");
> +	if (platform.cec_reboot)
> +		platform.cec_reboot();
> +	else
> +		abort();
> +	while(true);
> +}
> +
>  static void astbmc_fixup_dt_system_id(void)
>  {
>  	/* Make sure we don't already have one */
> diff --git a/platforms/astbmc/garrison.c b/platforms/astbmc/garrison.c
> index 5cbe64b5..ddd33721 100644
> --- a/platforms/astbmc/garrison.c
> +++ b/platforms/astbmc/garrison.c
> @@ -305,4 +305,5 @@ DECLARE_PLATFORM(garrison) = {
>  	.resource_loaded	= flash_resource_loaded,
>  	.exit			= ipmi_wdt_final_reset,
>  	.terminate		= ipmi_terminate,
> +	.seeprom_update		= astbmc_seeprom_update,
>  };
> diff --git a/platforms/astbmc/habanero.c b/platforms/astbmc/habanero.c
> index 8e11b81e..ab010278 100644
> --- a/platforms/astbmc/habanero.c
> +++ b/platforms/astbmc/habanero.c
> @@ -149,4 +149,5 @@ DECLARE_PLATFORM(habanero) = {
>  	.resource_loaded	= flash_resource_loaded,
>  	.exit			= ipmi_wdt_final_reset,
>  	.terminate		= ipmi_terminate,
> +	.seeprom_update		= astbmc_seeprom_update,
>  };
> diff --git a/platforms/astbmc/p8dnu.c b/platforms/astbmc/p8dnu.c
> index 9d42fc43..391aa7a8 100644
> --- a/platforms/astbmc/p8dnu.c
> +++ b/platforms/astbmc/p8dnu.c
> @@ -361,4 +361,5 @@ DECLARE_PLATFORM(p8dnu) = {
>  	.resource_loaded	= flash_resource_loaded,
>  	.exit			= ipmi_wdt_final_reset,
>  	.terminate		= ipmi_terminate,
> +	.seeprom_update		= astbmc_seeprom_update,
>  };
> diff --git a/platforms/astbmc/p8dtu.c b/platforms/astbmc/p8dtu.c
> index 69500ea2..6f66dc22 100644
> --- a/platforms/astbmc/p8dtu.c
> +++ b/platforms/astbmc/p8dtu.c
> @@ -262,6 +262,7 @@ DECLARE_PLATFORM(p8dtu1u) = {
>  	.resource_loaded	= flash_resource_loaded,
>  	.exit			= ipmi_wdt_final_reset,
>  	.terminate		= ipmi_terminate,
> +	.seeprom_update		= astbmc_seeprom_update,
>  };
>  
>  DECLARE_PLATFORM(p8dtu2u) = {
> @@ -279,5 +280,6 @@ DECLARE_PLATFORM(p8dtu2u) = {
>  	.resource_loaded	= flash_resource_loaded,
>  	.exit			= ipmi_wdt_final_reset,
>  	.terminate		= ipmi_terminate,
> +	.seeprom_update		= astbmc_seeprom_update,
>  };
>  
> -- 
> 2.21.0
> 
>
Vasant Hegde May 7, 2019, 11:21 a.m. UTC | #2
On 05/07/2019 07:31 AM, Samuel Mendoza-Jonas wrote:
> On some POWER8 astbmc systems an update to the SBE requires pausing at
> runtime to ensure integrity of the SBE. If this is required the BMC will
> set a chassis boot option IPMI flag using the OEM parameter 0x62. If
> Skiboot sees this flag is set it waits until the SBE update is complete
> and the flag is cleared.
> Unfortunately the mystery operation that validates the SBE also leaves
> it in a bad state and unable to be used for timer operations. To
> workaround this the flag is checked as soon as possible (ie. when IPMI
> and the console are set up), and once complete the system is rebooted.

Keep in mind that the way we do timer init is different in P8 and P9.
In P9, p9_sbe_init() initiates timer while in P8, slw_init() initiates timer.

Your platform.seeprom_update() works fine on P8 systems today. We have to be 
careful
before enabling on P9 systems.


> 
> Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
> ---


.../...

> @@ -110,3 +134,78 @@ int ipmi_get_bmc_info_request(void)
>   	bmc_info_waiting = true;
>   	return rc;
>   }
> +
> +/* This will free ipmi_sys_boot_opt structure */
> +int ipmi_chassis_check_sbe_validation(void)
> +{
> +	int rc = -1;
> +
> +	while (bmc_boot_opt_waiting)
> +		time_wait_ms(5);

With 5ms we may not run poller (see time_wait_poll()).

> +
> +	if (!bmc_boot_opt_valid)

Free ipmi_sys_boot_opt and return rc?

> +		return -1;
> +
> +	if ((ipmi_sys_boot_opt->param_valid & 0x8) != 0)
> +		goto out;
> +	if (ipmi_sys_boot_opt->param_valid != 0x62)
> +		goto out;
> +
> +	rc = ipmi_sys_boot_opt->flag_set;
> +
> +out:
> +	free(ipmi_sys_boot_opt);
> +	return rc;
> +}
> +
> +static void ipmi_get_chassis_boot_opt_resp(struct ipmi_msg *msg)
> +{
> +	bmc_boot_opt_waiting = false;
> +
> +	if (msg->cc != IPMI_CC_NO_ERROR) {
> +		prlog(PR_INFO, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT cmd returned error"
> +		      " [rc : 0x%x]\n", msg->data[0]);

Free ipmi message here ?

> +		return;
> +	}
> +
> +	if (msg->resp_size == sizeof(struct ipmi_sys_boot_opt)) {
> +		bmc_boot_opt_valid = true;
> +		memcpy(ipmi_sys_boot_opt, msg->data, msg->resp_size);
> +	} else {
> +		prlog(PR_WARNING, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT unexpected response size\n");
> +	}
> +
> +	ipmi_free_msg(msg);
> +}
> +


.../...

>   extern void slot_table_init(const struct slot_table_entry *top_table);
>   extern void slot_table_get_slot_info(struct phb *phb, struct pci_device * pd);
> diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
> index faa73e2f..413613da 100644
> --- a/platforms/astbmc/common.c
> +++ b/platforms/astbmc/common.c
> @@ -26,6 +26,7 @@
>   #include <bt.h>
>   #include <errorlog.h>
>   #include <lpc.h>
> +#include <timebase.h>
> 
>   #include "astbmc.h"
> 
> @@ -168,6 +169,52 @@ int64_t astbmc_ipmi_reboot(void)
>   	return ipmi_chassis_control(IPMI_CHASSIS_HARD_RESET);
>   }
> 
> +void astbmc_seeprom_update(void)
> +{
> +	int flag_set, counter;
> +
> +	ipmi_get_chassis_boot_opt_request();
> +
> +	flag_set = ipmi_chassis_check_sbe_validation();
> +
> +	if (flag_set <= 0) {
> +		prlog(PR_DEBUG, "SBE validation flag unset or invalid\n");
> +		return;
> +	}
> +
> +	/*
> +	 * Flag is set, wait until SBE validation is complete and the flag
> +	 * has been reset.
> +	 */
> +	prlog(PR_WARNING, "SBE validation required, waiting for completion\n");
> +	prlog(PR_WARNING, "System will be powered off if validation fails\n");
> +	counter = 0;
> +
> +	while (flag_set > 0) {

What is BMC never updates option flag properly (say buggy BMC code) ?
Do we need some way to exit this loop ?

> +		time_wait_ms(10000);
> +		if (++counter >= 3) {
> +			/* Let the user know we're alive every 30s */
> +			prlog(PR_WARNING, "waiting for completion..\n");
> +			counter = 0;
> +		}
> +
> +		ipmi_get_chassis_boot_opt_request();

May be its good to check return value to confirm we did queue IPMI message properly.

-Vasant

> +		flag_set = ipmi_chassis_check_sbe_validation();
> +	}
> +
Sam Mendoza-Jonas May 8, 2019, 12:59 a.m. UTC | #3
On Tue, 2019-05-07 at 16:51 +0530, Vasant Hegde wrote:
> On 05/07/2019 07:31 AM, Samuel Mendoza-Jonas wrote:
> > On some POWER8 astbmc systems an update to the SBE requires pausing at
> > runtime to ensure integrity of the SBE. If this is required the BMC will
> > set a chassis boot option IPMI flag using the OEM parameter 0x62. If
> > Skiboot sees this flag is set it waits until the SBE update is complete
> > and the flag is cleared.
> > Unfortunately the mystery operation that validates the SBE also leaves
> > it in a bad state and unable to be used for timer operations. To
> > workaround this the flag is checked as soon as possible (ie. when IPMI
> > and the console are set up), and once complete the system is rebooted.
> 
> Keep in mind that the way we do timer init is different in P8 and P9.
> In P9, p9_sbe_init() initiates timer while in P8, slw_init() initiates timer.
> 
> Your platform.seeprom_update() works fine on P8 systems today. We have to be 
> careful
> before enabling on P9 systems.
> 
> 
> > Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
> > ---
> 
> .../...
> 
> > @@ -110,3 +134,78 @@ int ipmi_get_bmc_info_request(void)
> >   	bmc_info_waiting = true;
> >   	return rc;
> >   }
> > +
> > +/* This will free ipmi_sys_boot_opt structure */
> > +int ipmi_chassis_check_sbe_validation(void)
> > +{
> > +	int rc = -1;
> > +
> > +	while (bmc_boot_opt_waiting)
> > +		time_wait_ms(5);
> 
> With 5ms we may not run poller (see time_wait_poll()).

Good catch; I'll bump the interval a bit to be safe.

> 
> > +
> > +	if (!bmc_boot_opt_valid)
> 
> Free ipmi_sys_boot_opt and return rc?

Yep

> 
> > +		return -1;
> > +
> > +	if ((ipmi_sys_boot_opt->param_valid & 0x8) != 0)
> > +		goto out;
> > +	if (ipmi_sys_boot_opt->param_valid != 0x62)
> > +		goto out;
> > +
> > +	rc = ipmi_sys_boot_opt->flag_set;
> > +
> > +out:
> > +	free(ipmi_sys_boot_opt);
> > +	return rc;
> > +}
> > +
> > +static void ipmi_get_chassis_boot_opt_resp(struct ipmi_msg *msg)
> > +{
> > +	bmc_boot_opt_waiting = false;
> > +
> > +	if (msg->cc != IPMI_CC_NO_ERROR) {
> > +		prlog(PR_INFO, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT cmd returned error"
> > +		      " [rc : 0x%x]\n", msg->data[0]);
> 
> Free ipmi message here ?

Yep

> 
> > +		return;
> > +	}
> > +
> > +	if (msg->resp_size == sizeof(struct ipmi_sys_boot_opt)) {
> > +		bmc_boot_opt_valid = true;
> > +		memcpy(ipmi_sys_boot_opt, msg->data, msg->resp_size);
> > +	} else {
> > +		prlog(PR_WARNING, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT unexpected response size\n");
> > +	}
> > +
> > +	ipmi_free_msg(msg);
> > +}
> > +
> 
> .../...
> 
> >   extern void slot_table_init(const struct slot_table_entry *top_table);
> >   extern void slot_table_get_slot_info(struct phb *phb, struct pci_device * pd);
> > diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
> > index faa73e2f..413613da 100644
> > --- a/platforms/astbmc/common.c
> > +++ b/platforms/astbmc/common.c
> > @@ -26,6 +26,7 @@
> >   #include <bt.h>
> >   #include <errorlog.h>
> >   #include <lpc.h>
> > +#include <timebase.h>
> > 
> >   #include "astbmc.h"
> > 
> > @@ -168,6 +169,52 @@ int64_t astbmc_ipmi_reboot(void)
> >   	return ipmi_chassis_control(IPMI_CHASSIS_HARD_RESET);
> >   }
> > 
> > +void astbmc_seeprom_update(void)
> > +{
> > +	int flag_set, counter;
> > +
> > +	ipmi_get_chassis_boot_opt_request();
> > +
> > +	flag_set = ipmi_chassis_check_sbe_validation();
> > +
> > +	if (flag_set <= 0) {
> > +		prlog(PR_DEBUG, "SBE validation flag unset or invalid\n");
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * Flag is set, wait until SBE validation is complete and the flag
> > +	 * has been reset.
> > +	 */
> > +	prlog(PR_WARNING, "SBE validation required, waiting for completion\n");
> > +	prlog(PR_WARNING, "System will be powered off if validation fails\n");
> > +	counter = 0;
> > +
> > +	while (flag_set > 0) {
> 
> What is BMC never updates option flag properly (say buggy BMC code) ?
> Do we need some way to exit this loop ?

We should probably warn the user after a certain amount of time, but I
don't think we should continue boot necessarily. It may be better to warn
the user and have them shut off / reboot the system if needed. Say after
30 minutes or so (...yes anything up to there could be valid :( )

> 
> > +		time_wait_ms(10000);
> > +		if (++counter >= 3) {
> > +			/* Let the user know we're alive every 30s */
> > +			prlog(PR_WARNING, "waiting for completion..\n");
> > +			counter = 0;
> > +		}
> > +
> > +		ipmi_get_chassis_boot_opt_request();
> 
> May be its good to check return value to confirm we did queue IPMI message properly.

Yep

> 
> -Vasant
> 
> > +		flag_set = ipmi_chassis_check_sbe_validation();
> > +	}
> > +
> 
>
Sam Mendoza-Jonas May 8, 2019, 1:51 a.m. UTC | #4
On Tue, 2019-05-07 at 16:51 +0530, Vasant Hegde wrote:
> On 05/07/2019 07:31 AM, Samuel Mendoza-Jonas wrote:
> > On some POWER8 astbmc systems an update to the SBE requires pausing at
> > runtime to ensure integrity of the SBE. If this is required the BMC will
> > set a chassis boot option IPMI flag using the OEM parameter 0x62. If
> > Skiboot sees this flag is set it waits until the SBE update is complete
> > and the flag is cleared.
> > Unfortunately the mystery operation that validates the SBE also leaves
> > it in a bad state and unable to be used for timer operations. To
> > workaround this the flag is checked as soon as possible (ie. when IPMI
> > and the console are set up), and once complete the system is rebooted.
> 
> Keep in mind that the way we do timer init is different in P8 and P9.
> In P9, p9_sbe_init() initiates timer while in P8, slw_init() initiates timer.
> 
> Your platform.seeprom_update() works fine on P8 systems today. We have to be 
> careful
> before enabling on P9 systems.

Forgot to mention in the last email; this is specifically only for P8
machines, there is no plan to perform this check on P9.

> 
> 
> > Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
> > ---
> 
> .../...
> 
> > @@ -110,3 +134,78 @@ int ipmi_get_bmc_info_request(void)
> >   	bmc_info_waiting = true;
> >   	return rc;
> >   }
> > +
> > +/* This will free ipmi_sys_boot_opt structure */
> > +int ipmi_chassis_check_sbe_validation(void)
> > +{
> > +	int rc = -1;
> > +
> > +	while (bmc_boot_opt_waiting)
> > +		time_wait_ms(5);
> 
> With 5ms we may not run poller (see time_wait_poll()).
> 
> > +
> > +	if (!bmc_boot_opt_valid)
> 
> Free ipmi_sys_boot_opt and return rc?
> 
> > +		return -1;
> > +
> > +	if ((ipmi_sys_boot_opt->param_valid & 0x8) != 0)
> > +		goto out;
> > +	if (ipmi_sys_boot_opt->param_valid != 0x62)
> > +		goto out;
> > +
> > +	rc = ipmi_sys_boot_opt->flag_set;
> > +
> > +out:
> > +	free(ipmi_sys_boot_opt);
> > +	return rc;
> > +}
> > +
> > +static void ipmi_get_chassis_boot_opt_resp(struct ipmi_msg *msg)
> > +{
> > +	bmc_boot_opt_waiting = false;
> > +
> > +	if (msg->cc != IPMI_CC_NO_ERROR) {
> > +		prlog(PR_INFO, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT cmd returned error"
> > +		      " [rc : 0x%x]\n", msg->data[0]);
> 
> Free ipmi message here ?
> 
> > +		return;
> > +	}
> > +
> > +	if (msg->resp_size == sizeof(struct ipmi_sys_boot_opt)) {
> > +		bmc_boot_opt_valid = true;
> > +		memcpy(ipmi_sys_boot_opt, msg->data, msg->resp_size);
> > +	} else {
> > +		prlog(PR_WARNING, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT unexpected response size\n");
> > +	}
> > +
> > +	ipmi_free_msg(msg);
> > +}
> > +
> 
> .../...
> 
> >   extern void slot_table_init(const struct slot_table_entry *top_table);
> >   extern void slot_table_get_slot_info(struct phb *phb, struct pci_device * pd);
> > diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
> > index faa73e2f..413613da 100644
> > --- a/platforms/astbmc/common.c
> > +++ b/platforms/astbmc/common.c
> > @@ -26,6 +26,7 @@
> >   #include <bt.h>
> >   #include <errorlog.h>
> >   #include <lpc.h>
> > +#include <timebase.h>
> > 
> >   #include "astbmc.h"
> > 
> > @@ -168,6 +169,52 @@ int64_t astbmc_ipmi_reboot(void)
> >   	return ipmi_chassis_control(IPMI_CHASSIS_HARD_RESET);
> >   }
> > 
> > +void astbmc_seeprom_update(void)
> > +{
> > +	int flag_set, counter;
> > +
> > +	ipmi_get_chassis_boot_opt_request();
> > +
> > +	flag_set = ipmi_chassis_check_sbe_validation();
> > +
> > +	if (flag_set <= 0) {
> > +		prlog(PR_DEBUG, "SBE validation flag unset or invalid\n");
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * Flag is set, wait until SBE validation is complete and the flag
> > +	 * has been reset.
> > +	 */
> > +	prlog(PR_WARNING, "SBE validation required, waiting for completion\n");
> > +	prlog(PR_WARNING, "System will be powered off if validation fails\n");
> > +	counter = 0;
> > +
> > +	while (flag_set > 0) {
> 
> What is BMC never updates option flag properly (say buggy BMC code) ?
> Do we need some way to exit this loop ?
> 
> > +		time_wait_ms(10000);
> > +		if (++counter >= 3) {
> > +			/* Let the user know we're alive every 30s */
> > +			prlog(PR_WARNING, "waiting for completion..\n");
> > +			counter = 0;
> > +		}
> > +
> > +		ipmi_get_chassis_boot_opt_request();
> 
> May be its good to check return value to confirm we did queue IPMI message properly.
> 
> -Vasant
> 
> > +		flag_set = ipmi_chassis_check_sbe_validation();
> > +	}
> > +
> 
>
diff mbox series

Patch

diff --git a/core/init.c b/core/init.c
index 955d299d..bca12dfc 100644
--- a/core/init.c
+++ b/core/init.c
@@ -1190,6 +1190,14 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	/* Install the OPAL Console handlers */
 	init_opal_console();
 
+	/*
+	 * Some platforms set a flag to wait for SBE validation to be
+	 * performed by the BMC. If this occurs it leaves the SBE in a
+	 * bad state and the system will reboot at this point.
+	 */
+	if (platform.seeprom_update)
+		platform.seeprom_update();
+
 	/* Init SLW related stuff, including fastsleep */
 	slw_init();
 
diff --git a/hw/ipmi/ipmi-info.c b/hw/ipmi/ipmi-info.c
index 56370037..a582fe59 100644
--- a/hw/ipmi/ipmi-info.c
+++ b/hw/ipmi/ipmi-info.c
@@ -23,7 +23,7 @@ 
 #include <timebase.h>
 
 /*
- * Respones data from IPMI Get device ID command (As defined in
+ * Response data from IPMI Get device ID command (As defined in
  * Section 20.1 Get Device ID Command - IPMI standard spec).
  */
 struct ipmi_dev_id {
@@ -39,9 +39,27 @@  struct ipmi_dev_id {
 };
 static struct ipmi_dev_id *ipmi_dev_id;
 
+/*
+ * Response data from IPMI Chassis Get System Boot Option (As defined in
+ * Section 28.13 Get System Boot Options Command - IPMI standard spec).
+ */
+struct ipmi_sys_boot_opt {
+	uint8_t param_version;
+	uint8_t param_valid;
+	/*
+	 * Fields for OEM parameter 0x62. This parameter does not follow
+	 * the normal layout and just has a single byte to signal if it
+	 * is active or not.
+	 */
+	uint8_t flag_set;
+};
+static struct ipmi_sys_boot_opt *ipmi_sys_boot_opt;
+
 /* Got response from BMC? */
 static bool bmc_info_waiting = false;
 static bool bmc_info_valid = false;
+static bool bmc_boot_opt_waiting = false;
+static bool bmc_boot_opt_valid = false;
 
 /* This will free ipmi_dev_id structure */
 void ipmi_dt_add_bmc_info(void)
@@ -79,8 +97,14 @@  static void ipmi_get_bmc_info_resp(struct ipmi_msg *msg)
 		return;
 	}
 
-	bmc_info_valid = true;
-	memcpy(ipmi_dev_id, msg->data, msg->resp_size);
+	/* ipmi_dev_id has optional fields */
+	if (msg->resp_size <= sizeof(struct ipmi_dev_id)) {
+		bmc_info_valid = true;
+		memcpy(ipmi_dev_id, msg->data, msg->resp_size);
+	} else {
+		prlog(PR_WARNING, "IPMI: IPMI_BMC_GET_DEVICE_ID unexpected response size\n");
+	}
+
 	ipmi_free_msg(msg);
 }
 
@@ -110,3 +134,78 @@  int ipmi_get_bmc_info_request(void)
 	bmc_info_waiting = true;
 	return rc;
 }
+
+/* This will free ipmi_sys_boot_opt structure */
+int ipmi_chassis_check_sbe_validation(void)
+{
+	int rc = -1;
+
+	while (bmc_boot_opt_waiting)
+		time_wait_ms(5);
+
+	if (!bmc_boot_opt_valid)
+		return -1;
+
+	if ((ipmi_sys_boot_opt->param_valid & 0x8) != 0)
+		goto out;
+	if (ipmi_sys_boot_opt->param_valid != 0x62)
+		goto out;
+
+	rc = ipmi_sys_boot_opt->flag_set;
+
+out:
+	free(ipmi_sys_boot_opt);
+	return rc;
+}
+
+static void ipmi_get_chassis_boot_opt_resp(struct ipmi_msg *msg)
+{
+	bmc_boot_opt_waiting = false;
+
+	if (msg->cc != IPMI_CC_NO_ERROR) {
+		prlog(PR_INFO, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT cmd returned error"
+		      " [rc : 0x%x]\n", msg->data[0]);
+		return;
+	}
+
+	if (msg->resp_size == sizeof(struct ipmi_sys_boot_opt)) {
+		bmc_boot_opt_valid = true;
+		memcpy(ipmi_sys_boot_opt, msg->data, msg->resp_size);
+	} else {
+		prlog(PR_WARNING, "IPMI: IPMI_CHASSIS_GET_BOOT_OPT unexpected response size\n");
+	}
+
+	ipmi_free_msg(msg);
+}
+
+int ipmi_get_chassis_boot_opt_request(void)
+{
+	int rc;
+	struct ipmi_msg *msg;
+	uint8_t req[] = {
+		0x62, /* OEM parameter (SBE Validation on astbmc) */
+		0x00, /* no set selector */
+		0x00, /* no block selector */
+	};
+
+	ipmi_sys_boot_opt = zalloc(sizeof(struct ipmi_sys_boot_opt));
+	assert(ipmi_sys_boot_opt);
+
+	msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_CHASSIS_GET_BOOT_OPT,
+			 ipmi_get_chassis_boot_opt_resp, NULL, req,
+			 sizeof(req), sizeof(struct ipmi_sys_boot_opt));
+	if (!msg)
+		return OPAL_NO_MEM;
+
+	msg->error = ipmi_get_chassis_boot_opt_resp;
+	prlog(PR_INFO, "IPMI: Requesting IPMI_CHASSIS_GET_BOOT_OPT\n");
+	rc = ipmi_queue_msg(msg);
+	if (rc) {
+		prlog(PR_ERR, "IPMI: Failed to queue IPMI_CHASSIS_GET_BOOT_OPT\n");
+		ipmi_free_msg(msg);
+		return rc;
+	}
+
+	bmc_boot_opt_waiting = true;
+	return rc;
+}
diff --git a/include/ipmi.h b/include/ipmi.h
index da85c4b0..ec9f3c49 100644
--- a/include/ipmi.h
+++ b/include/ipmi.h
@@ -109,6 +109,7 @@ 
 #define IPMI_GET_SEL_TIME		IPMI_CODE(IPMI_NETFN_STORAGE, 0x48)
 #define IPMI_SET_SEL_TIME		IPMI_CODE(IPMI_NETFN_STORAGE, 0x49)
 #define IPMI_CHASSIS_CONTROL		IPMI_CODE(IPMI_NETFN_CHASSIS, 0x02)
+#define IPMI_CHASSIS_GET_BOOT_OPT	IPMI_CODE(IPMI_NETFN_CHASSIS, 0x09)
 #define IPMI_BMC_GET_DEVICE_ID		IPMI_CODE(IPMI_NETFN_APP, 0x01)
 #define IPMI_SET_POWER_STATE		IPMI_CODE(IPMI_NETFN_APP, 0x06)
 #define IPMI_GET_POWER_STATE		IPMI_CODE(IPMI_NETFN_APP, 0x07)
@@ -291,4 +292,10 @@  extern int ipmi_get_bmc_info_request(void);
 /* Add BMC firmware info to device tree */
 extern void ipmi_dt_add_bmc_info(void);
 
+/* Get BMC Boot Options info (specifically OEM param 0x62) */
+int ipmi_get_chassis_boot_opt_request(void);
+
+/* Get OEM Boot Option 0x62 for SBE validation flag */
+int ipmi_chassis_check_sbe_validation(void);
+
 #endif
diff --git a/include/platform.h b/include/platform.h
index f63c24a3..4f8627a3 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -232,6 +232,11 @@  struct platform {
 	 * OPAL terminate
 	 */
 	void __attribute__((noreturn)) (*terminate)(const char *msg);
+
+	/*
+	 * SEEPROM update routine
+	 */
+	void		(*seeprom_update)(void);
 };
 
 extern struct platform __platforms_start;
diff --git a/platforms/astbmc/astbmc.h b/platforms/astbmc/astbmc.h
index fe358b74..c302b607 100644
--- a/platforms/astbmc/astbmc.h
+++ b/platforms/astbmc/astbmc.h
@@ -103,6 +103,7 @@  extern void astbmc_ext_irq_serirq_cpld(unsigned int chip_id);
 extern int pnor_init(void);
 extern void check_all_slot_table(void);
 extern void astbmc_exit(void);
+extern void astbmc_seeprom_update(void);
 
 extern void slot_table_init(const struct slot_table_entry *top_table);
 extern void slot_table_get_slot_info(struct phb *phb, struct pci_device * pd);
diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
index faa73e2f..413613da 100644
--- a/platforms/astbmc/common.c
+++ b/platforms/astbmc/common.c
@@ -26,6 +26,7 @@ 
 #include <bt.h>
 #include <errorlog.h>
 #include <lpc.h>
+#include <timebase.h>
 
 #include "astbmc.h"
 
@@ -168,6 +169,52 @@  int64_t astbmc_ipmi_reboot(void)
 	return ipmi_chassis_control(IPMI_CHASSIS_HARD_RESET);
 }
 
+void astbmc_seeprom_update(void)
+{
+	int flag_set, counter;
+
+	ipmi_get_chassis_boot_opt_request();
+
+	flag_set = ipmi_chassis_check_sbe_validation();
+
+	if (flag_set <= 0) {
+		prlog(PR_DEBUG, "SBE validation flag unset or invalid\n");
+		return;
+	}
+
+	/*
+	 * Flag is set, wait until SBE validation is complete and the flag
+	 * has been reset.
+	 */
+	prlog(PR_WARNING, "SBE validation required, waiting for completion\n");
+	prlog(PR_WARNING, "System will be powered off if validation fails\n");
+	counter = 0;
+
+	while (flag_set > 0) {
+		time_wait_ms(10000);
+		if (++counter >= 3) {
+			/* Let the user know we're alive every 30s */
+			prlog(PR_WARNING, "waiting for completion..\n");
+			counter = 0;
+		}
+
+		ipmi_get_chassis_boot_opt_request();
+		flag_set = ipmi_chassis_check_sbe_validation();
+	}
+
+	/*
+	 * The SBE validation can (will) leave the SBE in a bad state,
+	 * preventing timers from working properly. Reboot so that we
+	 * can boot normally with everything intact.
+	 */
+	prlog(PR_WARNING, "SBE validation complete, rebooting\n");
+	if (platform.cec_reboot)
+		platform.cec_reboot();
+	else
+		abort();
+	while(true);
+}
+
 static void astbmc_fixup_dt_system_id(void)
 {
 	/* Make sure we don't already have one */
diff --git a/platforms/astbmc/garrison.c b/platforms/astbmc/garrison.c
index 5cbe64b5..ddd33721 100644
--- a/platforms/astbmc/garrison.c
+++ b/platforms/astbmc/garrison.c
@@ -305,4 +305,5 @@  DECLARE_PLATFORM(garrison) = {
 	.resource_loaded	= flash_resource_loaded,
 	.exit			= ipmi_wdt_final_reset,
 	.terminate		= ipmi_terminate,
+	.seeprom_update		= astbmc_seeprom_update,
 };
diff --git a/platforms/astbmc/habanero.c b/platforms/astbmc/habanero.c
index 8e11b81e..ab010278 100644
--- a/platforms/astbmc/habanero.c
+++ b/platforms/astbmc/habanero.c
@@ -149,4 +149,5 @@  DECLARE_PLATFORM(habanero) = {
 	.resource_loaded	= flash_resource_loaded,
 	.exit			= ipmi_wdt_final_reset,
 	.terminate		= ipmi_terminate,
+	.seeprom_update		= astbmc_seeprom_update,
 };
diff --git a/platforms/astbmc/p8dnu.c b/platforms/astbmc/p8dnu.c
index 9d42fc43..391aa7a8 100644
--- a/platforms/astbmc/p8dnu.c
+++ b/platforms/astbmc/p8dnu.c
@@ -361,4 +361,5 @@  DECLARE_PLATFORM(p8dnu) = {
 	.resource_loaded	= flash_resource_loaded,
 	.exit			= ipmi_wdt_final_reset,
 	.terminate		= ipmi_terminate,
+	.seeprom_update		= astbmc_seeprom_update,
 };
diff --git a/platforms/astbmc/p8dtu.c b/platforms/astbmc/p8dtu.c
index 69500ea2..6f66dc22 100644
--- a/platforms/astbmc/p8dtu.c
+++ b/platforms/astbmc/p8dtu.c
@@ -262,6 +262,7 @@  DECLARE_PLATFORM(p8dtu1u) = {
 	.resource_loaded	= flash_resource_loaded,
 	.exit			= ipmi_wdt_final_reset,
 	.terminate		= ipmi_terminate,
+	.seeprom_update		= astbmc_seeprom_update,
 };
 
 DECLARE_PLATFORM(p8dtu2u) = {
@@ -279,5 +280,6 @@  DECLARE_PLATFORM(p8dtu2u) = {
 	.resource_loaded	= flash_resource_loaded,
 	.exit			= ipmi_wdt_final_reset,
 	.terminate		= ipmi_terminate,
+	.seeprom_update		= astbmc_seeprom_update,
 };