diff mbox

[v2,1/2] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform

Message ID 1430991020-7556-2-git-send-email-vipin@linux.vnet.ibm.com (mailing list archive)
State Superseded
Delegated to: Michael Ellerman
Headers show

Commit Message

Vipin K Parashar May 7, 2015, 9:30 a.m. UTC
This patch adds support for FSP EPOW (Early Power Off Warning) and
DPO (Delayed Power Off) events support for PowerNV platform.  EPOW events
are generated by SPCN/FSP due to various critical system conditions that
need system shutdown.  Few examples of these conditions are high ambient
temperature or system running on UPS power with low UPS battery. DPO event
is generated in response to admin initiated system shutdown request.
	This patch enables host kernel on PowerNV platform to handle OPAL
notifications for these events and initiate system poweroff. Since EPOW
notifications are sent in advance of impending shutdown event and thus
this patch also adds functionality to wait for EPOW condition to return to
normal. Host allows MAX_POWEROFF_SYS_TIME (600 seconds) as system
poweroff time (time for host + guests shutdown) and waits for remaining
time for EPOW condition to return to normal. If EPOW condition doesn't
return to normal in calculated time it proceeds with graceful system
shutdown. For EPOW events with smaller timeouts values than
MAX_POWEROFF_SYS_TIME it proceeds with system shutdown without any wait
for EPOW condition to return to normal.
	System admin can also add systemd service shutdown scripts to
perform any specific actions like graceful guest shutdown upon system
poweroff. libvirt-guests is systemd service available on recent distros
for management of guests at system stat/shutdown time.

Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal-api.h            |  30 ++
 arch/powerpc/include/asm/opal.h                |   3 +-
 arch/powerpc/platforms/powernv/opal-power.c    | 379 +++++++++++++++++++++++--
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 4 files changed, 391 insertions(+), 22 deletions(-)

Comments

Joel Stanley May 8, 2015, 1:26 a.m. UTC | #1
Hello Vipin,

On Thu, May 7, 2015 at 7:00 PM, Vipin K Parashar
<vipin@linux.vnet.ibm.com> wrote:
> This patch adds support for FSP EPOW (Early Power Off Warning) and
> DPO (Delayed Power Off) events support for PowerNV platform.

I reviewed this patch for the changes it made to the existing poweroff
code, you still need someone to look at the EPOW code itself.

> Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/opal-api.h            |  30 ++
>  arch/powerpc/include/asm/opal.h                |   3 +-
>  arch/powerpc/platforms/powernv/opal-power.c    | 379 +++++++++++++++++++++++--
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  4 files changed, 391 insertions(+), 22 deletions(-)
>

>  /* Internal functions */
>  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
> diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
> index ac46c2c..7c1b2f8 100644
> --- a/arch/powerpc/platforms/powernv/opal-power.c
> +++ b/arch/powerpc/platforms/powernv/opal-power.c
> @@ -1,5 +1,5 @@
>  /*
> - * PowerNV OPAL power control for graceful shutdown handling
> + * PowerNV poweroff events support
>   *
>   * Copyright 2015 IBM Corp.
>   *
> @@ -9,58 +9,395 @@
>   * 2 of the License, or (at your option) any later version.
>   */
>
> +#define pr_fmt(fmt)    "POWEROFF_EVENT: "    fmt

OPAL_POWER?

> +
>  #include <linux/kernel.h>
> +#include <linux/spinlock.h>
> +#include <linux/timer.h>
>  #include <linux/reboot.h>
> -#include <linux/notifier.h>
> -
> +#include <linux/of.h>
>  #include <asm/opal.h>
>  #include <asm/machdep.h>
>
> -#define SOFT_OFF 0x00
> -#define SOFT_REBOOT 0x01
> +/* Power control event types */
> +#define SOFT_OFF       0x00
> +#define SOFT_REBOOT    0x01

While you're touching this code, I think these should be moved to opal-api.h

> +
> +/* Max time for graceful system shutdown including guests. */
> +#define MAX_POWEROFF_SYS_TIME  600
> +

> +/* IPMI power-control events notifier */
>  static int opal_power_control_event(struct notifier_block *nb,
> -                                   unsigned long msg_type, void *msg)
> +                               unsigned long msg_type, void *msg)
>  {
> -       struct opal_msg *power_msg = msg;
>         uint64_t type;
> +       struct opal_msg *power_msg = msg;
>
>         type = be64_to_cpu(power_msg->params[0]);
>
>         switch (type) {
>         case SOFT_REBOOT:
> -               pr_info("OPAL: reboot requested\n");
> +               pr_info("Reboot requested\n");

I prefer the OPAL prefix.

>                 orderly_reboot();
>                 break;
>         case SOFT_OFF:
> -               pr_info("OPAL: poweroff requested\n");
> +               pr_info("Poweroff requested\n");

Ditto.

>                 orderly_poweroff(true);
>                 break;
>         default:
> -               pr_err("OPAL: power control type unexpected %016llx\n", type);
> +               pr_err("Unknown event %llu\n", type);

Ditto.

>         }
>
>         return 0;
>  }
>
> +/* OPAL EPOW event notifier block */
> +static struct notifier_block opal_epow_nb = {
> +       .notifier_call  = opal_epow_event,
> +       .next           = NULL,
> +       .priority       = 0,
> +};
> +
> +/* OPAL DPO event notifier block */
> +static struct notifier_block opal_dpo_nb = {
> +       .notifier_call  = opal_dpo_event,
> +       .next           = NULL,
> +       .priority       = 0,
> +};
> +
> +/* OPAL Power control events */
>  static struct notifier_block opal_power_control_nb = {
> -       .notifier_call  = opal_power_control_event,
> -       .next           = NULL,
> -       .priority       = 0,
> +       .notifier_call  = opal_power_control_event,
> +       .next           = NULL,
> +       .priority       = 0,
>  };

Looks like you changed the whitespace?

>
> -static int __init opal_power_control_init(void)
> +/* Poweroff events init */
> +static int __init opal_poweroff_events_init(void)

This comment does not add any value.

Renaming the function doesn't add much either.

>  {
>         int ret;
> +       struct device_node *node_epow;
>
> -       ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
> -                                            &opal_power_control_nb);
> -       if (ret) {
> -               pr_err("%s: Can't register OPAL event notifier (%d)\n",
> -                               __func__, ret);
> -               return ret;
> +       /*
> +       * Determine EPOW, DPO support in hardware.
> +       */
> +       node_epow = of_find_node_by_path("/ibm,opal/epow");
> +       if (node_epow) {
> +               if (of_device_is_compatible(node_epow, "ibm,opal-epow")) {
> +                       epow_supported = true;
> +                       dpo_supported = true;

Why are these separate flags? Do we have any systems that will support
EPOW but not DPO, or DPO without EPOW?

I suggest merging them into the one flag.

> +                       pr_info("OPAL EPOW, DPO support detected.\n");
> +               }
> +               of_node_put(node_epow);
> +       }
> +
> +       /* Prepare to handle EPOW events */
> +       if (epow_supported) {
> +               /* Initialize poweroff timer */
> +               init_timer(&epow_timer);
> +               epow_timer.function = epow_poweroff;
> +
> +               /* Register EPOW event notifier */
> +               ret = opal_message_notifier_register(OPAL_MSG_EPOW,
> +                               &opal_epow_nb);
> +               if (ret) {
> +                       pr_err("EPOW event notifier registration failed, "
> +                                       "ret = %d\n", ret);
> +               }
> +       }
> +
> +       if (dpo_supported) {
> +               /* Register DPO event notifier */
> +               ret = opal_message_notifier_register(OPAL_MSG_DPO,
> +                               &opal_dpo_nb);
> +               if (ret)
> +                       pr_err("DPO event notifier registration failed, "
> +                                       "ret = %d\n", ret);
>         }
>
> +       /* Check for any existing EPOW or DPO events. */
> +       process_existing_poweroff_events();
> +
> +       /* Register IPMI poweroff events notifier */
> +       ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
> +                               &opal_power_control_nb);
> +       if (ret)
> +               pr_err("IPMI power-control events notifier registration "
> +                               "failed, ret = %d\n", ret);

The comment and the pr_err message are both incorrect; this is not an
IPMI event but an OPAL event.

> +
> +
> +       pr_info("Poweroff events support initialized\n");

This is unnecessary.

>         return 0;
>  }
> -machine_subsys_initcall(powernv, opal_power_control_init);
> +
> +machine_subsys_initcall(powernv, opal_poweroff_events_init);
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index a7ade94..5d3c8e3 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -249,6 +249,7 @@ OPAL_CALL(opal_pci_reinit,                  OPAL_PCI_REINIT);
>  OPAL_CALL(opal_pci_mask_pe_error,              OPAL_PCI_MASK_PE_ERROR);
>  OPAL_CALL(opal_set_slot_led_status,            OPAL_SET_SLOT_LED_STATUS);
>  OPAL_CALL(opal_get_epow_status,                        OPAL_GET_EPOW_STATUS);
> +OPAL_CALL(opal_get_dpo_status,                 OPAL_GET_DPO_STATUS);
>  OPAL_CALL(opal_set_system_attention_led,       OPAL_SET_SYSTEM_ATTENTION_LED);
>  OPAL_CALL(opal_pci_next_error,                 OPAL_PCI_NEXT_ERROR);
>  OPAL_CALL(opal_pci_poll,                       OPAL_PCI_POLL);
> --
> 1.9.3
>
Triggering May 8, 2015, 7:37 a.m. UTC | #2
Hi Vipin,

These comments are in addition to what Joel has said in his review.

On Thu, May 7, 2015 at 3:00 PM, Vipin K Parashar
<vipin@linux.vnet.ibm.com> wrote:
> This patch adds support for FSP EPOW (Early Power Off Warning) and
> DPO (Delayed Power Off) events support for PowerNV platform.  EPOW events
> are generated by SPCN/FSP due to various critical system conditions that
> need system shutdown.  Few examples of these conditions are high ambient
> temperature or system running on UPS power with low UPS battery. DPO event
> is generated in response to admin initiated system shutdown request.
>         This patch enables host kernel on PowerNV platform to handle OPAL
> notifications for these events and initiate system poweroff. Since EPOW
> notifications are sent in advance of impending shutdown event and thus
> this patch also adds functionality to wait for EPOW condition to return to
> normal. Host allows MAX_POWEROFF_SYS_TIME (600 seconds) as system
> poweroff time (time for host + guests shutdown) and waits for remaining
> time for EPOW condition to return to normal. If EPOW condition doesn't
> return to normal in calculated time it proceeds with graceful system
> shutdown. For EPOW events with smaller timeouts values than
> MAX_POWEROFF_SYS_TIME it proceeds with system shutdown without any wait
> for EPOW condition to return to normal.
>         System admin can also add systemd service shutdown scripts to
> perform any specific actions like graceful guest shutdown upon system
> poweroff. libvirt-guests is systemd service available on recent distros
> for management of guests at system stat/shutdown time.
>
> Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/opal-api.h            |  30 ++
>  arch/powerpc/include/asm/opal.h                |   3 +-
>  arch/powerpc/platforms/powernv/opal-power.c    | 379 +++++++++++++++++++++++--
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  4 files changed, 391 insertions(+), 22 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index 0321a90..03b3cef 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -730,6 +730,36 @@ struct opal_i2c_request {
>         __be64 buffer_ra;               /* Buffer real address */
>  };
>
> +/*
> + * EPOW status sharing (OPAL and the host)
> + *
> + * The host will pass on OPAL, a buffer of length OPAL_EPOW_MAX_CLASSES
> + * to fetch system wide EPOW status. Each element in the returned buffer
> + * will contain bitwise EPOW status for each EPOW sub class.
> + */
> +
> +/* EPOW types */
> +enum OpalEpow {
> +       OPAL_EPOW_POWER         = 0,    /* Power EPOW */
> +       OPAL_EPOW_TEMP          = 1,    /* Temperature EPOW */
> +       OPAL_EPOW_COOLING       = 2,    /* Cooling EPOW */
> +       OPAL_MAX_EPOW_CLASSES   = 3,    /* Max EPOW categories */
> +};
Dont explicitly assign sequential numbers in an enum. Its taken care
of by the compiler.

> +
> +/* Power EPOW events */
> +enum OpalEpowPower {
> +       OPAL_EPOW_POWER_UPS     = 0x1, /* System on UPS power */
> +       OPAL_EPOW_POWER_UPS_LOW = 0x2, /* System on UPS power with low battery*/
> +};
> +
> +/* Temperature EPOW events */
> +enum OpalEpowTemp {
> +       OPAL_EPOW_TEMP_HIGH_AMB = 0x1, /* High ambient temperature */
> +       OPAL_EPOW_TEMP_CRIT_AMB = 0x2, /* Critical ambient temperature */
> +       OPAL_EPOW_TEMP_HIGH_INT = 0x4, /* High internal temperature */
> +       OPAL_EPOW_TEMP_CRIT_INT = 0x8, /* Critical internal temperature */
> +};
> +
>  #endif /* __ASSEMBLY__ */
>
>  #endif /* __OPAL_API_H */
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 042af1a..0777864 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -141,7 +141,6 @@ int64_t opal_pci_fence_phb(uint64_t phb_id);
>  int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
>  int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
>  int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
> -int64_t opal_get_epow_status(__be64 *status);
>  int64_t opal_set_system_attention_led(uint8_t led_action);
>  int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
>                             __be16 *pci_error_type, __be16 *severity);
> @@ -200,6 +199,8 @@ int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf,
>                 uint64_t size, uint64_t token);
>  int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
>                 uint64_t token);
> +int32_t opal_get_epow_status(__be32 *status, __be32 *num_classes);
> +int32_t opal_get_dpo_status(__be32 *timeout);
>
>  /* Internal functions */
>  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
> diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
> index ac46c2c..7c1b2f8 100644
> --- a/arch/powerpc/platforms/powernv/opal-power.c
> +++ b/arch/powerpc/platforms/powernv/opal-power.c
> @@ -1,5 +1,5 @@
>  /*
> - * PowerNV OPAL power control for graceful shutdown handling
> + * PowerNV poweroff events support
>   *
>   * Copyright 2015 IBM Corp.
>   *
> @@ -9,58 +9,395 @@
>   * 2 of the License, or (at your option) any later version.
>   */
>
> +#define pr_fmt(fmt)    "POWEROFF_EVENT: "    fmt
> +
>  #include <linux/kernel.h>
> +#include <linux/spinlock.h>
> +#include <linux/timer.h>
>  #include <linux/reboot.h>
> -#include <linux/notifier.h>
> -
> +#include <linux/of.h>
>  #include <asm/opal.h>
>  #include <asm/machdep.h>
>
> -#define SOFT_OFF 0x00
> -#define SOFT_REBOOT 0x01
> +/* Power control event types */
> +#define SOFT_OFF       0x00
> +#define SOFT_REBOOT    0x01
> +
> +/* Max time for graceful system shutdown including guests. */
> +#define MAX_POWEROFF_SYS_TIME  600
> +
> +/* System EPOW, DPO support variables */
> +bool epow_supported = false;
> +bool dpo_supported = false;
Make them static and no need to set them to false explicitly.

> +
> +/* System EPOW status */
> +static u32 epow_status[OPAL_MAX_EPOW_CLASSES];
> +static int num_epow_classes;
> +
> +/* EPOW event timer and corresponding locks */
> +static struct timer_list epow_timer;
> +static DEFINE_SPINLOCK(epow_timer_spinlock);
> +
> +/* EPOW events types */
> +#define EPOW_POWER_UPS         0
> +#define EPOW_POWER_UPS_LOW     1
> +#define EPOW_TEMP_HIGH_AMB     2
> +#define EPOW_TEMP_CRIT_AMB     3
> +#define EPOW_TEMP_HIGH_INT     4
> +#define EPOW_TEMP_CRIT_INT     5
> +#define EPOW_UNKNOWN           6
> +#define MAX_EPOW_EVENTS                7
Can make these constants enum.

> +
> +/* Poweroff EPOW events description */
> +static const char * const epow_events_map[] = {
> +       [EPOW_POWER_UPS]        = "ups",
> +       [EPOW_POWER_UPS_LOW]    = "ups-low",
> +       [EPOW_TEMP_HIGH_AMB]    = "high-ambient-temp",
> +       [EPOW_TEMP_CRIT_AMB]    = "crit-ambient-temp",
> +       [EPOW_TEMP_HIGH_INT]    = "high-internal-temp",
> +       [EPOW_TEMP_CRIT_INT]    = "crit-internal-temp",
> +       [EPOW_UNKNOWN]          = "Unknown",
> +};
> +
> +/* Poweroff EPOW events timeout values in seconds */
> +static const int epow_timeout[] = {
> +       [EPOW_POWER_UPS]        = 900,
> +       [EPOW_POWER_UPS_LOW]    = 20,
> +       [EPOW_TEMP_HIGH_AMB]    = 900,
> +       [EPOW_TEMP_CRIT_AMB]    = 20,
> +       [EPOW_TEMP_HIGH_INT]    = 900,
> +       [EPOW_TEMP_CRIT_INT]    = 20,
> +       [EPOW_UNKNOWN]          = 0,
> +};
> +
> +/* System poweroff function. */
> +static void epow_poweroff(unsigned long event)
> +{
> +       pr_info("Powering off system due to %s EPOW event\n",
> +                               epow_events_map[event]);
> +       orderly_poweroff(true);
> +}
> +
> +/* Start EPOW poweroff timer */
> +static void start_epow_timer(unsigned long event, int32_t timeout)
> +{
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&epow_timer_spinlock, flags);
> +       /* Check for already running epow poweroff timer */
> +       if (timer_pending(&epow_timer)) {
> +               /* Timer for same event */
> +               if (epow_timer.data == event) {
> +                       spin_unlock_irqrestore(&epow_timer_spinlock, flags);
> +                       return;
> +               }
I dont  think this condition is need as the timeout for an event is
fixed and event if this
function is called twice with same event then the condition below will
take care of it.

> +
> +               /* Timer with early poweroff timeout */
> +               if (epow_timer.expires <= (jiffies + timeout * HZ)) {
> +                       event = epow_timer.data;
Load/Store not really needed. You can simply use the epow_timer.data.
The race even if it happens wont be catastrophic.

> +                       spin_unlock_irqrestore(&epow_timer_spinlock, flags);
> +                       pr_info("Poweroff already scheduled for %s EPOW event "
> +                                       "with earlier timeout.\n",
> +                                       epow_events_map[event]);
> +                       return;
> +               }
> +       }
> +
> +       /* Start a new timer/modify existing timer with new timeout value */
> +       epow_timer.data = event;
> +       mod_timer(&epow_timer, jiffies + timeout  * HZ);
> +       spin_unlock_irqrestore(&epow_timer_spinlock, flags);
> +       pr_info("Scheduled system poweroff due to %s EPOW event "
> +                       "after %d seconds\n", epow_events_map[event], timeout);
> +}
> +
> +/* Stop EPOW poweroff timer */
> +static void stop_epow_timer(void)
> +{
> +       int rc;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&epow_timer_spinlock, flags);
> +       rc = del_timer(&epow_timer);
> +       spin_unlock_irqrestore(&epow_timer_spinlock, flags);
> +
> +       if (rc)
> +               pr_info("Poweroff timer deactivated\n");
> +}
>
> +/* Get DPO status */
> +static bool get_dpo_status(int32_t *dpo_timeout)
> +{
> +       int rc;
> +       __be32 opal_dpo_timeout;
> +
> +       rc = opal_get_dpo_status(&opal_dpo_timeout);
> +       if (rc == OPAL_WRONG_STATE) {
> +               *dpo_timeout = 0;
> +               return false;
> +       }
> +
> +       *dpo_timeout = be32_to_cpu(opal_dpo_timeout);
> +       return true;
> +}
> +
> +/* Get EPOW status */
> +static bool get_epow_status(void)
> +{
> +       int i;
> +       bool epow_detected = false;
> +
> +       __be32 opal_epow_status[OPAL_MAX_EPOW_CLASSES] = {0};
> +       __be32 epow_classes;
> +
> +       /* Send kernel EPOW classes supported info to OPAL */
> +       epow_classes = cpu_to_be32(OPAL_MAX_EPOW_CLASSES);
> +
> +       /* Get EPOW events information from OPAL */
> +       opal_get_epow_status(opal_epow_status, &epow_classes);
> +
> +       /* Copy EPOW status obtained from OPAL */
> +       memset(epow_status, 0, sizeof(epow_status));
> +       num_epow_classes = be32_to_cpu(epow_classes);
> +       for (i = 0; i < num_epow_classes; i++) {
> +               epow_status[i] = be32_to_cpu(opal_epow_status[i]);
> +               if (epow_status[i])
> +                       epow_detected = true;
> +       }
> +
> +       pr_info("EPOW classes supported OPAL = %d, Host = %d "
> +                       "EPOW Status = 0x%x, 0x%x, 0x%x\n",
> +                       num_epow_classes, OPAL_MAX_EPOW_CLASSES,
> +                       epow_status[0], epow_status[1], epow_status[2]);
> +
> +       return epow_detected;
> +}
> +
> +/* Process EPOW information */
> +static void process_epow(void)
> +{
> +       int timeout;
> +       unsigned int event = EPOW_UNKNOWN;
> +
> +       /* EPOW events in order of criticality */
> +       if (epow_status[OPAL_EPOW_POWER] & OPAL_EPOW_POWER_UPS) {
> +               pr_info("EPOW due to system running on UPS power\n");
> +               event = EPOW_POWER_UPS;
> +       }
> +
> +       if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_HIGH_AMB) {
> +               pr_info("EPOW due to high ambient temperature\n");
> +               event = EPOW_TEMP_HIGH_AMB;
> +       }
> +
> +       if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_HIGH_INT) {
> +               pr_info("EPOW due to high internal temperature\n");
> +               event = EPOW_TEMP_HIGH_INT;
> +       }
> +
> +       if (epow_status[OPAL_EPOW_POWER] & OPAL_EPOW_POWER_UPS_LOW) {
> +               pr_info("EPOW due to system running on UPS power "
> +                               "with low battery\n");
> +               event = EPOW_POWER_UPS_LOW;
> +       }
> +
> +       if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_CRIT_AMB) {
> +               pr_info("EPOW due to critical ambient temperature\n");
> +               event = EPOW_TEMP_CRIT_AMB;
> +       }
> +
> +       if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_CRIT_INT) {
> +               pr_info("EPOW due to critical internal temperature\n");
> +               event = EPOW_TEMP_CRIT_INT;
> +       }
> +
> +       if (event == EPOW_UNKNOWN) {
> +               pr_err("Unknown EPOW event.\n");
> +               return;
> +       }
Can convert above series of ifs to an if-else ladder with most
critical events checked for first.
In that way you wont be checking against a non critical event when a
critical event has already arrived.

> +
> +       /*
> +       * Check if we have enough time to perform graceful shutdown.
> +       * Wait for EPOW condition to return to normal if we have more
> +       * time than needed for graceful shutdown time. Otherwise proceed
> +       * with immediate system shutdown
> +       */
> +       timeout = epow_timeout[event];
> +       if (timeout <= MAX_POWEROFF_SYS_TIME)
> +               timeout = 0;
> +       else
> +               timeout = timeout - MAX_POWEROFF_SYS_TIME;
> +
> +       /* Start EPOW poweroff timer */
> +       start_epow_timer(event, timeout);
> +}
> +
> +/* Process existing EPOW, DPO events */
> +static void process_existing_poweroff_events(void)
> +{
> +       bool status;
> +       int32_t dpo_timeout;
> +
> +       /* Check for any existing DPO event */
> +       if (dpo_supported) {
> +               status = get_dpo_status(&dpo_timeout);
> +               if (status) {
> +                       pr_info("Existing DPO event detected. "
> +                                       "Powering off system\n");
> +                       orderly_poweroff(true);
> +                       return;
> +               }
> +               pr_info("No existing DPO event detected\n");
> +       }
> +
> +       /* Check for any existing EPOW event */
> +       if (epow_supported) {
> +               status = get_epow_status();
> +               if (status) {
> +                       pr_info("Existing EPOW event detected.\n");
> +                       process_epow();
> +                       return;
> +               }
> +               pr_info("No existing EPOW event detected\n");
> +       }
> +}
> +
> +/* EPOW event notifier */
> +static int opal_epow_event(struct notifier_block *nb,
> +                       unsigned long msg_type, void *msg)
> +{
> +       bool epow_present;
> +
> +       pr_info("EPOW event received\n");
> +
> +       /* Get EPOW event details */
> +       epow_present = get_epow_status();
> +
> +       /* Stop shutdown timer if EPOW condition has returned to normal */
> +       if (!epow_present) {
Can be replaced with  if (!get_epow_status()) eliminating the variable
epow_present.
> +               stop_epow_timer();
> +               return 0;
> +       }
> +
> +       /* Process EPOW event information */
> +       process_epow();
> +
> +       return 0;
> +}
> +
Not much being done in this function above. May be you can think about merging
it with process_epow

> +/* DPO event notifier */
> +static int opal_dpo_event(struct notifier_block *nb,
> +                               unsigned long msg_type, void *msg)
> +{
> +       pr_info("DPO event received. Powering off system as requested\n");
> +       orderly_poweroff(true);
> +
> +       return 0;
> +}
> +
> +/* IPMI power-control events notifier */
>  static int opal_power_control_event(struct notifier_block *nb,
> -                                   unsigned long msg_type, void *msg)
> +                               unsigned long msg_type, void *msg)
>  {
> -       struct opal_msg *power_msg = msg;
>         uint64_t type;
> +       struct opal_msg *power_msg = msg;
>
>         type = be64_to_cpu(power_msg->params[0]);
>
>         switch (type) {
>         case SOFT_REBOOT:
> -               pr_info("OPAL: reboot requested\n");
> +               pr_info("Reboot requested\n");
>                 orderly_reboot();
>                 break;
>         case SOFT_OFF:
> -               pr_info("OPAL: poweroff requested\n");
> +               pr_info("Poweroff requested\n");
>                 orderly_poweroff(true);
>                 break;
>         default:
> -               pr_err("OPAL: power control type unexpected %016llx\n", type);
> +               pr_err("Unknown event %llu\n", type);
>         }
>
>         return 0;
>  }
>
> +/* OPAL EPOW event notifier block */
> +static struct notifier_block opal_epow_nb = {
> +       .notifier_call  = opal_epow_event,
> +       .next           = NULL,
> +       .priority       = 0,
> +};
> +
> +/* OPAL DPO event notifier block */
> +static struct notifier_block opal_dpo_nb = {
> +       .notifier_call  = opal_dpo_event,
> +       .next           = NULL,
> +       .priority       = 0,
> +};
> +
> +/* OPAL Power control events */
>  static struct notifier_block opal_power_control_nb = {
> -       .notifier_call  = opal_power_control_event,
> -       .next           = NULL,
> -       .priority       = 0,
> +       .notifier_call  = opal_power_control_event,
> +       .next           = NULL,
> +       .priority       = 0,
>  };
>
> -static int __init opal_power_control_init(void)
> +/* Poweroff events init */
> +static int __init opal_poweroff_events_init(void)
>  {
>         int ret;
> +       struct device_node *node_epow;
>
> -       ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
> -                                            &opal_power_control_nb);
> -       if (ret) {
> -               pr_err("%s: Can't register OPAL event notifier (%d)\n",
> -                               __func__, ret);
> -               return ret;
> +       /*
> +       * Determine EPOW, DPO support in hardware.
> +       */
> +       node_epow = of_find_node_by_path("/ibm,opal/epow");
> +       if (node_epow) {
> +               if (of_device_is_compatible(node_epow, "ibm,opal-epow")) {
> +                       epow_supported = true;
> +                       dpo_supported = true;
> +                       pr_info("OPAL EPOW, DPO support detected.\n");
> +               }
> +               of_node_put(node_epow);
> +       }
> +
> +       /* Prepare to handle EPOW events */
> +       if (epow_supported) {
> +               /* Initialize poweroff timer */
> +               init_timer(&epow_timer);
> +               epow_timer.function = epow_poweroff;
Variable initialization should be done while defining the variable
itself not in the code.
Consider using DEFINE_TIMER macro to define this.

> +
> +               /* Register EPOW event notifier */
> +               ret = opal_message_notifier_register(OPAL_MSG_EPOW,
> +                               &opal_epow_nb);
> +               if (ret) {
> +                       pr_err("EPOW event notifier registration failed, "
> +                                       "ret = %d\n", ret);
> +               }
> +       }
> +
> +       if (dpo_supported) {
> +               /* Register DPO event notifier */
> +               ret = opal_message_notifier_register(OPAL_MSG_DPO,
> +                               &opal_dpo_nb);
> +               if (ret)
> +                       pr_err("DPO event notifier registration failed, "
> +                                       "ret = %d\n", ret);
>         }
>
> +       /* Check for any existing EPOW or DPO events. */
> +       process_existing_poweroff_events();
Process existing events only after you have registered all the notifiers or you
may miss some OPAL_MSG_SHUTDOWN notifications.

> +
> +       /* Register IPMI poweroff events notifier */
> +       ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
> +                               &opal_power_control_nb);
> +       if (ret)
> +               pr_err("IPMI power-control events notifier registration "
> +                               "failed, ret = %d\n", ret);
> +
> +
> +       pr_info("Poweroff events support initialized\n");
> +
>         return 0;
>  }
> -machine_subsys_initcall(powernv, opal_power_control_init);
> +
> +machine_subsys_initcall(powernv, opal_poweroff_events_init);
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index a7ade94..5d3c8e3 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -249,6 +249,7 @@ OPAL_CALL(opal_pci_reinit,                  OPAL_PCI_REINIT);
>  OPAL_CALL(opal_pci_mask_pe_error,              OPAL_PCI_MASK_PE_ERROR);
>  OPAL_CALL(opal_set_slot_led_status,            OPAL_SET_SLOT_LED_STATUS);
>  OPAL_CALL(opal_get_epow_status,                        OPAL_GET_EPOW_STATUS);
> +OPAL_CALL(opal_get_dpo_status,                 OPAL_GET_DPO_STATUS);
>  OPAL_CALL(opal_set_system_attention_led,       OPAL_SET_SYSTEM_ATTENTION_LED);
>  OPAL_CALL(opal_pci_next_error,                 OPAL_PCI_NEXT_ERROR);
>  OPAL_CALL(opal_pci_poll,                       OPAL_PCI_POLL);
> --
> 1.9.3
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
Michael Ellerman May 11, 2015, 6:49 a.m. UTC | #3
On Thu, 2015-05-07 at 15:00 +0530, Vipin K Parashar wrote:
> This patch adds support for FSP EPOW (Early Power Off Warning) and
> DPO (Delayed Power Off) events support for PowerNV platform.  EPOW events
> are generated by SPCN/FSP due to various critical system conditions that
> need system shutdown.  Few examples of these conditions are high ambient
> temperature or system running on UPS power with low UPS battery. DPO event
> is generated in response to admin initiated system shutdown request.
> 	This patch enables host kernel on PowerNV platform to handle OPAL
> notifications for these events and initiate system poweroff. Since EPOW
> notifications are sent in advance of impending shutdown event and thus
> this patch also adds functionality to wait for EPOW condition to return to
> normal. Host allows MAX_POWEROFF_SYS_TIME (600 seconds) as system
> poweroff time (time for host + guests shutdown) and waits for remaining
> time for EPOW condition to return to normal. If EPOW condition doesn't
> return to normal in calculated time it proceeds with graceful system
> shutdown. For EPOW events with smaller timeouts values than
> MAX_POWEROFF_SYS_TIME it proceeds with system shutdown without any wait
> for EPOW condition to return to normal.


Can I suggest an alternative design:
 - when we recieve a DPO event call orderly_poweroff()
 - when we recieve an EPOW event call orderly_poweroff()

Thoughts?

cheers
Vipin K Parashar May 11, 2015, 7:01 a.m. UTC | #4
Hi Joel,
            Thanks for review. My comments below.

On 05/08/2015 06:56 AM, Joel Stanley wrote:
> Hello Vipin,
>
> On Thu, May 7, 2015 at 7:00 PM, Vipin K Parashar
> <vipin@linux.vnet.ibm.com> wrote:
>> This patch adds support for FSP EPOW (Early Power Off Warning) and
>> DPO (Delayed Power Off) events support for PowerNV platform.
> I reviewed this patch for the changes it made to the existing poweroff
> code, you still need someone to look at the EPOW code itself.
>
>> Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
>> ---
>>   arch/powerpc/include/asm/opal-api.h            |  30 ++
>>   arch/powerpc/include/asm/opal.h                |   3 +-
>>   arch/powerpc/platforms/powernv/opal-power.c    | 379 +++++++++++++++++++++++--
>>   arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>>   4 files changed, 391 insertions(+), 22 deletions(-)
>>
>>   /* Internal functions */
>>   extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
>> diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
>> index ac46c2c..7c1b2f8 100644
>> --- a/arch/powerpc/platforms/powernv/opal-power.c
>> +++ b/arch/powerpc/platforms/powernv/opal-power.c
>> @@ -1,5 +1,5 @@
>>   /*
>> - * PowerNV OPAL power control for graceful shutdown handling
>> + * PowerNV poweroff events support
>>    *
>>    * Copyright 2015 IBM Corp.
>>    *
>> @@ -9,58 +9,395 @@
>>    * 2 of the License, or (at your option) any later version.
>>    */
>>
>> +#define pr_fmt(fmt)    "POWEROFF_EVENT: "    fmt
> OPAL_POWER?

Agreed.

>
>> +
>>   #include <linux/kernel.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/timer.h>
>>   #include <linux/reboot.h>
>> -#include <linux/notifier.h>
>> -
>> +#include <linux/of.h>
>>   #include <asm/opal.h>
>>   #include <asm/machdep.h>
>>
>> -#define SOFT_OFF 0x00
>> -#define SOFT_REBOOT 0x01
>> +/* Power control event types */
>> +#define SOFT_OFF       0x00
>> +#define SOFT_REBOOT    0x01
> While you're touching this code, I think these should be moved to opal-api.h

Sure will do.

>
>> +
>> +/* Max time for graceful system shutdown including guests. */
>> +#define MAX_POWEROFF_SYS_TIME  600
>> +
>> +/* IPMI power-control events notifier */
>>   static int opal_power_control_event(struct notifier_block *nb,
>> -                                   unsigned long msg_type, void *msg)
>> +                               unsigned long msg_type, void *msg)
>>   {
>> -       struct opal_msg *power_msg = msg;
>>          uint64_t type;
>> +       struct opal_msg *power_msg = msg;
>>
>>          type = be64_to_cpu(power_msg->params[0]);
>>
>>          switch (type) {
>>          case SOFT_REBOOT:
>> -               pr_info("OPAL: reboot requested\n");
>> +               pr_info("Reboot requested\n");
> I prefer the OPAL prefix.

OPAL prefix will get added with pr_fmt used above. So separate tag not 
needed.

>
>>                  orderly_reboot();
>>                  break;
>>          case SOFT_OFF:
>> -               pr_info("OPAL: poweroff requested\n");
>> +               pr_info("Poweroff requested\n");
> Ditto.

Same as above.

>
>>                  orderly_poweroff(true);
>>                  break;
>>          default:
>> -               pr_err("OPAL: power control type unexpected %016llx\n", type);
>> +               pr_err("Unknown event %llu\n", type);
> Ditto.

Same as above.

>
>>          }
>>
>>          return 0;
>>   }
>>
>> +/* OPAL EPOW event notifier block */
>> +static struct notifier_block opal_epow_nb = {
>> +       .notifier_call  = opal_epow_event,
>> +       .next           = NULL,
>> +       .priority       = 0,
>> +};
>> +
>> +/* OPAL DPO event notifier block */
>> +static struct notifier_block opal_dpo_nb = {
>> +       .notifier_call  = opal_dpo_event,
>> +       .next           = NULL,
>> +       .priority       = 0,
>> +};
>> +
>> +/* OPAL Power control events */
>>   static struct notifier_block opal_power_control_nb = {
>> -       .notifier_call  = opal_power_control_event,
>> -       .next           = NULL,
>> -       .priority       = 0,
>> +       .notifier_call  = opal_power_control_event,
>> +       .next           = NULL,
>> +       .priority       = 0,
>>   };
> Looks like you changed the whitespace?

Probably otherwise no change needed here.

>
>> -static int __init opal_power_control_init(void)
>> +/* Poweroff events init */
>> +static int __init opal_poweroff_events_init(void)
> This comment does not add any value.
>
> Renaming the function doesn't add much either.

ok. Will retain original name.

>
>>   {
>>          int ret;
>> +       struct device_node *node_epow;
>>
>> -       ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
>> -                                            &opal_power_control_nb);
>> -       if (ret) {
>> -               pr_err("%s: Can't register OPAL event notifier (%d)\n",
>> -                               __func__, ret);
>> -               return ret;
>> +       /*
>> +       * Determine EPOW, DPO support in hardware.
>> +       */
>> +       node_epow = of_find_node_by_path("/ibm,opal/epow");
>> +       if (node_epow) {
>> +               if (of_device_is_compatible(node_epow, "ibm,opal-epow")) {
>> +                       epow_supported = true;
>> +                       dpo_supported = true;
> Why are these separate flags? Do we have any systems that will support
> EPOW but not DPO, or DPO without EPOW?
>
> I suggest merging them into the one flag.

Had introduced two flags as future BMC systems might have EPOW while DPO 
remains FSP specific.
But agree that since today both of these are FSP specific so will merge 
both of them into single flag.

>
>> +                       pr_info("OPAL EPOW, DPO support detected.\n");
>> +               }
>> +               of_node_put(node_epow);
>> +       }
>> +
>> +       /* Prepare to handle EPOW events */
>> +       if (epow_supported) {
>> +               /* Initialize poweroff timer */
>> +               init_timer(&epow_timer);
>> +               epow_timer.function = epow_poweroff;
>> +
>> +               /* Register EPOW event notifier */
>> +               ret = opal_message_notifier_register(OPAL_MSG_EPOW,
>> +                               &opal_epow_nb);
>> +               if (ret) {
>> +                       pr_err("EPOW event notifier registration failed, "
>> +                                       "ret = %d\n", ret);
>> +               }
>> +       }
>> +
>> +       if (dpo_supported) {
>> +               /* Register DPO event notifier */
>> +               ret = opal_message_notifier_register(OPAL_MSG_DPO,
>> +                               &opal_dpo_nb);
>> +               if (ret)
>> +                       pr_err("DPO event notifier registration failed, "
>> +                                       "ret = %d\n", ret);
>>          }
>>
>> +       /* Check for any existing EPOW or DPO events. */
>> +       process_existing_poweroff_events();
>> +
>> +       /* Register IPMI poweroff events notifier */
>> +       ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
>> +                               &opal_power_control_nb);
>> +       if (ret)
>> +               pr_err("IPMI power-control events notifier registration "
>> +                               "failed, ret = %d\n", ret);
> The comment and the pr_err message are both incorrect; this is not an
> IPMI event but an OPAL event.

Will make them just power-control. OPAL will anyway get added with 
PREFIX mentioned above

>
>> +
>> +
>> +       pr_info("Poweroff events support initialized\n");
> This is unnecessary.

hmm, just a informational message indicating kernel support for 
power-control and power-off events.
Comes handy for quickly checking support via kernel logs.

>
>>          return 0;
>>   }
>> -machine_subsys_initcall(powernv, opal_power_control_init);
>> +
>> +machine_subsys_initcall(powernv, opal_poweroff_events_init);
>> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
>> index a7ade94..5d3c8e3 100644
>> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
>> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
>> @@ -249,6 +249,7 @@ OPAL_CALL(opal_pci_reinit,                  OPAL_PCI_REINIT);
>>   OPAL_CALL(opal_pci_mask_pe_error,              OPAL_PCI_MASK_PE_ERROR);
>>   OPAL_CALL(opal_set_slot_led_status,            OPAL_SET_SLOT_LED_STATUS);
>>   OPAL_CALL(opal_get_epow_status,                        OPAL_GET_EPOW_STATUS);
>> +OPAL_CALL(opal_get_dpo_status,                 OPAL_GET_DPO_STATUS);
>>   OPAL_CALL(opal_set_system_attention_led,       OPAL_SET_SYSTEM_ATTENTION_LED);
>>   OPAL_CALL(opal_pci_next_error,                 OPAL_PCI_NEXT_ERROR);
>>   OPAL_CALL(opal_pci_poll,                       OPAL_PCI_POLL);
>> --
>> 1.9.3
>>
Vipin K Parashar May 11, 2015, 9:01 a.m. UTC | #5
On 05/11/2015 12:19 PM, Michael Ellerman wrote:
> On Thu, 2015-05-07 at 15:00 +0530, Vipin K Parashar wrote:
>> This patch adds support for FSP EPOW (Early Power Off Warning) and
>> DPO (Delayed Power Off) events support for PowerNV platform.  EPOW events
>> are generated by SPCN/FSP due to various critical system conditions that
>> need system shutdown.  Few examples of these conditions are high ambient
>> temperature or system running on UPS power with low UPS battery. DPO event
>> is generated in response to admin initiated system shutdown request.
>> 	This patch enables host kernel on PowerNV platform to handle OPAL
>> notifications for these events and initiate system poweroff. Since EPOW
>> notifications are sent in advance of impending shutdown event and thus
>> this patch also adds functionality to wait for EPOW condition to return to
>> normal. Host allows MAX_POWEROFF_SYS_TIME (600 seconds) as system
>> poweroff time (time for host + guests shutdown) and waits for remaining
>> time for EPOW condition to return to normal. If EPOW condition doesn't
>> return to normal in calculated time it proceeds with graceful system
>> shutdown. For EPOW events with smaller timeouts values than
>> MAX_POWEROFF_SYS_TIME it proceeds with system shutdown without any wait
>> for EPOW condition to return to normal.
>
> Can I suggest an alternative design:
>   - when we recieve a DPO event call orderly_poweroff()
>   - when we recieve an EPOW event call orderly_poweroff()
>
> Thoughts?

Current design is calling orderly_poweroff immediately upon DPO event
as there is not need to wait for user initiated shutdowns.
     EPOW is sent in anticipation of a poweroff needed ahead of time. A 
typical example
is EPOW due to system on UPS power with 15 mins timeout. There could be case
when power is restored back within timeout and a poweroff is not needed. 
In such case
HW sends EPOW reset informing that EPOW condition has returned to normal.
     Another example is EPOW due to high ambient temp with 15 mins timeout.
Here too if temp goes down to manageable limits within timeout window, 
HW sends EPOW reset
to avoid shutdown.
     So to handle such cases current design implements wait for
HW Timeout - MAX_POWEROFF_SYS_TIME seconds before poweroff. If EPOW 
condition
returns to normal within this time poweroff is cancelled. So immediate 
poweroff is avoided
here to handle cases when EPOW condition returns to  normal.
     For EPOW cases (like system on UPS power with UPS battery low or 
Ambient temp
critically high) which have timeouts lower than MAX_POWEROFF_SYS 
seconds, it calls immediate
orderly_poweroff.
      In concise design implements immediate orderly_poweroff for DPO as 
well as EPOW cases when
timeout is less than  MAX_POWEROFF_SYS_TIME while for EPOW cases with 
MAX_POWEROFF_SYS_TIME
timeout it implements a wait for EPOW to return the normal.

Suggestions/thoughts ?

--Vipin
> cheers
>
>
Vipin K Parashar May 11, 2015, 10:47 a.m. UTC | #6
On 05/11/2015 02:31 PM, Vipin K Parashar wrote:
>
> On 05/11/2015 12:19 PM, Michael Ellerman wrote:
>> On Thu, 2015-05-07 at 15:00 +0530, Vipin K Parashar wrote:
>>> This patch adds support for FSP EPOW (Early Power Off Warning) and
>>> DPO (Delayed Power Off) events support for PowerNV platform. EPOW 
>>> events
>>> are generated by SPCN/FSP due to various critical system conditions 
>>> that
>>> need system shutdown.  Few examples of these conditions are high 
>>> ambient
>>> temperature or system running on UPS power with low UPS battery. DPO 
>>> event
>>> is generated in response to admin initiated system shutdown request.
>>>     This patch enables host kernel on PowerNV platform to handle OPAL
>>> notifications for these events and initiate system poweroff. Since EPOW
>>> notifications are sent in advance of impending shutdown event and thus
>>> this patch also adds functionality to wait for EPOW condition to 
>>> return to
>>> normal. Host allows MAX_POWEROFF_SYS_TIME (600 seconds) as system
>>> poweroff time (time for host + guests shutdown) and waits for remaining
>>> time for EPOW condition to return to normal. If EPOW condition doesn't
>>> return to normal in calculated time it proceeds with graceful system
>>> shutdown. For EPOW events with smaller timeouts values than
>>> MAX_POWEROFF_SYS_TIME it proceeds with system shutdown without any wait
>>> for EPOW condition to return to normal.
>>
>> Can I suggest an alternative design:
>>   - when we recieve a DPO event call orderly_poweroff()
>>   - when we recieve an EPOW event call orderly_poweroff()
>>
>> Thoughts?
>
> Current design is calling orderly_poweroff immediately upon DPO event
> as there is not need to wait for user initiated shutdowns.
>     EPOW is sent in anticipation of a poweroff needed ahead of time. A 
> typical example
> is EPOW due to system on UPS power with 15 mins timeout. There could 
> be case
> when power is restored back within timeout and a poweroff is not 
> needed. In such case
> HW sends EPOW reset informing that EPOW condition has returned to normal.
>     Another example is EPOW due to high ambient temp with 15 mins 
> timeout.
> Here too if temp goes down to manageable limits within timeout window, 
> HW sends EPOW reset
> to avoid shutdown.
>     So to handle such cases current design implements wait for
> HW Timeout - MAX_POWEROFF_SYS_TIME seconds before poweroff. If EPOW 
> condition
> returns to normal within this time poweroff is cancelled. So immediate 
> poweroff is avoided
> here to handle cases when EPOW condition returns to  normal.
>     For EPOW cases (like system on UPS power with UPS battery low or 
> Ambient temp
> critically high) which have timeouts lower than MAX_POWEROFF_SYS 
> seconds, it calls immediate
> orderly_poweroff.
>      In concise design implements immediate orderly_poweroff for DPO 
> as well as EPOW cases when
> timeout is less than  MAX_POWEROFF_SYS_TIME while for EPOW cases with 
> MAX_POWEROFF_SYS_TIME
> timeout it implements a wait for EPOW to return the normal.
Correction as below for above line:
     ........... it implements a wait for EPOW to return to normal 
followed by a orderly_poweroff after wait time.

>
> Suggestions/thoughts ?
>
> --Vipin
>> cheers
>>
>>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
Stewart Smith May 11, 2015, 10:31 p.m. UTC | #7
trigg <mr.triggtrigg@gmail.com> writes:
>> --- a/arch/powerpc/include/asm/opal-api.h
>> +++ b/arch/powerpc/include/asm/opal-api.h
>> @@ -730,6 +730,36 @@ struct opal_i2c_request {
>>         __be64 buffer_ra;               /* Buffer real address */
>>  };
>>
>> +/*
>> + * EPOW status sharing (OPAL and the host)
>> + *
>> + * The host will pass on OPAL, a buffer of length OPAL_EPOW_MAX_CLASSES
>> + * to fetch system wide EPOW status. Each element in the returned buffer
>> + * will contain bitwise EPOW status for each EPOW sub class.
>> + */
>> +
>> +/* EPOW types */
>> +enum OpalEpow {
>> +       OPAL_EPOW_POWER         = 0,    /* Power EPOW */
>> +       OPAL_EPOW_TEMP          = 1,    /* Temperature EPOW */
>> +       OPAL_EPOW_COOLING       = 2,    /* Cooling EPOW */
>> +       OPAL_MAX_EPOW_CLASSES   = 3,    /* Max EPOW categories */
>> +};
> Dont explicitly assign sequential numbers in an enum. Its taken care
> of by the compiler.

This header is shared with firmware, and the exact values of each item
does matter as it's ABI. The explicit numbers means that people think
twice before inserting a new value in the middle and subtley breaking
firmware ABI.
Triggering May 12, 2015, 5:09 p.m. UTC | #8
Accidentally hit the reply button instead of reply all. Re sending.

> On 12-May-2015, at 11:55, trigg <mr.triggtrigg@gmail.com> wrote:
> 
> Hi Stewart,
> 
> On Tue, May 12, 2015 at 4:01 AM, Stewart Smith
> <stewart@linux.vnet.ibm.com> wrote:
>> trigg <mr.triggtrigg@gmail.com> writes:
>>>> --- a/arch/powerpc/include/asm/opal-api.h
>>>> +++ b/arch/powerpc/include/asm/opal-api.h
>>>> @@ -730,6 +730,36 @@ struct opal_i2c_request {
>>>>        __be64 buffer_ra;               /* Buffer real address */
>>>> };
>>>> 
>>>> +/*
>>>> + * EPOW status sharing (OPAL and the host)
>>>> + *
>>>> + * The host will pass on OPAL, a buffer of length OPAL_EPOW_MAX_CLASSES
>>>> + * to fetch system wide EPOW status. Each element in the returned buffer
>>>> + * will contain bitwise EPOW status for each EPOW sub class.
>>>> + */
>>>> +
>>>> +/* EPOW types */
>>>> +enum OpalEpow {
>>>> +       OPAL_EPOW_POWER         = 0,    /* Power EPOW */
>>>> +       OPAL_EPOW_TEMP          = 1,    /* Temperature EPOW */
>>>> +       OPAL_EPOW_COOLING       = 2,    /* Cooling EPOW */
>>>> +       OPAL_MAX_EPOW_CLASSES   = 3,    /* Max EPOW categories */
>>>> +};
>>> Dont explicitly assign sequential numbers in an enum. Its taken care
>>> of by the compiler.
>> 
>> This header is shared with firmware, and the exact values of each item
>> does matter as it's ABI.
> 
> This enum has no semantic difference with a series of #defines. So you
> are better
> off using #defines for this. Using enums with explicitly defined
> values beats the whole
> purpose of using them.
> 
>> The explicit numbers means that people think
>> twice before inserting a new value in the middle and subtley breaking
>> firmware ABI.
> 
> I disagree. It wont prevent (even worsen) errors like two
> enum-constants being explicitly defined to same values.
> With implicit numbering you can atleast be sure that each
> enum-constant will be unique.
Vipin K Parashar May 13, 2015, 8:17 p.m. UTC | #9
On 05/11/2015 12:19 PM, Michael Ellerman wrote:
> On Thu, 2015-05-07 at 15:00 +0530, Vipin K Parashar wrote:
>> This patch adds support for FSP EPOW (Early Power Off Warning) and
>> DPO (Delayed Power Off) events support for PowerNV platform.  EPOW events
>> are generated by SPCN/FSP due to various critical system conditions that
>> need system shutdown.  Few examples of these conditions are high ambient
>> temperature or system running on UPS power with low UPS battery. DPO event
>> is generated in response to admin initiated system shutdown request.
>> 	This patch enables host kernel on PowerNV platform to handle OPAL
>> notifications for these events and initiate system poweroff. Since EPOW
>> notifications are sent in advance of impending shutdown event and thus
>> this patch also adds functionality to wait for EPOW condition to return to
>> normal. Host allows MAX_POWEROFF_SYS_TIME (600 seconds) as system
>> poweroff time (time for host + guests shutdown) and waits for remaining
>> time for EPOW condition to return to normal. If EPOW condition doesn't
>> return to normal in calculated time it proceeds with graceful system
>> shutdown. For EPOW events with smaller timeouts values than
>> MAX_POWEROFF_SYS_TIME it proceeds with system shutdown without any wait
>> for EPOW condition to return to normal.
>
> Can I suggest an alternative design:
>   - when we recieve a DPO event call orderly_poweroff()
>   - when we recieve an EPOW event call orderly_poweroff()
>
> Thoughts?

Will make changes as per suggested design above and sent out patch.

>
> cheers
>
>
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 0321a90..03b3cef 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -730,6 +730,36 @@  struct opal_i2c_request {
 	__be64 buffer_ra;		/* Buffer real address */
 };
 
+/*
+ * EPOW status sharing (OPAL and the host)
+ *
+ * The host will pass on OPAL, a buffer of length OPAL_EPOW_MAX_CLASSES
+ * to fetch system wide EPOW status. Each element in the returned buffer
+ * will contain bitwise EPOW status for each EPOW sub class.
+ */
+
+/* EPOW types */
+enum OpalEpow {
+	OPAL_EPOW_POWER         = 0,    /* Power EPOW */
+	OPAL_EPOW_TEMP          = 1,    /* Temperature EPOW */
+	OPAL_EPOW_COOLING       = 2,    /* Cooling EPOW */
+	OPAL_MAX_EPOW_CLASSES   = 3,    /* Max EPOW categories */
+};
+
+/* Power EPOW events */
+enum OpalEpowPower {
+	OPAL_EPOW_POWER_UPS     = 0x1, /* System on UPS power */
+	OPAL_EPOW_POWER_UPS_LOW = 0x2, /* System on UPS power with low battery*/
+};
+
+/* Temperature EPOW events */
+enum OpalEpowTemp {
+	OPAL_EPOW_TEMP_HIGH_AMB = 0x1, /* High ambient temperature */
+	OPAL_EPOW_TEMP_CRIT_AMB = 0x2, /* Critical ambient temperature */
+	OPAL_EPOW_TEMP_HIGH_INT = 0x4, /* High internal temperature */
+	OPAL_EPOW_TEMP_CRIT_INT = 0x8, /* Critical internal temperature */
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 042af1a..0777864 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -141,7 +141,6 @@  int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
-int64_t opal_get_epow_status(__be64 *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
 int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
 			    __be16 *pci_error_type, __be16 *severity);
@@ -200,6 +199,8 @@  int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf,
 		uint64_t size, uint64_t token);
 int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
 		uint64_t token);
+int32_t opal_get_epow_status(__be32 *status, __be32 *num_classes);
+int32_t opal_get_dpo_status(__be32 *timeout);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
index ac46c2c..7c1b2f8 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -1,5 +1,5 @@ 
 /*
- * PowerNV OPAL power control for graceful shutdown handling
+ * PowerNV poweroff events support
  *
  * Copyright 2015 IBM Corp.
  *
@@ -9,58 +9,395 @@ 
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt)	"POWEROFF_EVENT: "    fmt
+
 #include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
 #include <linux/reboot.h>
-#include <linux/notifier.h>
-
+#include <linux/of.h>
 #include <asm/opal.h>
 #include <asm/machdep.h>
 
-#define SOFT_OFF 0x00
-#define SOFT_REBOOT 0x01
+/* Power control event types */
+#define SOFT_OFF	0x00
+#define SOFT_REBOOT	0x01
+
+/* Max time for graceful system shutdown including guests. */
+#define MAX_POWEROFF_SYS_TIME	600
+
+/* System EPOW, DPO support variables */
+bool epow_supported = false;
+bool dpo_supported = false;
+
+/* System EPOW status */
+static u32 epow_status[OPAL_MAX_EPOW_CLASSES];
+static int num_epow_classes;
+
+/* EPOW event timer and corresponding locks */
+static struct timer_list epow_timer;
+static DEFINE_SPINLOCK(epow_timer_spinlock);
+
+/* EPOW events types */
+#define EPOW_POWER_UPS		0
+#define EPOW_POWER_UPS_LOW	1
+#define EPOW_TEMP_HIGH_AMB	2
+#define EPOW_TEMP_CRIT_AMB	3
+#define EPOW_TEMP_HIGH_INT	4
+#define EPOW_TEMP_CRIT_INT	5
+#define EPOW_UNKNOWN		6
+#define MAX_EPOW_EVENTS		7
+
+/* Poweroff EPOW events description */
+static const char * const epow_events_map[] = {
+	[EPOW_POWER_UPS]	= "ups",
+	[EPOW_POWER_UPS_LOW]	= "ups-low",
+	[EPOW_TEMP_HIGH_AMB]	= "high-ambient-temp",
+	[EPOW_TEMP_CRIT_AMB]	= "crit-ambient-temp",
+	[EPOW_TEMP_HIGH_INT]	= "high-internal-temp",
+	[EPOW_TEMP_CRIT_INT]	= "crit-internal-temp",
+	[EPOW_UNKNOWN]		= "Unknown",
+};
+
+/* Poweroff EPOW events timeout values in seconds */
+static const int epow_timeout[] = {
+	[EPOW_POWER_UPS]	= 900,
+	[EPOW_POWER_UPS_LOW]	= 20,
+	[EPOW_TEMP_HIGH_AMB]	= 900,
+	[EPOW_TEMP_CRIT_AMB]	= 20,
+	[EPOW_TEMP_HIGH_INT]	= 900,
+	[EPOW_TEMP_CRIT_INT]	= 20,
+	[EPOW_UNKNOWN]		= 0,
+};
+
+/* System poweroff function. */
+static void epow_poweroff(unsigned long event)
+{
+	pr_info("Powering off system due to %s EPOW event\n",
+				epow_events_map[event]);
+	orderly_poweroff(true);
+}
+
+/* Start EPOW poweroff timer */
+static void start_epow_timer(unsigned long event, int32_t timeout)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&epow_timer_spinlock, flags);
+	/* Check for already running epow poweroff timer */
+	if (timer_pending(&epow_timer)) {
+		/* Timer for same event */
+		if (epow_timer.data == event) {
+			spin_unlock_irqrestore(&epow_timer_spinlock, flags);
+			return;
+		}
+
+		/* Timer with early poweroff timeout */
+		if (epow_timer.expires <= (jiffies + timeout * HZ)) {
+			event = epow_timer.data;
+			spin_unlock_irqrestore(&epow_timer_spinlock, flags);
+			pr_info("Poweroff already scheduled for %s EPOW event "
+					"with earlier timeout.\n",
+					epow_events_map[event]);
+			return;
+		}
+	}
+
+	/* Start a new timer/modify existing timer with new timeout value */
+	epow_timer.data = event;
+	mod_timer(&epow_timer, jiffies + timeout  * HZ);
+	spin_unlock_irqrestore(&epow_timer_spinlock, flags);
+	pr_info("Scheduled system poweroff due to %s EPOW event "
+			"after %d seconds\n", epow_events_map[event], timeout);
+}
+
+/* Stop EPOW poweroff timer */
+static void stop_epow_timer(void)
+{
+	int rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&epow_timer_spinlock, flags);
+	rc = del_timer(&epow_timer);
+	spin_unlock_irqrestore(&epow_timer_spinlock, flags);
+
+	if (rc)
+		pr_info("Poweroff timer deactivated\n");
+}
 
+/* Get DPO status */
+static bool get_dpo_status(int32_t *dpo_timeout)
+{
+	int rc;
+	__be32 opal_dpo_timeout;
+
+	rc = opal_get_dpo_status(&opal_dpo_timeout);
+	if (rc == OPAL_WRONG_STATE) {
+		*dpo_timeout = 0;
+		return false;
+	}
+
+	*dpo_timeout = be32_to_cpu(opal_dpo_timeout);
+	return true;
+}
+
+/* Get EPOW status */
+static bool get_epow_status(void)
+{
+	int i;
+	bool epow_detected = false;
+
+	__be32 opal_epow_status[OPAL_MAX_EPOW_CLASSES] = {0};
+	__be32 epow_classes;
+
+	/* Send kernel EPOW classes supported info to OPAL */
+	epow_classes = cpu_to_be32(OPAL_MAX_EPOW_CLASSES);
+
+	/* Get EPOW events information from OPAL */
+	opal_get_epow_status(opal_epow_status, &epow_classes);
+
+	/* Copy EPOW status obtained from OPAL */
+	memset(epow_status, 0, sizeof(epow_status));
+	num_epow_classes = be32_to_cpu(epow_classes);
+	for (i = 0; i < num_epow_classes; i++) {
+		epow_status[i] = be32_to_cpu(opal_epow_status[i]);
+		if (epow_status[i])
+			epow_detected = true;
+	}
+
+	pr_info("EPOW classes supported OPAL = %d, Host = %d "
+			"EPOW Status = 0x%x, 0x%x, 0x%x\n",
+			num_epow_classes, OPAL_MAX_EPOW_CLASSES,
+			epow_status[0], epow_status[1], epow_status[2]);
+
+	return epow_detected;
+}
+
+/* Process EPOW information */
+static void process_epow(void)
+{
+	int timeout;
+	unsigned int event = EPOW_UNKNOWN;
+
+	/* EPOW events in order of criticality */
+	if (epow_status[OPAL_EPOW_POWER] & OPAL_EPOW_POWER_UPS) {
+		pr_info("EPOW due to system running on UPS power\n");
+		event = EPOW_POWER_UPS;
+	}
+
+	if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_HIGH_AMB) {
+		pr_info("EPOW due to high ambient temperature\n");
+		event = EPOW_TEMP_HIGH_AMB;
+	}
+
+	if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_HIGH_INT) {
+		pr_info("EPOW due to high internal temperature\n");
+		event = EPOW_TEMP_HIGH_INT;
+	}
+
+	if (epow_status[OPAL_EPOW_POWER] & OPAL_EPOW_POWER_UPS_LOW) {
+		pr_info("EPOW due to system running on UPS power "
+				"with low battery\n");
+		event = EPOW_POWER_UPS_LOW;
+	}
+
+	if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_CRIT_AMB) {
+		pr_info("EPOW due to critical ambient temperature\n");
+		event = EPOW_TEMP_CRIT_AMB;
+	}
+
+	if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_CRIT_INT) {
+		pr_info("EPOW due to critical internal temperature\n");
+		event = EPOW_TEMP_CRIT_INT;
+	}
+
+	if (event == EPOW_UNKNOWN) {
+		pr_err("Unknown EPOW event.\n");
+		return;
+	}
+
+	/*
+	* Check if we have enough time to perform graceful shutdown.
+	* Wait for EPOW condition to return to normal if we have more
+	* time than needed for graceful shutdown time. Otherwise proceed
+	* with immediate system shutdown
+	*/
+	timeout = epow_timeout[event];
+	if (timeout <= MAX_POWEROFF_SYS_TIME)
+		timeout = 0;
+	else
+		timeout = timeout - MAX_POWEROFF_SYS_TIME;
+
+	/* Start EPOW poweroff timer */
+	start_epow_timer(event, timeout);
+}
+
+/* Process existing EPOW, DPO events */
+static void process_existing_poweroff_events(void)
+{
+	bool status;
+	int32_t dpo_timeout;
+
+	/* Check for any existing DPO event */
+	if (dpo_supported) {
+		status = get_dpo_status(&dpo_timeout);
+		if (status) {
+			pr_info("Existing DPO event detected. "
+					"Powering off system\n");
+			orderly_poweroff(true);
+			return;
+		}
+		pr_info("No existing DPO event detected\n");
+	}
+
+	/* Check for any existing EPOW event */
+	if (epow_supported) {
+		status = get_epow_status();
+		if (status) {
+			pr_info("Existing EPOW event detected.\n");
+			process_epow();
+			return;
+		}
+		pr_info("No existing EPOW event detected\n");
+	}
+}
+
+/* EPOW event notifier */
+static int opal_epow_event(struct notifier_block *nb,
+			unsigned long msg_type, void *msg)
+{
+	bool epow_present;
+
+	pr_info("EPOW event received\n");
+
+	/* Get EPOW event details */
+	epow_present = get_epow_status();
+
+	/* Stop shutdown timer if EPOW condition has returned to normal */
+	if (!epow_present) {
+		stop_epow_timer();
+		return 0;
+	}
+
+	/* Process EPOW event information */
+	process_epow();
+
+	return 0;
+}
+
+/* DPO event notifier */
+static int opal_dpo_event(struct notifier_block *nb,
+				unsigned long msg_type, void *msg)
+{
+	pr_info("DPO event received. Powering off system as requested\n");
+	orderly_poweroff(true);
+
+	return 0;
+}
+
+/* IPMI power-control events notifier */
 static int opal_power_control_event(struct notifier_block *nb,
-				    unsigned long msg_type, void *msg)
+				unsigned long msg_type, void *msg)
 {
-	struct opal_msg *power_msg = msg;
 	uint64_t type;
+	struct opal_msg *power_msg = msg;
 
 	type = be64_to_cpu(power_msg->params[0]);
 
 	switch (type) {
 	case SOFT_REBOOT:
-		pr_info("OPAL: reboot requested\n");
+		pr_info("Reboot requested\n");
 		orderly_reboot();
 		break;
 	case SOFT_OFF:
-		pr_info("OPAL: poweroff requested\n");
+		pr_info("Poweroff requested\n");
 		orderly_poweroff(true);
 		break;
 	default:
-		pr_err("OPAL: power control type unexpected %016llx\n", type);
+		pr_err("Unknown event %llu\n", type);
 	}
 
 	return 0;
 }
 
+/* OPAL EPOW event notifier block */
+static struct notifier_block opal_epow_nb = {
+	.notifier_call  = opal_epow_event,
+	.next           = NULL,
+	.priority       = 0,
+};
+
+/* OPAL DPO event notifier block */
+static struct notifier_block opal_dpo_nb = {
+	.notifier_call  = opal_dpo_event,
+	.next           = NULL,
+	.priority       = 0,
+};
+
+/* OPAL Power control events */
 static struct notifier_block opal_power_control_nb = {
-	.notifier_call	= opal_power_control_event,
-	.next		= NULL,
-	.priority	= 0,
+	.notifier_call  = opal_power_control_event,
+	.next           = NULL,
+	.priority       = 0,
 };
 
-static int __init opal_power_control_init(void)
+/* Poweroff events init */
+static int __init opal_poweroff_events_init(void)
 {
 	int ret;
+	struct device_node *node_epow;
 
-	ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
-					     &opal_power_control_nb);
-	if (ret) {
-		pr_err("%s: Can't register OPAL event notifier (%d)\n",
-				__func__, ret);
-		return ret;
+	/*
+	* Determine EPOW, DPO support in hardware.
+	*/
+	node_epow = of_find_node_by_path("/ibm,opal/epow");
+	if (node_epow) {
+		if (of_device_is_compatible(node_epow, "ibm,opal-epow")) {
+			epow_supported = true;
+			dpo_supported = true;
+			pr_info("OPAL EPOW, DPO support detected.\n");
+		}
+		of_node_put(node_epow);
+	}
+
+	/* Prepare to handle EPOW events */
+	if (epow_supported) {
+		/* Initialize poweroff timer */
+		init_timer(&epow_timer);
+		epow_timer.function = epow_poweroff;
+
+		/* Register EPOW event notifier */
+		ret = opal_message_notifier_register(OPAL_MSG_EPOW,
+				&opal_epow_nb);
+		if (ret) {
+			pr_err("EPOW event notifier registration failed, "
+					"ret = %d\n", ret);
+		}
+	}
+
+	if (dpo_supported) {
+		/* Register DPO event notifier */
+		ret = opal_message_notifier_register(OPAL_MSG_DPO,
+				&opal_dpo_nb);
+		if (ret)
+			pr_err("DPO event notifier registration failed, "
+					"ret = %d\n", ret);
 	}
 
+	/* Check for any existing EPOW or DPO events. */
+	process_existing_poweroff_events();
+
+	/* Register IPMI poweroff events notifier */
+	ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
+				&opal_power_control_nb);
+	if (ret)
+		pr_err("IPMI power-control events notifier registration "
+				"failed, ret = %d\n", ret);
+
+
+	pr_info("Poweroff events support initialized\n");
+
 	return 0;
 }
-machine_subsys_initcall(powernv, opal_power_control_init);
+
+machine_subsys_initcall(powernv, opal_poweroff_events_init);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a7ade94..5d3c8e3 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -249,6 +249,7 @@  OPAL_CALL(opal_pci_reinit,			OPAL_PCI_REINIT);
 OPAL_CALL(opal_pci_mask_pe_error,		OPAL_PCI_MASK_PE_ERROR);
 OPAL_CALL(opal_set_slot_led_status,		OPAL_SET_SLOT_LED_STATUS);
 OPAL_CALL(opal_get_epow_status,			OPAL_GET_EPOW_STATUS);
+OPAL_CALL(opal_get_dpo_status,			OPAL_GET_DPO_STATUS);
 OPAL_CALL(opal_set_system_attention_led,	OPAL_SET_SYSTEM_ATTENTION_LED);
 OPAL_CALL(opal_pci_next_error,			OPAL_PCI_NEXT_ERROR);
 OPAL_CALL(opal_pci_poll,			OPAL_PCI_POLL);