diff mbox

[v2,4/4] ipmi/wdt: Add ipmi watchdog timer support

Message ID 1423026466-31386-5-git-send-email-alistair@popple.id.au
State Accepted
Headers show

Commit Message

Alistair Popple Feb. 4, 2015, 5:07 a.m. UTC
Add support for an ipmi watchdog timer. During skiboot initialisation
this patch will cause the system to be reset if opal_run_pollers()
isn't called for more than 60 seconds.

Once the payload is started the watchdog timer will be reset and a
pre-timeout interrupt set. The payload should then receive the
interrupt and call into skiboot which will disable the watchdog timer.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 core/init.c                 |   5 +-
 core/ipmi.c                 |   6 +++
 hw/ipmi/Makefile.inc        |   1 +
 hw/ipmi/ipmi-watchdog.c     | 126 ++++++++++++++++++++++++++++++++++++++++++++
 include/ipmi.h              |  13 +++++
 platforms/astbmc/common.c   |   1 +
 platforms/astbmc/palmetto.c |   1 +
 7 files changed, 151 insertions(+), 2 deletions(-)
 create mode 100644 hw/ipmi/ipmi-watchdog.c

Comments

Joel Stanley Feb. 5, 2015, 5:19 a.m. UTC | #1
On Wed, Feb 4, 2015 at 3:37 PM, Alistair Popple <alistair@popple.id.au> wrote:
> Add support for an ipmi watchdog timer. During skiboot initialisation
> this patch will cause the system to be reset if opal_run_pollers()
> isn't called for more than 60 seconds.

Looks good. A question below about clearing the message flags.

> Once the payload is started the watchdog timer will be reset and a
> pre-timeout interrupt set. The payload should then receive the
> interrupt and call into skiboot which will disable the watchdog timer.
>
> Signed-off-by: Alistair Popple <alistair@popple.id.au>

Reviewed-by: Joel Stanley <joel@jms.id.au>


> index 99c2fd9..6489b7a 100644
> --- a/core/ipmi.c
> +++ b/core/ipmi.c
> @@ -153,6 +153,12 @@ static void ipmi_get_message_flags_complete(struct ipmi_msg *msg)
>
>         prlog(PR_DEBUG, "IPMI Get Message Flags: %02x\n", flags);
>
> +       /* Once we see an interrupt we assume the payload has
> +        * booted. We disable the wdt and let the OS setup its own
> +        * wdt. */
> +       if (flags & IPMI_MESSAGE_FLAGS_WATCHDOG_PRE_TIMEOUT)
> +               ipmi_wdt_stop();

I see that this will send an IPMI_SET_WDT to the BMC. Do you also need
to clear the flag?

> +
>         /* Message available in the event buffer? Queue a Read Event command
>          * to retrieve it. The flag is cleared by performing a read */
>         if (flags & IPMI_MESSAGE_FLAGS_EVENT_BUFFER) {
Alistair Popple Feb. 5, 2015, 5:35 a.m. UTC | #2
Hi,

> > index 99c2fd9..6489b7a 100644
> > --- a/core/ipmi.c
> > +++ b/core/ipmi.c
> > @@ -153,6 +153,12 @@ static void ipmi_get_message_flags_complete(struct ipmi_msg *msg)
> >
> >         prlog(PR_DEBUG, "IPMI Get Message Flags: %02x\n", flags);
> > 
> > +       /* Once we see an interrupt we assume the payload has
> > +        * booted. We disable the wdt and let the OS setup its own
> > +        * wdt. */
> > +       if (flags & IPMI_MESSAGE_FLAGS_WATCHDOG_PRE_TIMEOUT)
> > +               ipmi_wdt_stop();
> 
> > I see that this will send an IPMI_SET_WDT to the BMC. Do you also need
> > to clear the flag?
> 

That's a good question. I have no idea. I will run a few tests and see; the
specification does not seem entirely clear in this regard.

Regards,

Alistair
diff mbox

Patch

diff --git a/core/init.c b/core/init.c
index cffa638..188b507 100644
--- a/core/init.c
+++ b/core/init.c
@@ -44,6 +44,8 @@ 
 #include <hostservices.h>
 #include <timer.h>
 
+#include <ipmi.h>
+
 /*
  * Boot semaphore, incremented by each CPU calling in
  *
@@ -367,7 +369,7 @@  void __noreturn load_and_boot_kernel(bool is_reboot)
 	}
 	fsp_console_select_stdout();
 
-	/* 
+	/*
 	 * OCC takes few secs to boot.  Call this as late as
 	 * as possible to avoid delay.
 	 */
@@ -707,4 +709,3 @@  void __noreturn secondary_cpu_entry(void)
 
 	__secondary_cpu_entry();
 }
-
diff --git a/core/ipmi.c b/core/ipmi.c
index 99c2fd9..6489b7a 100644
--- a/core/ipmi.c
+++ b/core/ipmi.c
@@ -153,6 +153,12 @@  static void ipmi_get_message_flags_complete(struct ipmi_msg *msg)
 
 	prlog(PR_DEBUG, "IPMI Get Message Flags: %02x\n", flags);
 
+	/* Once we see an interrupt we assume the payload has
+	 * booted. We disable the wdt and let the OS set up its own
+	 * wdt. */
+	if (flags & IPMI_MESSAGE_FLAGS_WATCHDOG_PRE_TIMEOUT)
+		ipmi_wdt_stop();
+
 	/* Message available in the event buffer? Queue a Read Event command
 	 * to retrieve it. The flag is cleared by performing a read */
 	if (flags & IPMI_MESSAGE_FLAGS_EVENT_BUFFER) {
diff --git a/hw/ipmi/Makefile.inc b/hw/ipmi/Makefile.inc
index 02670d7..1c358a9 100644
--- a/hw/ipmi/Makefile.inc
+++ b/hw/ipmi/Makefile.inc
@@ -1,5 +1,6 @@ 
 SUBDIRS += hw/ipmi
 
 IPMI_OBJS  = ipmi-rtc.o ipmi-power.o ipmi-opal.o ipmi-fru.o ipmi-sel.o
+IPMI_OBJS += ipmi-watchdog.o
 IPMI = hw/ipmi/built-in.o
 $(IPMI): $(IPMI_OBJS:%=hw/ipmi/%)
diff --git a/hw/ipmi/ipmi-watchdog.c b/hw/ipmi/ipmi-watchdog.c
new file mode 100644
index 0000000..072eccd
--- /dev/null
+++ b/hw/ipmi/ipmi-watchdog.c
@@ -0,0 +1,126 @@ 
+
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <ipmi.h>
+#include <lock.h>
+#include <opal.h>
+#include <device.h>
+#include <timer.h>
+#include <timebase.h>
+#include <pool.h>
+
+#define TIMER_USE_DONT_LOG	0x80
+#define TIMER_USE_DONT_STOP	0x40
+#define TIMER_USE_POST		0x02
+
+/* WDT expiration actions */
+#define WDT_PRETIMEOUT_SMI	0x10
+#define WDT_POWER_CYCLE_ACTION 	0x01
+#define WDT_NO_ACTION		0x00
+
+/* How long to set the overall watchdog timeout for. In units of
+ * 100ms. If the timer is not reset within this time the watchdog
+ * expiration action will occur. */
+#define WDT_TIMEOUT		600
+
+/* How often to reset the timer using schedule_timer(). Too short and
+we risk accidentally resetting the system due to opal_run_pollers() not
+being called in time, too long and we waste time resetting the wdt
+more frequently than necessary. */
+#define WDT_MARGIN		300
+
+static struct timer wdt_timer;
+static bool wdt_stopped = false;
+
+static void ipmi_wdt_complete(struct ipmi_msg *msg)
+{
+	if (msg->cmd == IPMI_CMD(IPMI_RESET_WDT) && !msg->user_data)
+		schedule_timer(&wdt_timer, msecs_to_tb(
+				       (WDT_TIMEOUT - WDT_MARGIN)*100));
+
+	ipmi_free_msg(msg);
+}
+
+static void set_wdt(uint8_t action, uint16_t count, uint8_t pretimeout)
+{
+	struct ipmi_msg *ipmi_msg;
+
+	ipmi_msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_SET_WDT,
+			      ipmi_wdt_complete, NULL, NULL, 6, 0);
+	if (!ipmi_msg) {
+		prerror("Unable to allocate set wdt message\n");
+		return;
+	}
+	ipmi_msg->error = ipmi_wdt_complete;
+	ipmi_msg->data[0] = TIMER_USE_POST |
+		TIMER_USE_DONT_LOG; 			/* Timer Use */
+	ipmi_msg->data[1] = action;			/* Timer Actions */
+	ipmi_msg->data[2] = pretimeout;			/* Pre-timeout Interval */
+	ipmi_msg->data[3] = 0;				/* Timer Use Flags */
+	ipmi_msg->data[4] = count & 0xff;		/* Initial countdown (lsb) */
+	ipmi_msg->data[5] = (count >> 8) & 0xff;	/* Initial countdown (msb) */
+	ipmi_queue_msg(ipmi_msg);
+}
+
+static void reset_wdt(struct timer *t __unused, void *data)
+{
+	struct ipmi_msg *ipmi_msg;
+
+	ipmi_msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_RESET_WDT,
+			      ipmi_wdt_complete, data, NULL, 0, 0);
+	if (!ipmi_msg) {
+		prerror("Unable to allocate reset wdt message\n");
+		return;
+	}
+
+	if (!data)
+		ipmi_queue_msg_sync(ipmi_msg);
+	else
+		ipmi_queue_msg(ipmi_msg);
+}
+
+void ipmi_wdt_stop(void)
+{
+	if (!wdt_stopped) {
+		wdt_stopped = true;
+		set_wdt(WDT_NO_ACTION, 100, 0);
+	}
+}
+
+void ipmi_wdt_final_reset(void)
+{
+	set_wdt(WDT_POWER_CYCLE_ACTION | WDT_PRETIMEOUT_SMI, WDT_TIMEOUT,
+		WDT_MARGIN/10);
+	reset_wdt(NULL, (void *) 1);
+	cancel_timer(&wdt_timer);
+}
+
+void ipmi_wdt_init(void)
+{
+	init_timer(&wdt_timer, reset_wdt, NULL);
+	set_wdt(WDT_POWER_CYCLE_ACTION, WDT_TIMEOUT, 0);
+
+	/* Start the WDT */
+	reset_wdt(NULL, NULL);
+
+	/* For some reason we have to reset it twice to get it to
+	 * actually start the first time. */
+	reset_wdt(NULL, NULL);
+
+	return;
+}
diff --git a/include/ipmi.h b/include/ipmi.h
index 5cee692..372a357 100644
--- a/include/ipmi.h
+++ b/include/ipmi.h
@@ -102,6 +102,8 @@ 
 #define IPMI_CHASSIS_CONTROL		IPMI_CODE(IPMI_NETFN_CHASSIS, 0x02)
 #define IPMI_SET_POWER_STATE		IPMI_CODE(IPMI_NETFN_APP, 0x06)
 #define IPMI_GET_POWER_STATE		IPMI_CODE(IPMI_NETFN_APP, 0x07)
+#define IPMI_RESET_WDT			IPMI_CODE(IPMI_NETFN_APP, 0x22)
+#define IPMI_SET_WDT			IPMI_CODE(IPMI_NETFN_APP, 0x24)
 #define IPMI_SET_ENABLES		IPMI_CODE(IPMI_NETFN_APP, 0x2E)
 #define IPMI_GET_ENABLES		IPMI_CODE(IPMI_NETFN_APP, 0x2F)
 #define IPMI_CLEAR_MESSAGE_FLAGS	IPMI_CODE(IPMI_NETFN_APP, 0x30)
@@ -187,6 +189,7 @@  void ipmi_init_msg(struct ipmi_msg *msg, int interface,
 
 /* called by backend code to indicate a SMS_ATN event */
 void ipmi_sms_attention(void);
+
 /* Add an ipmi message to the queue */
 int ipmi_queue_msg(struct ipmi_msg *msg);
 
@@ -226,4 +229,14 @@  int ipmi_elog_commit(struct errorlog *elog_buf);
 /* Callback to parse an OEM SEL message */
 void ipmi_parse_sel(struct ipmi_msg *msg);
 
+/* Starts the watchdog timer */
+void ipmi_wdt_init(void);
+
+/* Stop the wdt */
+void ipmi_wdt_stop(void);
+
+/* Reset the watchdog timer. Does not return until the timer has been
+ * reset and does not schedule future resets. */
+void ipmi_wdt_final_reset(void);
+
 #endif
diff --git a/platforms/astbmc/common.c b/platforms/astbmc/common.c
index 68df2bb..c7c03aa 100644
--- a/platforms/astbmc/common.c
+++ b/platforms/astbmc/common.c
@@ -97,6 +97,7 @@  void astbmc_init(void)
 
 	/* Register the BT interface with the IPMI layer */
 	bt_init();
+	ipmi_wdt_init();
 	ipmi_rtc_init();
 	ipmi_opal_init();
 	ipmi_fru_init(0x01);
diff --git a/platforms/astbmc/palmetto.c b/platforms/astbmc/palmetto.c
index a0030e8..b9ef4a4 100644
--- a/platforms/astbmc/palmetto.c
+++ b/platforms/astbmc/palmetto.c
@@ -51,4 +51,5 @@  DECLARE_PLATFORM(palmetto) = {
 	.cec_power_down         = astbmc_ipmi_power_down,
 	.cec_reboot             = astbmc_ipmi_reboot,
 	.elog_commit		= ipmi_elog_commit,
+	.exit			= ipmi_wdt_final_reset,
 };