diff mbox

[V2] hw/fsp: Do not queue SP and SPCN class messages during reset/reload

Message ID 148954449401.22399.3558228268747757336.stgit@thinktux.in.ibm.com
State Accepted
Headers show

Commit Message

Ananth N Mavinakayanahalli March 15, 2017, 2:21 a.m. UTC
During FSP R/R, the FSP is inaccessible and will lose state. Messages to the
FSP are generally queued for sending later.

It does seem like the FSP fails to process any subsequent messages of certain
classes (SP info -- ipmi) if it receives queued mbox messages it isn't expecting.

In certain other cases (sensors), the FSP driver returns a default code (async
completion) even though there is no known bound from the time of this error
return to the actual data being available. The kernel driver keeps waiting
leading to soft-lockup on the host side.

Mitigate both these (known) cases by returning OPAL_BUSY so the host driver
knows to retry later.

With this change, the sensors command works fine when the FSP comes back.

This version also resolves the remaining IPMI issues.


Signed-off-by: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Tested-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>
---
 hw/fsp/fsp-ipmi.c   |    9 +++++++++
 hw/fsp/fsp-sensor.c |    6 ++++++
 hw/fsp/fsp.c        |   16 ++++++++++++++++
 include/fsp.h       |    1 +
 4 files changed, 32 insertions(+)

Comments

Stewart Smith March 16, 2017, 7:04 a.m. UTC | #1
Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> writes:
> During FSP R/R, the FSP is inaccessible and will lose state. Messages to the
> FSP are generally queued for sending later.
>
> It does seem like the FSP fails to process any subsequent messages of certain
> classes (SP info -- ipmi) if it receives queued mbox messages it isn't expecting.
>
> In certain other cases (sensors), the FSP driver returns a default code (async
> completion) even though there is no known bound from the time of this error
> return to the actual data being available. The kernel driver keeps waiting
> leading to soft-lockup on the host side.
>
> Mitigate both these (known) cases by returning OPAL_BUSY so the host driver
> knows to retry later.
>
> With this change, the sensors command works fine when the FSP comes back.
>
> This version also resolves the remaining IPMI issues
>
>
> Signed-off-by: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
> Tested-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>

Thanks, merged to master as of 4940b8148640c06e139aec8c6d0370af7dd3b184

cherry picked into 5.4.x as of a6d5bc107e76123440d60a05698c151084604180
cherry picked into 5.3.x as of d18986fbc4c2d8ec5495b063cafbd1686403293a
diff mbox

Patch

diff --git a/hw/fsp/fsp-ipmi.c b/hw/fsp/fsp-ipmi.c
index f803f17..d262cee 100644
--- a/hw/fsp/fsp-ipmi.c
+++ b/hw/fsp/fsp-ipmi.c
@@ -126,6 +126,9 @@  static int fsp_ipmi_send_request(void)
 	struct fsp_msg *msg;
 	int rc;
 
+	if (fsp_in_rr())
+		return OPAL_BUSY;
+
 	lock(&fsp_ipmi.lock);
 	/* An outstanding request is still pending */
 	if (fsp_ipmi.cur_msg) {
@@ -208,6 +211,9 @@  static int fsp_ipmi_queue_msg(struct ipmi_msg *ipmi_msg)
 	struct fsp_ipmi_msg *fsp_ipmi_msg = container_of(ipmi_msg,
 			struct fsp_ipmi_msg, ipmi_msg);
 
+	if (fsp_in_rr())
+		return OPAL_BUSY;
+
 	lock(&fsp_ipmi.lock);
 	list_add_tail(&fsp_ipmi.msg_queue, &fsp_ipmi_msg->link);
 	unlock(&fsp_ipmi.lock);
@@ -220,6 +226,9 @@  static int fsp_ipmi_queue_msg_head(struct ipmi_msg *ipmi_msg)
 	struct fsp_ipmi_msg *fsp_ipmi_msg = container_of(ipmi_msg,
 			struct fsp_ipmi_msg, ipmi_msg);
 
+	if (fsp_in_rr())
+		return OPAL_BUSY;
+
 	lock(&fsp_ipmi.lock);
 	list_add(&fsp_ipmi.msg_queue, &fsp_ipmi_msg->link);
 	unlock(&fsp_ipmi.lock);
diff --git a/hw/fsp/fsp-sensor.c b/hw/fsp/fsp-sensor.c
index 0fa3115..6363530 100644
--- a/hw/fsp/fsp-sensor.c
+++ b/hw/fsp/fsp-sensor.c
@@ -376,6 +376,9 @@  static int64_t fsp_sensor_send_read_request(struct opal_sensor_data *attr)
 	uint32_t align;
 	uint32_t cmd_header;
 
+	if (fsp_in_rr())
+		return OPAL_BUSY;
+
 	prlog(PR_INSANE, "Get the data for modifier [%x]\n",
 	      spcn_mod_data[attr->mod_index].mod);
 
@@ -516,6 +519,9 @@  int64_t fsp_opal_read_sensor(uint32_t sensor_hndl, int token,
 
 	prlog(PR_INSANE, "fsp_opal_read_sensor [%08x]\n", sensor_hndl);
 
+	if (fsp_in_rr())
+		return OPAL_BUSY;
+
 	if (sensor_state == SENSOR_PERMANENT_ERROR) {
 		rc = OPAL_HARDWARE;
 		goto out;
diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
index c25e42c..a0c5a78 100644
--- a/hw/fsp/fsp.c
+++ b/hw/fsp/fsp.c
@@ -409,6 +409,22 @@  static bool fsp_in_reset(struct fsp *fsp)
 	}
 }
 
+bool fsp_in_rr(void)	/* true => callers in this patch bail out with OPAL_BUSY instead of queueing */
+{
+	struct fsp *fsp = fsp_get_active();	/* NOTE(review): dereferenced unchecked below -- confirm this cannot be NULL */
+	struct fsp_iopath *iop;
+
+	if (fsp->active_iopath < 0)	/* negative index: no iopath entry to inspect, report "in R/R" */
+		return true;
+
+	iop = &fsp->iopath[fsp->active_iopath];
+
+	if (fsp_in_reset(fsp) || fsp_in_hir(fsp) || !(psi_check_link_active(iop->psi)))	/* any one of: in reset, in HIR, PSI link not active */
+		return true;
+
+	return false;	/* none of the checks above fired */
+}
+
 static bool fsp_hir_state_timeout(void)
 {
 	u64 now = mftb();
diff --git a/include/fsp.h b/include/fsp.h
index 6142ca3..f75b6ad 100644
--- a/include/fsp.h
+++ b/include/fsp.h
@@ -810,6 +810,7 @@  extern void fsp_ipmi_init(void);
 extern void fsp_reinit_fsp(void);
 extern void fsp_trigger_reset(void);
 extern void fsp_reset_links(void);
+extern bool fsp_in_rr(void);
 
 /* FSP memory errors */
 extern void fsp_memory_err_init(void);