From patchwork Thu Feb 5 05:45:52 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mahesh J Salgaonkar X-Patchwork-Id: 436616 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 6CF7F14016B for ; Thu, 5 Feb 2015 16:47:13 +1100 (AEDT) Received: from ozlabs.org (ozlabs.org [103.22.144.67]) by lists.ozlabs.org (Postfix) with ESMTP id 499511A0B87 for ; Thu, 5 Feb 2015 16:47:13 +1100 (AEDT) X-Original-To: skiboot@lists.ozlabs.org Delivered-To: skiboot@lists.ozlabs.org Received: from e28smtp06.in.ibm.com (e28smtp06.in.ibm.com [122.248.162.6]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id E67E21A0B60 for ; Thu, 5 Feb 2015 16:47:06 +1100 (AEDT) Received: from /spool/local by e28smtp06.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Thu, 5 Feb 2015 11:16:59 +0530 Received: from d28dlp03.in.ibm.com (9.184.220.128) by e28smtp06.in.ibm.com (192.168.1.136) with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted; Thu, 5 Feb 2015 11:16:58 +0530 Received: from d28relay01.in.ibm.com (d28relay01.in.ibm.com [9.184.220.58]) by d28dlp03.in.ibm.com (Postfix) with ESMTP id 5A23D1258059 for ; Thu, 5 Feb 2015 11:18:01 +0530 (IST) Received: from d28av02.in.ibm.com (d28av02.in.ibm.com [9.184.220.64]) by d28relay01.in.ibm.com (8.14.9/8.14.9/NCO v10.0) with ESMTP id t155jr9K393724 for ; Thu, 5 Feb 2015 11:15:53 +0530 Received: from d28av02.in.ibm.com (localhost [127.0.0.1]) by d28av02.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id t155jqTv004668 for ; Thu, 5 Feb 2015 11:15:53 +0530 Received: from [9.109.222.221] (mars.in.ibm.com [9.124.35.30] (may be forged)) by d28av02.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVin) with ESMTP id t155jq7j004648; Thu, 5 Feb 2015 11:15:52 +0530 From: Mahesh J Salgaonkar To: skiboot list , Benjamin Herrenschmidt Date: Thu, 05 Feb 2015 11:15:52 +0530 Message-ID: <20150205054531.1527.98003.stgit@mars> User-Agent: StGit/0.17-dirty MIME-Version: 1.0 X-TM-AS-MML: disable X-Content-Scanned: Fidelis XPS MAILER x-cbid: 15020505-0021-0000-0000-000003A615E2 Subject: [Skiboot] [PATCH v2] opal: Do not overwrite same HMI event for multiple HMI errors. X-BeenThere: skiboot@lists.ozlabs.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: Mailing list for skiboot development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org Sender: "Skiboot" From: Mahesh Salgaonkar The current implementation overwrites the same HMI event if there are multiple HMI errors reported through a single HMI interrupt. This patch fixes that issue by sending a separate HMI event per error. Signed-off-by: Mahesh Salgaonkar --- Change in V2: - Removed the forward declaration for queue_hmi_event() and moved the function to the top instead. 
core/hmi.c | 73 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/core/hmi.c b/core/hmi.c index 45fc89b..f21ca50 100644 --- a/core/hmi.c +++ b/core/hmi.c @@ -146,6 +146,32 @@ static struct lock hmi_lock = LOCK_UNLOCKED; +static int queue_hmi_event(struct OpalHMIEvent *hmi_evt, int recover) +{ + uint64_t *hmi_data; + + /* Don't queue up event if recover == -1 */ + if (recover == -1) + return 0; + + /* set disposition */ + if (recover == 1) + hmi_evt->disposition = OpalHMI_DISPOSITION_RECOVERED; + else if (recover == 0) + hmi_evt->disposition = OpalHMI_DISPOSITION_NOT_RECOVERED; + + /* + * struct OpalHMIEvent is of (3 * 64 bits) size and well packed + * structure. Hence use uint64_t pointer to pass entire structure + * using 4 params in generic message format. + */ + hmi_data = (uint64_t *)hmi_evt; + + /* queue up for delivery to host. */ + return opal_queue_msg(OPAL_MSG_HMI_EVT, NULL, NULL, + hmi_data[0], hmi_data[1], hmi_data[2]); +} + static int is_capp_recoverable(int chip_id) { uint64_t reg; @@ -214,6 +240,7 @@ static int decode_malfunction(struct OpalHMIEvent *hmi_evt) int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) { int recover = 1; + uint64_t tfmr; printf("HMI: Received HMI interrupt: HMER = 0x%016llx\n", hmer); if (hmi_evt) @@ -223,6 +250,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) if (hmi_evt) { hmi_evt->severity = OpalHMI_SEV_NO_ERROR; hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE; + queue_hmi_event(hmi_evt, recover); } printf("HMI: Processor recovery Done.\n"); } @@ -231,6 +259,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) if (hmi_evt) { hmi_evt->severity = OpalHMI_SEV_NO_ERROR; hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED; + queue_hmi_event(hmi_evt, recover); } printf("HMI: Processor recovery Done (masked).\n"); } @@ -239,6 +268,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent 
*hmi_evt) if (hmi_evt) { hmi_evt->severity = OpalHMI_SEV_NO_ERROR; hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN; + queue_hmi_event(hmi_evt, recover); } printf("HMI: Processor recovery occurred again before" "bit2 was cleared\n"); @@ -247,8 +277,10 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) if (hmer & SPR_HMER_MALFUNCTION_ALERT) { hmer &= ~SPR_HMER_MALFUNCTION_ALERT; - if (hmi_evt) + if (hmi_evt) { recover = decode_malfunction(hmi_evt); + queue_hmi_event(hmi_evt, recover); + } } /* Assert if we see Hypervisor resource error, we can not continue. */ @@ -257,6 +289,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) if (hmi_evt) { hmi_evt->severity = OpalHMI_SEV_FATAL; hmi_evt->type = OpalHMI_ERROR_HYP_RESOURCE; + queue_hmi_event(hmi_evt, recover); } recover = 0; } @@ -266,22 +299,26 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) * TFMR and take corrective action wherever required. */ if (hmer & SPR_HMER_TFAC_ERROR) { + tfmr = mfspr(SPR_TFMR); /* save original TFMR */ hmer &= ~SPR_HMER_TFAC_ERROR; + recover = chiptod_recover_tb_errors(); if (hmi_evt) { hmi_evt->severity = OpalHMI_SEV_ERROR_SYNC; hmi_evt->type = OpalHMI_ERROR_TFAC; - hmi_evt->tfmr = mfspr(SPR_TFMR); + hmi_evt->tfmr = tfmr; + queue_hmi_event(hmi_evt, recover); } - recover = chiptod_recover_tb_errors(); } if (hmer & SPR_HMER_TFMR_PARITY_ERROR) { + tfmr = mfspr(SPR_TFMR); /* save original TFMR */ hmer &= ~SPR_HMER_TFMR_PARITY_ERROR; + recover = 0; if (hmi_evt) { hmi_evt->severity = OpalHMI_SEV_FATAL; hmi_evt->type = OpalHMI_ERROR_TFMR_PARITY; - hmi_evt->tfmr = mfspr(SPR_TFMR); + hmi_evt->tfmr = tfmr; + queue_hmi_event(hmi_evt, recover); } - recover = 0; } /* @@ -293,44 +330,20 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt) return recover; } -static int queue_hmi_event(struct OpalHMIEvent *hmi_evt) -{ - uint64_t *hmi_data; - - /* - * struct OpalHMIEvent is of (3 * 64 bits) size and well packed - 
* structure. Hence use uint64_t pointer to pass entire structure - * using 4 params in generic message format. - */ - hmi_data = (uint64_t *)hmi_evt; - - /* queue up for delivery to host. */ - return opal_queue_msg(OPAL_MSG_HMI_EVT, NULL, NULL, - hmi_data[0], hmi_data[1], hmi_data[2]); -} - static int64_t opal_handle_hmi(void) { uint64_t hmer; int rc = OPAL_SUCCESS; struct OpalHMIEvent hmi_evt; - int recover; memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent)); hmi_evt.version = OpalHMIEvt_V1; lock(&hmi_lock); hmer = mfspr(SPR_HMER); /* Get HMER register value */ - recover = handle_hmi_exception(hmer, &hmi_evt); + handle_hmi_exception(hmer, &hmi_evt); unlock(&hmi_lock); - if (recover == 1) - hmi_evt.disposition = OpalHMI_DISPOSITION_RECOVERED; - else if (recover == 0) - hmi_evt.disposition = OpalHMI_DISPOSITION_NOT_RECOVERED; - - if (recover != -1) - queue_hmi_event(&hmi_evt); return rc; } opal_call(OPAL_HANDLE_HMI, opal_handle_hmi, 0);