diff mbox series

[RFC,1/2] hmi: Don't re-read HMER multiple times

Message ID 20180116044540.10707-1-benh@kernel.crashing.org
State Superseded
Headers show
Series [RFC,1/2] hmi: Don't re-read HMER multiple times | expand

Commit Message

Benjamin Herrenschmidt Jan. 16, 2018, 4:45 a.m. UTC
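We want to make sure all reporting and actions are based
upon the same snapshot of HMER in case bits get added
by HW while we are in OPAL. To that end, pass the hmer
value given to handle_hmi_exception() down to
pre_recovery_cleanup() and hmi_print_debug() instead of
having each of them read SPR_HMER again with mfspr().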

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 core/hmi.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

Comments

Mahesh J Salgaonkar Jan. 17, 2018, 5:27 a.m. UTC | #1
On 01/16/2018 10:15 AM, Benjamin Herrenschmidt wrote:
> We want to make sure all reporting and actions are based
> upon the same snapshot of HMER in case bits get added
> by HW while we are in OPAL.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Acked-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

> ---
>  core/hmi.c | 35 ++++++++++++++---------------------
>  1 file changed, 14 insertions(+), 21 deletions(-)
> 
> diff --git a/core/hmi.c b/core/hmi.c
> index eb4faa38..5642bd0b 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -719,16 +719,13 @@ static int get_split_core_mode(void)
>   *	- SPR_TFMR_TB_RESIDUE_ERR
>   *	- SPR_TFMR_HDEC_PARITY_ERROR
>   */
> -static void pre_recovery_cleanup_p8(void)
> +static void pre_recovery_cleanup_p8(uint64_t hmer)
>  {
> -	uint64_t hmer;
>  	uint64_t tfmr;
>  	uint32_t sibling_thread_mask;
>  	int split_core_mode, subcore_id, thread_id, threads_per_core;
>  	int i;
> 
> -	hmer = mfspr(SPR_HMER);
> -
>  	/* exit if it is not Time facility error. */
>  	if (!(hmer & SPR_HMER_TFAC_ERROR))
>  		return;
> @@ -826,15 +823,12 @@ static void pre_recovery_cleanup_p8(void)
>   *	- SPR_TFMR_TB_RESIDUE_ERR
>   *	- SPR_TFMR_HDEC_PARITY_ERROR
>   */
> -static void pre_recovery_cleanup_p9(void)
> +static void pre_recovery_cleanup_p9(uint64_t hmer)
>  {
> -	uint64_t hmer;
>  	uint64_t tfmr;
>  	int threads_per_core = cpu_thread_count;
>  	int i;
> 
> -	hmer = mfspr(SPR_HMER);
> -
>  	/* exit if it is not Time facility error. */
>  	if (!(hmer & SPR_HMER_TFAC_ERROR))
>  		return;
> @@ -912,12 +906,12 @@ static void pre_recovery_cleanup_p9(void)
>  	wait_for_cleanup_complete();
>  }
> 
> -static void pre_recovery_cleanup(void)
> +static void pre_recovery_cleanup(uint64_t hmer)
>  {
>  	if (proc_gen == proc_gen_p9)
> -		return pre_recovery_cleanup_p9();
> +		return pre_recovery_cleanup_p9(hmer);
>  	else
> -		return pre_recovery_cleanup_p8();
> +		return pre_recovery_cleanup_p8(hmer);
>  }
> 
>  static void hmi_exit(void)
> @@ -926,9 +920,8 @@ static void hmi_exit(void)
>  	*(this_cpu()->core_hmi_state_ptr) &= ~(this_cpu()->thread_mask);
>  }
> 
> -static void hmi_print_debug(const uint8_t *msg)
> +static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
>  {
> -	uint64_t hmer = mfspr(SPR_HMER);
>  	const char *loc;
>  	uint32_t core_id, thread_index;
> 
> @@ -959,7 +952,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	 * In case of split core, some of the Timer facility errors need
>  	 * cleanup to be done before we proceed with the error recovery.
>  	 */
> -	pre_recovery_cleanup();
> +	pre_recovery_cleanup(hmer);
> 
>  	lock(&hmi_lock);
>  	/*
> @@ -978,7 +971,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
>  			queue_hmi_event(hmi_evt, recover);
>  		}
> -		hmi_print_debug("Processor recovery Done.");
> +		hmi_print_debug("Processor recovery Done.", hmer);
>  	}
>  	if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
>  		hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
> @@ -987,7 +980,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
>  			queue_hmi_event(hmi_evt, recover);
>  		}
> -		hmi_print_debug("Processor recovery Done (masked).");
> +		hmi_print_debug("Processor recovery Done (masked).", hmer);
>  	}
>  	if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
>  		hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
> @@ -997,13 +990,13 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			queue_hmi_event(hmi_evt, recover);
>  		}
>  		hmi_print_debug("Processor recovery occurred again before"
> -				"bit2 was cleared\n");
> +				"bit2 was cleared\n", hmer);
>  	}
>  	/* Assert if we see malfunction alert, we can not continue. */
>  	if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
>  		hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
> 
> -		hmi_print_debug("Malfunction Alert");
> +		hmi_print_debug("Malfunction Alert", hmer);
>  		if (hmi_evt)
>  			decode_malfunction(hmi_evt);
>  	}
> @@ -1012,7 +1005,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
>  		hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
> 
> -		hmi_print_debug("Hypervisor resource error");
> +		hmi_print_debug("Hypervisor resource error", hmer);
>  		recover = 0;
>  		if (hmi_evt) {
>  			hmi_evt->severity = OpalHMI_SEV_FATAL;
> @@ -1028,7 +1021,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	if (hmer & SPR_HMER_TFAC_ERROR) {
>  		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
> 
> -		hmi_print_debug("Timer Facility Error");
> +		hmi_print_debug("Timer Facility Error", hmer);
> 
>  		hmer &= ~SPR_HMER_TFAC_ERROR;
>  		recover = chiptod_recover_tb_errors();
> @@ -1043,7 +1036,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
>  		hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
> 
> -		hmi_print_debug("TFMR parity Error");
> +		hmi_print_debug("TFMR parity Error", hmer);
>  		recover = chiptod_recover_tb_errors();
>  		if (hmi_evt) {
>  			hmi_evt->severity = OpalHMI_SEV_FATAL;
>
diff mbox series

Patch

diff --git a/core/hmi.c b/core/hmi.c
index eb4faa38..5642bd0b 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -719,16 +719,13 @@  static int get_split_core_mode(void)
  *	- SPR_TFMR_TB_RESIDUE_ERR
  *	- SPR_TFMR_HDEC_PARITY_ERROR
  */
-static void pre_recovery_cleanup_p8(void)
+static void pre_recovery_cleanup_p8(uint64_t hmer)
 {
-	uint64_t hmer;
 	uint64_t tfmr;
 	uint32_t sibling_thread_mask;
 	int split_core_mode, subcore_id, thread_id, threads_per_core;
 	int i;
 
-	hmer = mfspr(SPR_HMER);
-
 	/* exit if it is not Time facility error. */
 	if (!(hmer & SPR_HMER_TFAC_ERROR))
 		return;
@@ -826,15 +823,12 @@  static void pre_recovery_cleanup_p8(void)
  *	- SPR_TFMR_TB_RESIDUE_ERR
  *	- SPR_TFMR_HDEC_PARITY_ERROR
  */
-static void pre_recovery_cleanup_p9(void)
+static void pre_recovery_cleanup_p9(uint64_t hmer)
 {
-	uint64_t hmer;
 	uint64_t tfmr;
 	int threads_per_core = cpu_thread_count;
 	int i;
 
-	hmer = mfspr(SPR_HMER);
-
 	/* exit if it is not Time facility error. */
 	if (!(hmer & SPR_HMER_TFAC_ERROR))
 		return;
@@ -912,12 +906,12 @@  static void pre_recovery_cleanup_p9(void)
 	wait_for_cleanup_complete();
 }
 
-static void pre_recovery_cleanup(void)
+static void pre_recovery_cleanup(uint64_t hmer)
 {
 	if (proc_gen == proc_gen_p9)
-		return pre_recovery_cleanup_p9();
+		return pre_recovery_cleanup_p9(hmer);
 	else
-		return pre_recovery_cleanup_p8();
+		return pre_recovery_cleanup_p8(hmer);
 }
 
 static void hmi_exit(void)
@@ -926,9 +920,8 @@  static void hmi_exit(void)
 	*(this_cpu()->core_hmi_state_ptr) &= ~(this_cpu()->thread_mask);
 }
 
-static void hmi_print_debug(const uint8_t *msg)
+static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
 {
-	uint64_t hmer = mfspr(SPR_HMER);
 	const char *loc;
 	uint32_t core_id, thread_index;
 
@@ -959,7 +952,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	 * In case of split core, some of the Timer facility errors need
 	 * cleanup to be done before we proceed with the error recovery.
 	 */
-	pre_recovery_cleanup();
+	pre_recovery_cleanup(hmer);
 
 	lock(&hmi_lock);
 	/*
@@ -978,7 +971,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
 			queue_hmi_event(hmi_evt, recover);
 		}
-		hmi_print_debug("Processor recovery Done.");
+		hmi_print_debug("Processor recovery Done.", hmer);
 	}
 	if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
 		hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
@@ -987,7 +980,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
 			queue_hmi_event(hmi_evt, recover);
 		}
-		hmi_print_debug("Processor recovery Done (masked).");
+		hmi_print_debug("Processor recovery Done (masked).", hmer);
 	}
 	if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
 		hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
@@ -997,13 +990,13 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			queue_hmi_event(hmi_evt, recover);
 		}
 		hmi_print_debug("Processor recovery occurred again before"
-				"bit2 was cleared\n");
+				"bit2 was cleared\n", hmer);
 	}
 	/* Assert if we see malfunction alert, we can not continue. */
 	if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
 		hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
 
-		hmi_print_debug("Malfunction Alert");
+		hmi_print_debug("Malfunction Alert", hmer);
 		if (hmi_evt)
 			decode_malfunction(hmi_evt);
 	}
@@ -1012,7 +1005,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
 		hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
 
-		hmi_print_debug("Hypervisor resource error");
+		hmi_print_debug("Hypervisor resource error", hmer);
 		recover = 0;
 		if (hmi_evt) {
 			hmi_evt->severity = OpalHMI_SEV_FATAL;
@@ -1028,7 +1021,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	if (hmer & SPR_HMER_TFAC_ERROR) {
 		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
 
-		hmi_print_debug("Timer Facility Error");
+		hmi_print_debug("Timer Facility Error", hmer);
 
 		hmer &= ~SPR_HMER_TFAC_ERROR;
 		recover = chiptod_recover_tb_errors();
@@ -1043,7 +1036,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
 		hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
 
-		hmi_print_debug("TFMR parity Error");
+		hmi_print_debug("TFMR parity Error", hmer);
 		recover = chiptod_recover_tb_errors();
 		if (hmi_evt) {
 			hmi_evt->severity = OpalHMI_SEV_FATAL;