[RFC,1/2] hmi: Don't re-read HMER multiple times

Message ID 20180116044540.10707-1-benh@kernel.crashing.org
State New
Headers show
Series
  • [RFC,1/2] hmi: Don't re-read HMER multiple times
Related show

Commit Message

Benjamin Herrenschmidt Jan. 16, 2018, 4:45 a.m.
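Pass the HMER value that handle_hmi_exception() was given down to the
pre-recovery cleanup and debug-print helpers, instead of having each of
them re-read the SPR. We want to make sure all reporting and actions are
based upon the same snapshot of HMER, in case bits get added by HW while
we are in OPAL.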

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 core/hmi.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

Comments

Mahesh Jagannath Salgaonkar Jan. 17, 2018, 5:27 a.m. | #1
On 01/16/2018 10:15 AM, Benjamin Herrenschmidt wrote:
> We want to make sure all reporting and actions are based
> upon the same snapshot of HMER in case bits get added
> by HW while we are in OPAL.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Acked-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

> ---
>  core/hmi.c | 35 ++++++++++++++---------------------
>  1 file changed, 14 insertions(+), 21 deletions(-)
> 
> diff --git a/core/hmi.c b/core/hmi.c
> index eb4faa38..5642bd0b 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -719,16 +719,13 @@ static int get_split_core_mode(void)
>   *	- SPR_TFMR_TB_RESIDUE_ERR
>   *	- SPR_TFMR_HDEC_PARITY_ERROR
>   */
> -static void pre_recovery_cleanup_p8(void)
> +static void pre_recovery_cleanup_p8(uint64_t hmer)
>  {
> -	uint64_t hmer;
>  	uint64_t tfmr;
>  	uint32_t sibling_thread_mask;
>  	int split_core_mode, subcore_id, thread_id, threads_per_core;
>  	int i;
> 
> -	hmer = mfspr(SPR_HMER);
> -
>  	/* exit if it is not Time facility error. */
>  	if (!(hmer & SPR_HMER_TFAC_ERROR))
>  		return;
> @@ -826,15 +823,12 @@ static void pre_recovery_cleanup_p8(void)
>   *	- SPR_TFMR_TB_RESIDUE_ERR
>   *	- SPR_TFMR_HDEC_PARITY_ERROR
>   */
> -static void pre_recovery_cleanup_p9(void)
> +static void pre_recovery_cleanup_p9(uint64_t hmer)
>  {
> -	uint64_t hmer;
>  	uint64_t tfmr;
>  	int threads_per_core = cpu_thread_count;
>  	int i;
> 
> -	hmer = mfspr(SPR_HMER);
> -
>  	/* exit if it is not Time facility error. */
>  	if (!(hmer & SPR_HMER_TFAC_ERROR))
>  		return;
> @@ -912,12 +906,12 @@ static void pre_recovery_cleanup_p9(void)
>  	wait_for_cleanup_complete();
>  }
> 
> -static void pre_recovery_cleanup(void)
> +static void pre_recovery_cleanup(uint64_t hmer)
>  {
>  	if (proc_gen == proc_gen_p9)
> -		return pre_recovery_cleanup_p9();
> +		return pre_recovery_cleanup_p9(hmer);
>  	else
> -		return pre_recovery_cleanup_p8();
> +		return pre_recovery_cleanup_p8(hmer);
>  }
> 
>  static void hmi_exit(void)
> @@ -926,9 +920,8 @@ static void hmi_exit(void)
>  	*(this_cpu()->core_hmi_state_ptr) &= ~(this_cpu()->thread_mask);
>  }
> 
> -static void hmi_print_debug(const uint8_t *msg)
> +static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
>  {
> -	uint64_t hmer = mfspr(SPR_HMER);
>  	const char *loc;
>  	uint32_t core_id, thread_index;
> 
> @@ -959,7 +952,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	 * In case of split core, some of the Timer facility errors need
>  	 * cleanup to be done before we proceed with the error recovery.
>  	 */
> -	pre_recovery_cleanup();
> +	pre_recovery_cleanup(hmer);
> 
>  	lock(&hmi_lock);
>  	/*
> @@ -978,7 +971,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
>  			queue_hmi_event(hmi_evt, recover);
>  		}
> -		hmi_print_debug("Processor recovery Done.");
> +		hmi_print_debug("Processor recovery Done.", hmer);
>  	}
>  	if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
>  		hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
> @@ -987,7 +980,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
>  			queue_hmi_event(hmi_evt, recover);
>  		}
> -		hmi_print_debug("Processor recovery Done (masked).");
> +		hmi_print_debug("Processor recovery Done (masked).", hmer);
>  	}
>  	if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
>  		hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
> @@ -997,13 +990,13 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			queue_hmi_event(hmi_evt, recover);
>  		}
>  		hmi_print_debug("Processor recovery occurred again before"
> -				"bit2 was cleared\n");
> +				"bit2 was cleared\n", hmer);
>  	}
>  	/* Assert if we see malfunction alert, we can not continue. */
>  	if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
>  		hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
> 
> -		hmi_print_debug("Malfunction Alert");
> +		hmi_print_debug("Malfunction Alert", hmer);
>  		if (hmi_evt)
>  			decode_malfunction(hmi_evt);
>  	}
> @@ -1012,7 +1005,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
>  		hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
> 
> -		hmi_print_debug("Hypervisor resource error");
> +		hmi_print_debug("Hypervisor resource error", hmer);
>  		recover = 0;
>  		if (hmi_evt) {
>  			hmi_evt->severity = OpalHMI_SEV_FATAL;
> @@ -1028,7 +1021,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	if (hmer & SPR_HMER_TFAC_ERROR) {
>  		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
> 
> -		hmi_print_debug("Timer Facility Error");
> +		hmi_print_debug("Timer Facility Error", hmer);
> 
>  		hmer &= ~SPR_HMER_TFAC_ERROR;
>  		recover = chiptod_recover_tb_errors();
> @@ -1043,7 +1036,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
>  		hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
> 
> -		hmi_print_debug("TFMR parity Error");
> +		hmi_print_debug("TFMR parity Error", hmer);
>  		recover = chiptod_recover_tb_errors();
>  		if (hmi_evt) {
>  			hmi_evt->severity = OpalHMI_SEV_FATAL;
>

Patch

diff --git a/core/hmi.c b/core/hmi.c
index eb4faa38..5642bd0b 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -719,16 +719,13 @@  static int get_split_core_mode(void)
  *	- SPR_TFMR_TB_RESIDUE_ERR
  *	- SPR_TFMR_HDEC_PARITY_ERROR
  */
-static void pre_recovery_cleanup_p8(void)
+static void pre_recovery_cleanup_p8(uint64_t hmer)
 {
-	uint64_t hmer;
 	uint64_t tfmr;
 	uint32_t sibling_thread_mask;
 	int split_core_mode, subcore_id, thread_id, threads_per_core;
 	int i;
 
-	hmer = mfspr(SPR_HMER);
-
 	/* exit if it is not Time facility error. */
 	if (!(hmer & SPR_HMER_TFAC_ERROR))
 		return;
@@ -826,15 +823,12 @@  static void pre_recovery_cleanup_p8(void)
  *	- SPR_TFMR_TB_RESIDUE_ERR
  *	- SPR_TFMR_HDEC_PARITY_ERROR
  */
-static void pre_recovery_cleanup_p9(void)
+static void pre_recovery_cleanup_p9(uint64_t hmer)
 {
-	uint64_t hmer;
 	uint64_t tfmr;
 	int threads_per_core = cpu_thread_count;
 	int i;
 
-	hmer = mfspr(SPR_HMER);
-
 	/* exit if it is not Time facility error. */
 	if (!(hmer & SPR_HMER_TFAC_ERROR))
 		return;
@@ -912,12 +906,12 @@  static void pre_recovery_cleanup_p9(void)
 	wait_for_cleanup_complete();
 }
 
-static void pre_recovery_cleanup(void)
+static void pre_recovery_cleanup(uint64_t hmer)
 {
 	if (proc_gen == proc_gen_p9)
-		return pre_recovery_cleanup_p9();
+		return pre_recovery_cleanup_p9(hmer);
 	else
-		return pre_recovery_cleanup_p8();
+		return pre_recovery_cleanup_p8(hmer);
 }
 
 static void hmi_exit(void)
@@ -926,9 +920,8 @@  static void hmi_exit(void)
 	*(this_cpu()->core_hmi_state_ptr) &= ~(this_cpu()->thread_mask);
 }
 
-static void hmi_print_debug(const uint8_t *msg)
+static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
 {
-	uint64_t hmer = mfspr(SPR_HMER);
 	const char *loc;
 	uint32_t core_id, thread_index;
 
@@ -959,7 +952,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	 * In case of split core, some of the Timer facility errors need
 	 * cleanup to be done before we proceed with the error recovery.
 	 */
-	pre_recovery_cleanup();
+	pre_recovery_cleanup(hmer);
 
 	lock(&hmi_lock);
 	/*
@@ -978,7 +971,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
 			queue_hmi_event(hmi_evt, recover);
 		}
-		hmi_print_debug("Processor recovery Done.");
+		hmi_print_debug("Processor recovery Done.", hmer);
 	}
 	if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
 		hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
@@ -987,7 +980,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
 			queue_hmi_event(hmi_evt, recover);
 		}
-		hmi_print_debug("Processor recovery Done (masked).");
+		hmi_print_debug("Processor recovery Done (masked).", hmer);
 	}
 	if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
 		hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
@@ -997,13 +990,13 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			queue_hmi_event(hmi_evt, recover);
 		}
 		hmi_print_debug("Processor recovery occurred again before"
-				"bit2 was cleared\n");
+				"bit2 was cleared\n", hmer);
 	}
 	/* Assert if we see malfunction alert, we can not continue. */
 	if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
 		hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
 
-		hmi_print_debug("Malfunction Alert");
+		hmi_print_debug("Malfunction Alert", hmer);
 		if (hmi_evt)
 			decode_malfunction(hmi_evt);
 	}
@@ -1012,7 +1005,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
 		hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
 
-		hmi_print_debug("Hypervisor resource error");
+		hmi_print_debug("Hypervisor resource error", hmer);
 		recover = 0;
 		if (hmi_evt) {
 			hmi_evt->severity = OpalHMI_SEV_FATAL;
@@ -1028,7 +1021,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	if (hmer & SPR_HMER_TFAC_ERROR) {
 		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
 
-		hmi_print_debug("Timer Facility Error");
+		hmi_print_debug("Timer Facility Error", hmer);
 
 		hmer &= ~SPR_HMER_TFAC_ERROR;
 		recover = chiptod_recover_tb_errors();
@@ -1043,7 +1036,7 @@  int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
 		hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
 
-		hmi_print_debug("TFMR parity Error");
+		hmi_print_debug("TFMR parity Error", hmer);
 		recover = chiptod_recover_tb_errors();
 		if (hmi_evt) {
 			hmi_evt->severity = OpalHMI_SEV_FATAL;