diff mbox series

[v2,1/2] opal/xscom: Move the delay inside xscom_reset() function.

Message ID 151273017576.22104.17324894016679784398.stgit@jupiter.in.ibm.com
State Accepted
Headers show
Series [v2,1/2] opal/xscom: Move the delay inside xscom_reset() function. | expand

Commit Message

Mahesh J Salgaonkar Dec. 8, 2017, 10:49 a.m. UTC
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

So caller of xscom_reset() does not have to bother about adding a delay
separately. Instead caller can control whether to add a delay or not using
second argument to xscom_reset().

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
Changes in V2:
- Add a bool argument to xscom_reset() to give the control to caller whether to
  delay is required or not after reset.
---
 hw/xscom.c |   39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

Comments

Nicholas Piggin Dec. 8, 2017, 1:24 p.m. UTC | #1
On Fri, 08 Dec 2017 16:19:35 +0530
Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> 
> So caller of xscom_reset() does not have to bother about adding a delay
> separately. Instead caller can control whether to add a delay or not using
> second argument to xscom_reset().
> 
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

> ---
> Changes in V2:
> - Add a bool argument to xscom_reset() to give the control to caller whether to
>   delay is required or not after reset.
> ---
>  hw/xscom.c |   39 +++++++++++++++++++++------------------
>  1 file changed, 21 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/xscom.c b/hw/xscom.c
> index 5b3bd88..d98f5ef 100644
> --- a/hw/xscom.c
> +++ b/hw/xscom.c
> @@ -92,10 +92,11 @@ static uint64_t xscom_wait_done(void)
>  	return mfspr(SPR_HMER);
>  }
>  
> -static void xscom_reset(uint32_t gcid)
> +static void xscom_reset(uint32_t gcid, bool need_delay)
>  {
>  	u64 hmer;
>  	uint32_t recv_status_reg, log_reg, err_reg;
> +	struct timespec ts;
>  
>  	/* Clear errors in HMER */
>  	mtspr(SPR_HMER, HMER_CLR_MASK);
> @@ -126,6 +127,21 @@ static void xscom_reset(uint32_t gcid)
>  	hmer = xscom_wait_done();
>  	if (hmer & SPR_HMER_XSCOM_FAIL)
>  		goto fail;
> +
> +	if (need_delay) {
> +		/*
> +		 * Its observed that sometimes immediate retry of
> +		 * XSCOM operation returns wrong data. Adding a
> +		 * delay for XSCOM reset to be effective. Delay of
> +		 * 10 ms is found to be working fine experimentally.
> +		 * FIXME: Replace 10ms delay by exact delay needed
> +		 * or other alternate method to confirm XSCOM reset
> +		 * completion, after checking from HW folks.
> +		 */
> +		ts.tv_sec = 0;
> +		ts.tv_nsec = 10 * 1000;
> +		nanosleep_nopoll(&ts, NULL);
> +	}
>  	return;
>   fail:
>  	/* Fatal error resetting XSCOM */
> @@ -140,7 +156,6 @@ static void xscom_reset(uint32_t gcid)
>  static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
>  			      bool is_write, int64_t retries)
>  {
> -	struct timespec ts;
>  	unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
>  	int64_t rc = OPAL_HARDWARE;
>  
> @@ -158,20 +173,8 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
>  			prlog(PR_NOTICE, "XSCOM: Busy even after %d retries, "
>  				"resetting XSCOM now. Total retries  = %lld\n",
>  				XSCOM_BUSY_RESET_THRESHOLD, retries);
> -			xscom_reset(gcid);
> -
> -			/*
> -			 * Its observed that sometimes immediate retry of
> -			 * XSCOM operation returns wrong data. Adding a
> -			 * delay for XSCOM reset to be effective. Delay of
> -			 * 10 ms is found to be working fine experimentally.
> -			 * FIXME: Replace 10ms delay by exact delay needed
> -			 * or other alternate method to confirm XSCOM reset
> -			 * completion, after checking from HW folks.
> -			 */
> -			ts.tv_sec = 0;
> -			ts.tv_nsec = 10 * 1000;
> -			nanosleep_nopoll(&ts, NULL);
> +			xscom_reset(gcid, true);
> +
>  		}
>  
>  		/* Log error if we have retried enough and its still busy */
> @@ -183,7 +186,7 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
>  		return OPAL_XSCOM_BUSY;
>  
>  	case 2: /* CPU is asleep, reset XSCOM engine and return */
> -		xscom_reset(gcid);
> +		xscom_reset(gcid, false);
>  		return OPAL_XSCOM_CHIPLET_OFF;
>  	case 3: /* Partial good */
>  		rc = OPAL_XSCOM_PARTIAL_GOOD;
> @@ -208,7 +211,7 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
>  		is_write ? "write" : "read", gcid, pcb_addr, stat);
>  
>  	/* We need to reset the XSCOM or we'll hang on the next access */
> -	xscom_reset(gcid);
> +	xscom_reset(gcid, false);
>  
>  	/* Non recovered ... just fail */
>  	return rc;
> 
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Stewart Smith Dec. 13, 2017, 3:09 a.m. UTC | #2
Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> writes:
> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>
> So caller of xscom_reset() does not have to bother about adding a delay
> separately. Instead caller can control whether to add a delay or not using
> second argument to xscom_reset().
>
> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> ---
> Changes in V2:
> - Add a bool argument to xscom_reset() to give the control to caller whether to
>   delay is required or not after reset.
> ---
>  hw/xscom.c |   39 +++++++++++++++++++++------------------
>  1 file changed, 21 insertions(+), 18 deletions(-)

Thanks, series merged to master as of
10f0a09239ddfd4faf47d792f04d3124fb347f88

Let me know if this should head to 5.9.x or not. Currently I haven't
cherry picked it back.
Vaidyanathan Srinivasan Dec. 13, 2017, 7:50 a.m. UTC | #3
* Stewart Smith <stewart@linux.vnet.ibm.com> [2017-12-13 14:09:47]:

> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> writes:
> > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
> >
> > So caller of xscom_reset() does not have to bother about adding a delay
> > separately. Instead caller can control whether to add a delay or not using
> > second argument to xscom_reset().
> >
> > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Reviewed-by:  Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>

> > ---
> > Changes in V2:
> > - Add a bool argument to xscom_reset() to give the control to caller whether to
> >   delay is required or not after reset.
> > ---
> >  hw/xscom.c |   39 +++++++++++++++++++++------------------
> >  1 file changed, 21 insertions(+), 18 deletions(-)
> 
> Thanks, series merged to master as of
> 10f0a09239ddfd4faf47d792f04d3124fb347f88
> 
> Let me know if this should head to 5.9.x or not. Currently I haven't
> cherry picked it back.

Yes Stewart, we need this in 5.9.x for all P9 systems. Kindly pick
this xscom recovery for skiboot stable also.

Thanks,
Vaidy
Stewart Smith Dec. 13, 2017, 10:58 p.m. UTC | #4
Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> writes:
>> > Changes in V2:
>> > - Add a bool argument to xscom_reset() to give the control to caller whether to
>> >   delay is required or not after reset.
>> > ---
>> >  hw/xscom.c |   39 +++++++++++++++++++++------------------
>> >  1 file changed, 21 insertions(+), 18 deletions(-)
>> 
>> Thanks, series merged to master as of
>> 10f0a09239ddfd4faf47d792f04d3124fb347f88
>> 
>> Let me know if this should head to 5.9.x or not. Currently I haven't
>> cherry picked it back.
>
> Yes Stewart, we need this in 5.9.x for all P9 systems. Kindly pick
> this xscom recovery for skiboot stable also.

Okay, I've cherry picked these back into 5.9.x as of
8627ac9fb6f8b2375fd65ba17f0320199458b722
diff mbox series

Patch

diff --git a/hw/xscom.c b/hw/xscom.c
index 5b3bd88..d98f5ef 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -92,10 +92,11 @@  static uint64_t xscom_wait_done(void)
 	return mfspr(SPR_HMER);
 }
 
-static void xscom_reset(uint32_t gcid)
+static void xscom_reset(uint32_t gcid, bool need_delay)
 {
 	u64 hmer;
 	uint32_t recv_status_reg, log_reg, err_reg;
+	struct timespec ts;
 
 	/* Clear errors in HMER */
 	mtspr(SPR_HMER, HMER_CLR_MASK);
@@ -126,6 +127,21 @@  static void xscom_reset(uint32_t gcid)
 	hmer = xscom_wait_done();
 	if (hmer & SPR_HMER_XSCOM_FAIL)
 		goto fail;
+
+	if (need_delay) {
+		/*
+		 * Its observed that sometimes immediate retry of
+		 * XSCOM operation returns wrong data. Adding a
+		 * delay for XSCOM reset to be effective. Delay of
+		 * 10 ms is found to be working fine experimentally.
+		 * FIXME: Replace 10ms delay by exact delay needed
+		 * or other alternate method to confirm XSCOM reset
+		 * completion, after checking from HW folks.
+		 */
+		ts.tv_sec = 0;
+		ts.tv_nsec = 10 * 1000;
+		nanosleep_nopoll(&ts, NULL);
+	}
 	return;
  fail:
 	/* Fatal error resetting XSCOM */
@@ -140,7 +156,6 @@  static void xscom_reset(uint32_t gcid)
 static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
 			      bool is_write, int64_t retries)
 {
-	struct timespec ts;
 	unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
 	int64_t rc = OPAL_HARDWARE;
 
@@ -158,20 +173,8 @@  static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
 			prlog(PR_NOTICE, "XSCOM: Busy even after %d retries, "
 				"resetting XSCOM now. Total retries  = %lld\n",
 				XSCOM_BUSY_RESET_THRESHOLD, retries);
-			xscom_reset(gcid);
-
-			/*
-			 * Its observed that sometimes immediate retry of
-			 * XSCOM operation returns wrong data. Adding a
-			 * delay for XSCOM reset to be effective. Delay of
-			 * 10 ms is found to be working fine experimentally.
-			 * FIXME: Replace 10ms delay by exact delay needed
-			 * or other alternate method to confirm XSCOM reset
-			 * completion, after checking from HW folks.
-			 */
-			ts.tv_sec = 0;
-			ts.tv_nsec = 10 * 1000;
-			nanosleep_nopoll(&ts, NULL);
+			xscom_reset(gcid, true);
+
 		}
 
 		/* Log error if we have retried enough and its still busy */
@@ -183,7 +186,7 @@  static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
 		return OPAL_XSCOM_BUSY;
 
 	case 2: /* CPU is asleep, reset XSCOM engine and return */
-		xscom_reset(gcid);
+		xscom_reset(gcid, false);
 		return OPAL_XSCOM_CHIPLET_OFF;
 	case 3: /* Partial good */
 		rc = OPAL_XSCOM_PARTIAL_GOOD;
@@ -208,7 +211,7 @@  static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
 		is_write ? "write" : "read", gcid, pcb_addr, stat);
 
 	/* We need to reset the XSCOM or we'll hang on the next access */
-	xscom_reset(gcid);
+	xscom_reset(gcid, false);
 
 	/* Non recovered ... just fail */
 	return rc;