Message ID | 151273017576.22104.17324894016679784398.stgit@jupiter.in.ibm.com |
---|---|
State | Accepted |
Headers | show |
Series | [v2,1/2] opal/xscom: Move the delay inside xscom_reset() function. | expand |
On Fri, 08 Dec 2017 16:19:35 +0530 Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote: > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > So caller of xscom_reset() does not have to bother about adding a delay > separately. Instead caller can control whether to add a delay or not using > second argument to xscom_reset(). > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> > --- > Changes in V2: > - Add a bool argument to xscom_reset() to give the control to caller whether to > delay is required or not after reset. > --- > hw/xscom.c | 39 +++++++++++++++++++++------------------ > 1 file changed, 21 insertions(+), 18 deletions(-) > > diff --git a/hw/xscom.c b/hw/xscom.c > index 5b3bd88..d98f5ef 100644 > --- a/hw/xscom.c > +++ b/hw/xscom.c > @@ -92,10 +92,11 @@ static uint64_t xscom_wait_done(void) > return mfspr(SPR_HMER); > } > > -static void xscom_reset(uint32_t gcid) > +static void xscom_reset(uint32_t gcid, bool need_delay) > { > u64 hmer; > uint32_t recv_status_reg, log_reg, err_reg; > + struct timespec ts; > > /* Clear errors in HMER */ > mtspr(SPR_HMER, HMER_CLR_MASK); > @@ -126,6 +127,21 @@ static void xscom_reset(uint32_t gcid) > hmer = xscom_wait_done(); > if (hmer & SPR_HMER_XSCOM_FAIL) > goto fail; > + > + if (need_delay) { > + /* > + * Its observed that sometimes immediate retry of > + * XSCOM operation returns wrong data. Adding a > + * delay for XSCOM reset to be effective. Delay of > + * 10 ms is found to be working fine experimentally. > + * FIXME: Replace 10ms delay by exact delay needed > + * or other alternate method to confirm XSCOM reset > + * completion, after checking from HW folks. > + */ > + ts.tv_sec = 0; > + ts.tv_nsec = 10 * 1000; > + nanosleep_nopoll(&ts, NULL); > + } > return; > fail: > /* Fatal error resetting XSCOM */ > @@ -140,7 +156,6 @@ static void xscom_reset(uint32_t gcid) > static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr, > bool is_write, int64_t retries) > { > - struct timespec ts; > unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer); > int64_t rc = OPAL_HARDWARE; > > @@ -158,20 +173,8 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add > prlog(PR_NOTICE, "XSCOM: Busy even after %d retries, " > "resetting XSCOM now. Total retries = %lld\n", > XSCOM_BUSY_RESET_THRESHOLD, retries); > - xscom_reset(gcid); > - > - /* > - * Its observed that sometimes immediate retry of > - * XSCOM operation returns wrong data. Adding a > - * delay for XSCOM reset to be effective. Delay of > - * 10 ms is found to be working fine experimentally. > - * FIXME: Replace 10ms delay by exact delay needed > - * or other alternate method to confirm XSCOM reset > - * completion, after checking from HW folks. > - */ > - ts.tv_sec = 0; > - ts.tv_nsec = 10 * 1000; > - nanosleep_nopoll(&ts, NULL); > + xscom_reset(gcid, true); > + > } > > /* Log error if we have retried enough and its still busy */ > @@ -183,7 +186,7 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add > return OPAL_XSCOM_BUSY; > > case 2: /* CPU is asleep, reset XSCOM engine and return */ > - xscom_reset(gcid); > + xscom_reset(gcid, false); > return OPAL_XSCOM_CHIPLET_OFF; > case 3: /* Partial good */ > rc = OPAL_XSCOM_PARTIAL_GOOD; > @@ -208,7 +211,7 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add > is_write ? "write" : "read", gcid, pcb_addr, stat); > > /* We need to reset the XSCOM or we'll hang on the next access */ > - xscom_reset(gcid); > + xscom_reset(gcid, false); > > /* Non recovered ... just fail */ > return rc; > > _______________________________________________ > Skiboot mailing list > Skiboot@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/skiboot
Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> writes: > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > So caller of xscom_reset() does not have to bother about adding a delay > separately. Instead caller can control whether to add a delay or not using > second argument to xscom_reset(). > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > --- > Changes in V2: > - Add a bool argument to xscom_reset() to give the control to caller whether to > delay is required or not after reset. > --- > hw/xscom.c | 39 +++++++++++++++++++++------------------ > 1 file changed, 21 insertions(+), 18 deletions(-) Thanks, series merged to master as of 10f0a09239ddfd4faf47d792f04d3124fb347f88 Let me know if this should head to 5.9.x or not. Currently I haven't cherry picked it back.
* Stewart Smith <stewart@linux.vnet.ibm.com> [2017-12-13 14:09:47]: > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> writes: > > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > > > So caller of xscom_reset() does not have to bother about adding a delay > > separately. Instead caller can control whether to add a delay or not using > > second argument to xscom_reset(). > > > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> > > --- > > Changes in V2: > > - Add a bool argument to xscom_reset() to give the control to caller whether to > > delay is required or not after reset. > > --- > > hw/xscom.c | 39 +++++++++++++++++++++------------------ > > 1 file changed, 21 insertions(+), 18 deletions(-) > > Thanks, series merged to master as of > 10f0a09239ddfd4faf47d792f04d3124fb347f88 > > Let me know if this should head to 5.9.x or not. Currently I haven't > cherry picked it back. Yes Stewart, we need this in 5.9.x for all P9 systems. Kindly pick this xscom recovery for skiboot stable also. Thanks, Vaidy
Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> writes: >> > Changes in V2: >> > - Add a bool argument to xscom_reset() to give the control to caller whether to >> > delay is required or not after reset. >> > --- >> > hw/xscom.c | 39 +++++++++++++++++++++------------------ >> > 1 file changed, 21 insertions(+), 18 deletions(-) >> >> Thanks, series merged to master as of >> 10f0a09239ddfd4faf47d792f04d3124fb347f88 >> >> Let me know if this should head to 5.9.x or not. Currently I haven't >> cherry picked it back. > > Yes Stewart, we need this in 5.9.x for all P9 systems. Kindly pick > this xscom recovery for skiboot stable also. Okay, I've cherry picked these back into 5.9.x as of 8627ac9fb6f8b2375fd65ba17f0320199458b722
diff --git a/hw/xscom.c b/hw/xscom.c index 5b3bd88..d98f5ef 100644 --- a/hw/xscom.c +++ b/hw/xscom.c @@ -92,10 +92,11 @@ static uint64_t xscom_wait_done(void) return mfspr(SPR_HMER); } -static void xscom_reset(uint32_t gcid) +static void xscom_reset(uint32_t gcid, bool need_delay) { u64 hmer; uint32_t recv_status_reg, log_reg, err_reg; + struct timespec ts; /* Clear errors in HMER */ mtspr(SPR_HMER, HMER_CLR_MASK); @@ -126,6 +127,21 @@ static void xscom_reset(uint32_t gcid) hmer = xscom_wait_done(); if (hmer & SPR_HMER_XSCOM_FAIL) goto fail; + + if (need_delay) { + /* + * Its observed that sometimes immediate retry of + * XSCOM operation returns wrong data. Adding a + * delay for XSCOM reset to be effective. Delay of + * 10 ms is found to be working fine experimentally. + * FIXME: Replace 10ms delay by exact delay needed + * or other alternate method to confirm XSCOM reset + * completion, after checking from HW folks. + */ + ts.tv_sec = 0; + ts.tv_nsec = 10 * 1000; + nanosleep_nopoll(&ts, NULL); + } return; fail: /* Fatal error resetting XSCOM */ @@ -140,7 +156,6 @@ static void xscom_reset(uint32_t gcid) static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr, bool is_write, int64_t retries) { - struct timespec ts; unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer); int64_t rc = OPAL_HARDWARE; @@ -158,20 +173,8 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add prlog(PR_NOTICE, "XSCOM: Busy even after %d retries, " "resetting XSCOM now. Total retries = %lld\n", XSCOM_BUSY_RESET_THRESHOLD, retries); - xscom_reset(gcid); - - /* - * Its observed that sometimes immediate retry of - * XSCOM operation returns wrong data. Adding a - * delay for XSCOM reset to be effective. Delay of - * 10 ms is found to be working fine experimentally. - * FIXME: Replace 10ms delay by exact delay needed - * or other alternate method to confirm XSCOM reset - * completion, after checking from HW folks. - */ - ts.tv_sec = 0; - ts.tv_nsec = 10 * 1000; - nanosleep_nopoll(&ts, NULL); + xscom_reset(gcid, true); + } /* Log error if we have retried enough and its still busy */ @@ -183,7 +186,7 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add return OPAL_XSCOM_BUSY; case 2: /* CPU is asleep, reset XSCOM engine and return */ - xscom_reset(gcid); + xscom_reset(gcid, false); return OPAL_XSCOM_CHIPLET_OFF; case 3: /* Partial good */ rc = OPAL_XSCOM_PARTIAL_GOOD; @@ -208,7 +211,7 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add is_write ? "write" : "read", gcid, pcb_addr, stat); /* We need to reset the XSCOM or we'll hang on the next access */ - xscom_reset(gcid); + xscom_reset(gcid, false); /* Non recovered ... just fail */ return rc;