[2/3] powerpc/powernv: Fix OPAL RTC driver OPAL_BUSY loops

Message ID 20180410114933.24581-3-npiggin@gmail.com
State Not Applicable
Headers show
Series
  • Untitled series #38185
Related show

Commit Message

Nicholas Piggin April 10, 2018, 11:49 a.m.
The OPAL RTC driver does not sleep in case it gets OPAL_BUSY or
OPAL_BUSY_EVENT from firmware, which causes large scheduling
latencies, up to 50 seconds have been observed here when RTC stops
responding (BMC reboot can do it).

Fix this by converting it to the standard form OPAL_BUSY loop that
sleeps.

Fixes 628daa8d5abfd ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks"
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: linux-rtc@vger.kernel.org
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/platforms/powernv/opal-rtc.c |  8 +++--
 drivers/rtc/rtc-opal.c                    | 37 ++++++++++++++---------
 2 files changed, 28 insertions(+), 17 deletions(-)

Comments

Alexandre Belloni April 10, 2018, 12:07 p.m. | #1
Hi Nicholas,

I would greatly appreciate a changelog and at least the cover letter
because it is difficult to grasp how this relates to the previous
patches you sent to the RTC mailing list. 

On 10/04/2018 21:49:32+1000, Nicholas Piggin wrote:
> The OPAL RTC driver does not sleep in case it gets OPAL_BUSY or
> OPAL_BUSY_EVENT from firmware, which causes large scheduling
> latencies, up to 50 seconds have been observed here when RTC stops
> responding (BMC reboot can do it).
> 
> Fix this by converting it to the standard form OPAL_BUSY loop that
> sleeps.
> 
> Fixes 628daa8d5abfd ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks"
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: linux-rtc@vger.kernel.org
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/opal-rtc.c |  8 +++--
>  drivers/rtc/rtc-opal.c                    | 37 ++++++++++++++---------

From what I understand, the changes in those files are fairly
independent, they should probably be separated to ease merging.

>  2 files changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
> index f8868864f373..aa2a5139462e 100644
> --- a/arch/powerpc/platforms/powernv/opal-rtc.c
> +++ b/arch/powerpc/platforms/powernv/opal-rtc.c
> @@ -48,10 +48,12 @@ unsigned long __init opal_get_boot_time(void)
>  
>  	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
>  		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
> -		if (rc == OPAL_BUSY_EVENT)
> +		if (rc == OPAL_BUSY_EVENT) {
> +			mdelay(OPAL_BUSY_DELAY_MS);
>  			opal_poll_events(NULL);
> -		else if (rc == OPAL_BUSY)
> -			mdelay(10);
> +		} else if (rc == OPAL_BUSY) {
> +			mdelay(OPAL_BUSY_DELAY_MS);
> +		}
>  	}
>  	if (rc != OPAL_SUCCESS)
>  		return 0;
> diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c
> index 304e891e35fc..60f2250fd96b 100644
> --- a/drivers/rtc/rtc-opal.c
> +++ b/drivers/rtc/rtc-opal.c
> @@ -57,7 +57,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
>  
>  static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
>  {
> -	long rc = OPAL_BUSY;
> +	s64 rc = OPAL_BUSY;
>  	int retries = 10;
>  	u32 y_m_d;
>  	u64 h_m_s_ms;
> @@ -66,13 +66,17 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
>  
>  	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
>  		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
> -		if (rc == OPAL_BUSY_EVENT)
> +		if (rc == OPAL_BUSY_EVENT) {
> +			msleep(OPAL_BUSY_DELAY_MS);
>  			opal_poll_events(NULL);
> -		else if (retries-- && (rc == OPAL_HARDWARE
> -				       || rc == OPAL_INTERNAL_ERROR))
> -			msleep(10);
> -		else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
> -			break;
> +		} else if (rc == OPAL_BUSY) {
> +			msleep(OPAL_BUSY_DELAY_MS);
> +		} else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
> +			if (retries--) {
> +				msleep(10); /* Wait 10ms before retry */
> +				rc = OPAL_BUSY; /* go around again */
> +			}
> +		}
>  	}
>  
>  	if (rc != OPAL_SUCCESS)
> @@ -87,21 +91,26 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
>  
>  static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
>  {
> -	long rc = OPAL_BUSY;
> +	s64 rc = OPAL_BUSY;
>  	int retries = 10;
>  	u32 y_m_d = 0;
>  	u64 h_m_s_ms = 0;
>  
>  	tm_to_opal(tm, &y_m_d, &h_m_s_ms);
> +
>  	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
>  		rc = opal_rtc_write(y_m_d, h_m_s_ms);
> -		if (rc == OPAL_BUSY_EVENT)
> +		if (rc == OPAL_BUSY_EVENT) {
> +			msleep(OPAL_BUSY_DELAY_MS);
>  			opal_poll_events(NULL);
> -		else if (retries-- && (rc == OPAL_HARDWARE
> -				       || rc == OPAL_INTERNAL_ERROR))
> -			msleep(10);
> -		else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
> -			break;
> +		} else if (rc == OPAL_BUSY) {
> +			msleep(OPAL_BUSY_DELAY_MS);
> +		} else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
> +			if (retries--) {
> +				msleep(10); /* Wait 10ms before retry */
> +				rc = OPAL_BUSY; /* go around again */
> +			}
> +		}
>  	}
>  
>  	return rc == OPAL_SUCCESS ? 0 : -EIO;
> -- 
> 2.17.0
>
Nicholas Piggin April 10, 2018, 1:01 p.m. | #2
On Tue, 10 Apr 2018 14:07:28 +0200
Alexandre Belloni <alexandre.belloni@bootlin.com> wrote:

> Hi Nicholas,
> 
> I would greatly appreciate a changelog and at least the cover letter
> because it is difficult to grasp how this relates to the previous
> patches you sent to the RTC mailing list. 

Yes good point. Basically this change is "standalone" except using
OPAL_BUSY_DELAY_MS define from patch 1. That patch has a lot of
comments about firmware delays I did not think would be too
interesting.

Basically we're adding msleep(10) here, because the firmware can
repeatedly return OPAL_BUSY for long periods, so we want to context
switch and respond to interrupts.

> 
> On 10/04/2018 21:49:32+1000, Nicholas Piggin wrote:
> > The OPAL RTC driver does not sleep in case it gets OPAL_BUSY or
> > OPAL_BUSY_EVENT from firmware, which causes large scheduling
> > latencies, up to 50 seconds have been observed here when RTC stops
> > responding (BMC reboot can do it).
> > 
> > Fix this by converting it to the standard form OPAL_BUSY loop that
> > sleeps.
> > 
> > Fixes 	 ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks"
> > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > Cc: linux-rtc@vger.kernel.org
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/platforms/powernv/opal-rtc.c |  8 +++--
> >  drivers/rtc/rtc-opal.c                    | 37 ++++++++++++++---------  
> 
> From what I understand, the changes in those files are fairly
> independent, they should probably be separated to ease merging.

I'm happy to do that. It's using the same firmware call, so I thought
a single patch would be fine. But I guess the boot call can be
dropped from this patch because it does not  not solve the problem
described in the changelog.

Would you be happy for the driver change to be merged via the powerpc
tree? The code being fixed here came from the same original patch as
a similar issue being fixed in the OPAL NVRAM driver so it might be
easier that way.

Thanks,
Nick
Alexandre Belloni April 24, 2018, 6:39 p.m. | #3
On 10/04/2018 23:01:36+1000, Nicholas Piggin wrote:
> On Tue, 10 Apr 2018 14:07:28 +0200
> Alexandre Belloni <alexandre.belloni@bootlin.com> wrote:
> > > Fixes 	 ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks"
> > > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > > Cc: linux-rtc@vger.kernel.org
> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > > ---
> > >  arch/powerpc/platforms/powernv/opal-rtc.c |  8 +++--
> > >  drivers/rtc/rtc-opal.c                    | 37 ++++++++++++++---------  
> > 
> > From what I understand, the changes in those files are fairly
> > independent, they should probably be separated to ease merging.
> 
> I'm happy to do that. It's using the same firmware call, so I thought
> a single patch would be fine. But I guess the boot call can be
> dropped from this patch because it does not  not solve the problem
> described in the changelog.
> 
> Would you be happy for the driver change to be merged via the powerpc
> tree? The code being fixed here came from the same original patch as
> a similar issue being fixed in the OPAL NVRAM driver so it might be
> easier that way.
> 

Ok then, just add my

Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>

and let it go through the powerpc tree.
Michael Ellerman April 25, 2018, 3:28 a.m. | #4
Alexandre Belloni <alexandre.belloni@bootlin.com> writes:
> On 10/04/2018 23:01:36+1000, Nicholas Piggin wrote:
>> On Tue, 10 Apr 2018 14:07:28 +0200
>> Alexandre Belloni <alexandre.belloni@bootlin.com> wrote:
>> > > Fixes 	 ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks"
>> > > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> > > Cc: linux-rtc@vger.kernel.org
>> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> > > ---
>> > >  arch/powerpc/platforms/powernv/opal-rtc.c |  8 +++--
>> > >  drivers/rtc/rtc-opal.c                    | 37 ++++++++++++++---------  
>> > 
>> > From what I understand, the changes in those files are fairly
>> > independent, they should probably be separated to ease merging.
>> 
>> I'm happy to do that. It's using the same firmware call, so I thought
>> a single patch would be fine. But I guess the boot call can be
>> dropped from this patch because it does not  not solve the problem
>> described in the changelog.
>> 
>> Would you be happy for the driver change to be merged via the powerpc
>> tree? The code being fixed here came from the same original patch as
>> a similar issue being fixed in the OPAL NVRAM driver so it might be
>> easier that way.
>
> Ok then, just add my
>
> Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
>
> and let it go through the powerpc tree.

Thanks.

It's still mostly an rtc patch by lines changed, so I changed the
subject to:

  rtc: opal: Fix OPAL RTC driver OPAL_BUSY loops

cheers
Alexandre Belloni April 25, 2018, 9:41 a.m. | #5
On 25/04/2018 13:28:27+1000, Michael Ellerman wrote:
> Alexandre Belloni <alexandre.belloni@bootlin.com> writes:
> > On 10/04/2018 23:01:36+1000, Nicholas Piggin wrote:
> >> On Tue, 10 Apr 2018 14:07:28 +0200
> >> Alexandre Belloni <alexandre.belloni@bootlin.com> wrote:
> >> > > Fixes 	 ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks"
> >> > > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> >> > > Cc: linux-rtc@vger.kernel.org
> >> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> >> > > ---
> >> > >  arch/powerpc/platforms/powernv/opal-rtc.c |  8 +++--
> >> > >  drivers/rtc/rtc-opal.c                    | 37 ++++++++++++++---------  
> >> > 
> >> > From what I understand, the changes in those files are fairly
> >> > independent, they should probably be separated to ease merging.
> >> 
> >> I'm happy to do that. It's using the same firmware call, so I thought
> >> a single patch would be fine. But I guess the boot call can be
> >> dropped from this patch because it does not  not solve the problem
> >> described in the changelog.
> >> 
> >> Would you be happy for the driver change to be merged via the powerpc
> >> tree? The code being fixed here came from the same original patch as
> >> a similar issue being fixed in the OPAL NVRAM driver so it might be
> >> easier that way.
> >
> > Ok then, just add my
> >
> > Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
> >
> > and let it go through the powerpc tree.
> 
> Thanks.
> 
> It's still mostly an rtc patch by lines changed, so I changed the
> subject to:
> 
>   rtc: opal: Fix OPAL RTC driver OPAL_BUSY loops
> 

Great, thanks!
Michael Ellerman April 26, 2018, 10:22 a.m. | #6
On Tue, 2018-04-10 at 11:49:32 UTC, Nicholas Piggin wrote:
> The OPAL RTC driver does not sleep in case it gets OPAL_BUSY or
> OPAL_BUSY_EVENT from firmware, which causes large scheduling
> latencies, up to 50 seconds have been observed here when RTC stops
> responding (BMC reboot can do it).
> 
> Fix this by converting it to the standard form OPAL_BUSY loop that
> sleeps.
> 
> Fixes 628daa8d5abfd ("powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks"
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: linux-rtc@vger.kernel.org
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/682e6b4da5cbe8e9a53f979a58c2a9

cheers

Patch

diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index f8868864f373..aa2a5139462e 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -48,10 +48,12 @@  unsigned long __init opal_get_boot_time(void)
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-		if (rc == OPAL_BUSY_EVENT)
+		if (rc == OPAL_BUSY_EVENT) {
+			mdelay(OPAL_BUSY_DELAY_MS);
 			opal_poll_events(NULL);
-		else if (rc == OPAL_BUSY)
-			mdelay(10);
+		} else if (rc == OPAL_BUSY) {
+			mdelay(OPAL_BUSY_DELAY_MS);
+		}
 	}
 	if (rc != OPAL_SUCCESS)
 		return 0;
diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c
index 304e891e35fc..60f2250fd96b 100644
--- a/drivers/rtc/rtc-opal.c
+++ b/drivers/rtc/rtc-opal.c
@@ -57,7 +57,7 @@  static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
 
 static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 {
-	long rc = OPAL_BUSY;
+	s64 rc = OPAL_BUSY;
 	int retries = 10;
 	u32 y_m_d;
 	u64 h_m_s_ms;
@@ -66,13 +66,17 @@  static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-		if (rc == OPAL_BUSY_EVENT)
+		if (rc == OPAL_BUSY_EVENT) {
+			msleep(OPAL_BUSY_DELAY_MS);
 			opal_poll_events(NULL);
-		else if (retries-- && (rc == OPAL_HARDWARE
-				       || rc == OPAL_INTERNAL_ERROR))
-			msleep(10);
-		else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-			break;
+		} else if (rc == OPAL_BUSY) {
+			msleep(OPAL_BUSY_DELAY_MS);
+		} else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+			if (retries--) {
+				msleep(10); /* Wait 10ms before retry */
+				rc = OPAL_BUSY; /* go around again */
+			}
+		}
 	}
 
 	if (rc != OPAL_SUCCESS)
@@ -87,21 +91,26 @@  static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 
 static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
 {
-	long rc = OPAL_BUSY;
+	s64 rc = OPAL_BUSY;
 	int retries = 10;
 	u32 y_m_d = 0;
 	u64 h_m_s_ms = 0;
 
 	tm_to_opal(tm, &y_m_d, &h_m_s_ms);
+
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_rtc_write(y_m_d, h_m_s_ms);
-		if (rc == OPAL_BUSY_EVENT)
+		if (rc == OPAL_BUSY_EVENT) {
+			msleep(OPAL_BUSY_DELAY_MS);
 			opal_poll_events(NULL);
-		else if (retries-- && (rc == OPAL_HARDWARE
-				       || rc == OPAL_INTERNAL_ERROR))
-			msleep(10);
-		else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-			break;
+		} else if (rc == OPAL_BUSY) {
+			msleep(OPAL_BUSY_DELAY_MS);
+		} else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+			if (retries--) {
+				msleep(10); /* Wait 10ms before retry */
+				rc = OPAL_BUSY; /* go around again */
+			}
+		}
 	}
 
 	return rc == OPAL_SUCCESS ? 0 : -EIO;