diff mbox

pseries/eeh: Handle RTAS delay requests in configure_bridge

Message ID 1458606895-24620-1-git-send-email-ruscur@russell.cc (mailing list archive)
State Changes Requested
Headers show

Commit Message

Russell Currey March 22, 2016, 12:34 a.m. UTC
In the configure_pe and configure_bridge RTAS calls, the spec states
that values of 9900-9905 can be returned, indicating that software
should delay for 10^x (where x is the last digit, i.e. 990x)
milliseconds and attempt the call again. Currently, the kernel doesn't
know about this, and respecting it fixes some PCI failures when the
hypervisor is busy.

The delay is capped at 0.2 seconds.

Signed-off-by: Russell Currey <ruscur@russell.cc>
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 63 +++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 16 deletions(-)

Comments

Russell Currey March 23, 2016, 12:28 a.m. UTC | #1
On Tue, 2016-03-22 at 11:34 +1100, Russell Currey wrote:
> In the configure_pe and configure_bridge RTAS calls, the spec states
> that values of 9900-9905 can be returned, indicating that software
> should delay for 10^x (where x is the last digit, i.e. 990x)
> milliseconds and attempt the call again. Currently, the kernel doesn't
> know about this, and respecting it fixes some PCI failures when the
> hypervisor is busy.
> 
> The delay is capped at 0.2 seconds.
> 
> Signed-off-by: Russell Currey <ruscur@russell.cc>

Forgot to mention this patch should go to stable, 3.10+
Michael Ellerman March 23, 2016, 12:38 a.m. UTC | #2
On Tue, 2016-22-03 at 00:34:55 UTC, Russell Currey wrote:
> In the configure_pe and configure_bridge RTAS calls, the spec states
> that values of 9900-9905 can be returned, indicating that software
> should delay for 10^x (where x is the last digit, i.e. 990x)
> milliseconds and attempt the call again. Currently, the kernel doesn't
> know about this, and respecting it fixes some PCI failures when the
> hypervisor is busy.
> 
> The delay is capped at 0.2 seconds.
> 
> Signed-off-by: Russell Currey <ruscur@russell.cc>
> ---
>  arch/powerpc/platforms/pseries/eeh_pseries.c | 63 +++++++++++++++++++++-------
>  1 file changed, 47 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index ac3ffd9..c5603185 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -614,30 +614,61 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u
>  static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
>  {
>  	int config_addr;
> -	int ret;
> +	int ret = -1;

If possible it's preferable not to pre-initialise your return value.

If you leave it uninitialised then the compiler can (hopefully) detect any
paths where you fail to initialise it.

> +	/* Waiting 0.2s maximum before skipping configuration */
> +	int max_wait = 200;
> +	int mwait;
>  
>  	/* Figure out the PE address */
>  	config_addr = pe->config_addr;
>  	if (pe->addr)
>  		config_addr = pe->addr;
>  
> -	/* Use new configure-pe function, if supported */
> -	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> -		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> -				config_addr, BUID_HI(pe->phb->buid),
> -				BUID_LO(pe->phb->buid));
> -	} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
> -		ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
> -				config_addr, BUID_HI(pe->phb->buid),
> -				BUID_LO(pe->phb->buid));
> -	} else {
> -		return -EFAULT;
> -	}
> +	while (1) {
> +		if (max_wait < 0)
> +			goto err;

Can't you just do:

	while (max_wait > 0) {

?

> +
> +		/* Use new configure-pe function, if supported */
> +		if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> +			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> +					config_addr, BUID_HI(pe->phb->buid),
> +					BUID_LO(pe->phb->buid));
> +		} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
> +			ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
> +					config_addr, BUID_HI(pe->phb->buid),
> +					BUID_LO(pe->phb->buid));
> +		} else {
> +			return -EFAULT;
> +		}

I realise you've just indented that code, but it would be nice to clean it up
as a precursor patch.

AFAICS the args are identical, so you could just put the token in a variable
and do the rtas_call() once. It also looks like we check at startup that we
have one of the tokens, so we could choose the appropriate token then and avoid
any conditionals in this code.

>  
> -	if (ret)
> -		pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
> -			__func__, pe->phb->global_number, pe->addr, ret);
> +		/*
> +		 * If RTAS returns a delay value, it expects software to sleep
> +		 * for 10^x milliseconds.  The max value it can return is thus
> +		 * 10^5 (RTAS_EXTENDED_DELAY_MAX), which is way too long.
> +		 */
>  
> +		switch (ret) {
> +		case 0:
> +			return ret;
> +		case RTAS_EXTENDED_DELAY_MIN:
> +			mwait = 1;
> +			break;
> +		case RTAS_EXTENDED_DELAY_MAX:
> +			mwait = 10;
> +			break;
> +		case RTAS_EXTENDED_DELAY_MIN+2:
> +			mwait = 100;
> +			break;
> +		default:
> +			goto err;
> +		}
> +
> +		max_wait -= mwait;
> +		msleep(mwait);

Can you use rtas_busy_delay() ?

> +	}
> + err:
> +	pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
> +		__func__, pe->phb->global_number, pe->addr, ret);
>  	return ret;
>  }


cheers
Russell Currey March 23, 2016, 2:52 a.m. UTC | #3
On Wed, 2016-03-23 at 11:38 +1100, Michael Ellerman wrote:
> On Tue, 2016-22-03 at 00:34:55 UTC, Russell Currey wrote:
> > 
> > In the configure_pe and configure_bridge RTAS calls, the spec states
> > that values of 9900-9905 can be returned, indicating that software
> > should delay for 10^x (where x is the last digit, i.e. 990x)
> > milliseconds and attempt the call again. Currently, the kernel doesn't
> > know about this, and respecting it fixes some PCI failures when the
> > hypervisor is busy.
> > 
> > The delay is capped at 0.2 seconds.
> > 
> > Signed-off-by: Russell Currey <ruscur@russell.cc>
> > ---
> >  arch/powerpc/platforms/pseries/eeh_pseries.c | 63
> > +++++++++++++++++++++-------
> >  1 file changed, 47 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c
> > b/arch/powerpc/platforms/pseries/eeh_pseries.c
> > index ac3ffd9..c5603185 100644
> > --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> > +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> > @@ -614,30 +614,61 @@ static int pseries_eeh_get_log(struct eeh_pe *pe,
> > int severity, char *drv_log, u
> >  static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
> >  {
> >  	int config_addr;
> > -	int ret;
> > +	int ret = -1;
> If possible it's preferable not to pre-initialise your return value.
> 
> If you leave it uninitialised then the compiler can (hopefully) detect
> any
> paths where you fail to initialise it.
> 
Relic of a previous iteration, my bad.
> > 
> > +	/* Waiting 0.2s maximum before skipping configuration */
> > +	int max_wait = 200;
> > +	int mwait;
> >  
> >  	/* Figure out the PE address */
> >  	config_addr = pe->config_addr;
> >  	if (pe->addr)
> >  		config_addr = pe->addr;
> >  
> > -	/* Use new configure-pe function, if supported */
> > -	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> > -		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> > -				config_addr, BUID_HI(pe->phb->buid),
> > -				BUID_LO(pe->phb->buid));
> > -	} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
> > -		ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
> > -				config_addr, BUID_HI(pe->phb->buid),
> > -				BUID_LO(pe->phb->buid));
> > -	} else {
> > -		return -EFAULT;
> > -	}
> > +	while (1) {
> > +		if (max_wait < 0)
> > +			goto err;
> Can't you just do:
> 
> 	while (max_wait > 0) {
> 
> ?
...yes.
> 
> > 
> > +
> > +		/* Use new configure-pe function, if supported */
> > +		if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> > +			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> > +					config_addr, BUID_HI(pe->phb-
> > >buid),
> > +					BUID_LO(pe->phb->buid));
> > +		} else if (ibm_configure_bridge !=
> > RTAS_UNKNOWN_SERVICE) {
> > +			ret = rtas_call(ibm_configure_bridge, 3, 1,
> > NULL,
> > +					config_addr, BUID_HI(pe->phb-
> > >buid),
> > +					BUID_LO(pe->phb->buid));
> > +		} else {
> > +			return -EFAULT;
> > +		}
> I realise you've just indented that code, but it would be nice to clean
> it up
> as a precursor patch.
> 
> AFAICS the args are identical, so you could just put the token in a
> variable
> and do the rtas_call() once. It also looks like we check at startup that
> we
> have one of the tokens, so we could choose the appropriate token then and
> avoid
> any conditionals in this code.

Yes, good idea.
> 
> > 
> >  
> > -	if (ret)
> > -		pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x
> > (%d)\n",
> > -			__func__, pe->phb->global_number, pe->addr,
> > ret);
> > +		/*
> > +		 * If RTAS returns a delay value, it expects software
> > to sleep
> > +		 * for 10^x milliseconds.  The max value it can return
> > is thus
> > +		 * 10^5 (RTAS_EXTENDED_DELAY_MAX), which is way too
> > long.
> > +		 */
> >  
> > +		switch (ret) {
> > +		case 0:
> > +			return ret;
> > +		case RTAS_EXTENDED_DELAY_MIN:
> > +			mwait = 1;
> > +			break;
> > +		case RTAS_EXTENDED_DELAY_MAX:
> > +			mwait = 10;
> > +			break;
> > +		case RTAS_EXTENDED_DELAY_MIN+2:
> > +			mwait = 100;
> > +			break;
> > +		default:
> > +			goto err;
> > +		}
> > +
> > +		max_wait -= mwait;
> > +		msleep(mwait);
> Can you use rtas_busy_delay() ?

Wasn't aware of that, makes life a lot easier.  Do you know if the 0.2s
maximum delay also applies across the board?  I definitely want to enforce
it here, but if it is ubiquitous then it should be in rtas_busy_delay.
> 
> > 
> > +	}
> > + err:
> > +	pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
> > +		__func__, pe->phb->global_number, pe->addr, ret);
> >  	return ret;
> >  }
> 
> cheers
Michael Ellerman March 23, 2016, 10:39 a.m. UTC | #4
On Wed, 2016-03-23 at 13:52 +1100, Russell Currey wrote:
> On Wed, 2016-03-23 at 11:38 +1100, Michael Ellerman wrote:
> > On Tue, 2016-22-03 at 00:34:55 UTC, Russell Currey wrote:
> > >
> > > +		case RTAS_EXTENDED_DELAY_MIN+2:
> > > +			mwait = 100;
> > > +			break;
> > > +		default:
> > > +			goto err;
> > > +		}
> > > +
> > > +		max_wait -= mwait;
> > > +		msleep(mwait);

> > Can you use rtas_busy_delay() ?
>
> Wasn't aware of that, makes life a lot easier.  Do you know if the 0.2s
> maximum delay also applies across the board?  I definitely want to enforce
> it here, but if it is ubiquitous then it should be in rtas_busy_delay.

Not sure sorry, you'll have to read PAPR :)

cheers
Russell Currey March 23, 2016, 11:56 p.m. UTC | #5
On Wed, 2016-03-23 at 21:39 +1100, Michael Ellerman wrote:
> On Wed, 2016-03-23 at 13:52 +1100, Russell Currey wrote:
> > 
> > On Wed, 2016-03-23 at 11:38 +1100, Michael Ellerman wrote:
> > > 
> > > On Tue, 2016-22-03 at 00:34:55 UTC, Russell Currey wrote:
> > > > 
> > > > 
> > > > +		case RTAS_EXTENDED_DELAY_MIN+2:
> > > > +			mwait = 100;
> > > > +			break;
> > > > +		default:
> > > > +			goto err;
> > > > +		}
> > > > +
> > > > +		max_wait -= mwait;
> > > > +		msleep(mwait);
> > 
> > > 
> > > Can you use rtas_busy_delay() ?
> > Wasn't aware of that, makes life a lot easier.  Do you know if the 0.2s
> > maximum delay also applies across the board?  I definitely want to
> > enforce
> > it here, but if it is ubiquitous then it should be in rtas_busy_delay.
> Not sure sorry, you'll have to read PAPR :)
It doesn't mention anything, but I was more concerned about whether it's
ever sensible to have that much of a sleep in any case.  Probably safe to
ignore given it hasn't been a problem up to this point.
> 
> cheers
>
diff mbox

Patch

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index ac3ffd9..c5603185 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -614,30 +614,61 @@  static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u
 static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 {
 	int config_addr;
-	int ret;
+	int ret = -1;
+	/* Waiting 0.2s maximum before skipping configuration */
+	int max_wait = 200;
+	int mwait;
 
 	/* Figure out the PE address */
 	config_addr = pe->config_addr;
 	if (pe->addr)
 		config_addr = pe->addr;
 
-	/* Use new configure-pe function, if supported */
-	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
-		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
-				config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid));
-	} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
-		ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
-				config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid));
-	} else {
-		return -EFAULT;
-	}
+	while (1) {
+		if (max_wait < 0)
+			goto err;
+
+		/* Use new configure-pe function, if supported */
+		if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
+			ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+					config_addr, BUID_HI(pe->phb->buid),
+					BUID_LO(pe->phb->buid));
+		} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
+			ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
+					config_addr, BUID_HI(pe->phb->buid),
+					BUID_LO(pe->phb->buid));
+		} else {
+			return -EFAULT;
+		}
 
-	if (ret)
-		pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
-			__func__, pe->phb->global_number, pe->addr, ret);
+		/*
+		 * If RTAS returns a delay value, it expects software to sleep
+		 * for 10^x milliseconds.  The max value it can return is thus
+		 * 10^5 (RTAS_EXTENDED_DELAY_MAX), which is way too long.
+		 */
 
+		switch (ret) {
+		case 0:
+			return ret;
+		case RTAS_EXTENDED_DELAY_MIN:
+			mwait = 1;
+			break;
+		case RTAS_EXTENDED_DELAY_MAX:
+			mwait = 10;
+			break;
+		case RTAS_EXTENDED_DELAY_MIN+2:
+			mwait = 100;
+			break;
+		default:
+			goto err;
+		}
+
+		max_wait -= mwait;
+		msleep(mwait);
+	}
+ err:
+	pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+		__func__, pe->phb->global_number, pe->addr, ret);
 	return ret;
 }