diff mbox

[v3,i2c/for-next] i2c: i801: recover from hardware PEC errors

Message ID 1432332868-12705-1-git-send-email-ellen@cumulusnetworks.com
State Superseded
Headers show

Commit Message

Ellen Wang May 22, 2015, 10:14 p.m. UTC
On a CRC error while using hardware-supported PEC, an additional
error bit is set in the auxiliary status register.  If this bit
isn't cleared, all subsequent operations will fail, essentially
hanging the controller.

The fix is simple: check, report, and clear the bit in
i801_check_post().  Also, in case the driver starts with the
hardware in that state, clear it in i801_check_pre() as well.

Signed-off-by: Ellen Wang <ellen@cumulusnetworks.com>
---
Update: fix typos in commit message, otherwise the same as v2

This is essentially the patch from Jean Delvare, which handles
the polling case while my original version didn't.  (Thank you!
Please add appropriate attribution if you wish.)

I tested all the additional code paths by selectively commenting
out code: with interrupts, without interrupts, relying on check_pre()
to clear CRCE, no clearing of CRCE at all (baseline).
---
 drivers/i2c/busses/i2c-i801.c |   53 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

Comments

Wolfram Sang July 9, 2015, 4:59 p.m. UTC | #1
On Fri, May 22, 2015 at 03:14:28PM -0700, Ellen Wang wrote:
> On a CRC error while using hardware-supported PEC, an additional
> error bit is set in the auxiliary status register.  If this bit
> isn't cleared, all subsequent operations will fail, essentially
> hanging the controller.
> 
> The fix is simple: check, report, and clear the bit in
> i801_check_post().  Also, in case the driver starts with the
> hardware in that state, clear it in i801_check_pre() as well.
> 
> Signed-off-by: Ellen Wang <ellen@cumulusnetworks.com>
> ---
> Update: fix typos in commit message, otherwise the same as v2
> 
> This is essentially the patch from Jean Delvare, which handles
> the polling case while my original version didn't.  (Thank you!
> Please add appropriate attribution if you wish.)
> 
> I tested all the additional code paths by selectively commenting
> out code: with interrupts, without interrupts, relying on check_pre()
> to clear CRCE, no clearing of CRCE at all (baseline).

Jean, any comments or tags to add?

> ---
>  drivers/i2c/busses/i2c-i801.c |   53 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 51 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
> index 5ecbb3f..fa50df0 100644
> --- a/drivers/i2c/busses/i2c-i801.c
> +++ b/drivers/i2c/busses/i2c-i801.c
> @@ -125,6 +125,10 @@
>  #define SMBHSTCFG_SMB_SMI_EN	2
>  #define SMBHSTCFG_I2C_EN	4
>  
> +/* Auxiliary status register bits, ICH4+ only */
> +#define SMBAUXSTS_CRCE		1
> +#define SMBAUXSTS_STCO		2
> +
>  /* Auxiliary control register bits, ICH4+ only */
>  #define SMBAUXCTL_CRC		1
>  #define SMBAUXCTL_E32B		2
> @@ -273,6 +277,29 @@ static int i801_check_pre(struct i801_priv *priv)
>  		}
>  	}
>  
> +	/*
> +	 * Clear CRC status if needed.
> +	 * During normal operation, i801_check_post() takes care
> +	 * of it after every operation.  We do it here only in case
> +	 * the hardware was already in this state when the driver
> +	 * started.
> +	 */
> +	if (priv->features & FEATURE_SMBUS_PEC) {
> +		status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
> +		if (status) {
> +			dev_dbg(&priv->pci_dev->dev,
> +				"Clearing aux status flags (%02x)\n", status);
> +			outb_p(status, SMBAUXSTS(priv));
> +			status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
> +			if (status) {
> +				dev_err(&priv->pci_dev->dev,
> +					"Failed clearing aux status flags (%02x)\n",
> +					status);
> +				return -EBUSY;
> +			}
> +		}
> +	}
> +
>  	return 0;
>  }
>  
> @@ -316,8 +343,30 @@ static int i801_check_post(struct i801_priv *priv, int status)
>  		dev_err(&priv->pci_dev->dev, "Transaction failed\n");
>  	}
>  	if (status & SMBHSTSTS_DEV_ERR) {
> -		result = -ENXIO;
> -		dev_dbg(&priv->pci_dev->dev, "No response\n");
> +		/*
> +		 * This may be a PEC error, check and clear it.
> +		 *
> +		 * AUXSTS is handled differently from HSTSTS.
> +		 * For HSTSTS, i801_isr() or i801_wait_intr()
> +		 * has already cleared the error bits in hardware,
> +		 * and we are passed a copy of the original value
> +		 * in "status".
> +		 * For AUXSTS, the hardware register is left
> +		 * for us to handle here.
> +		 * This is asymmetric, slightly iffy, but safe,
> +		 * since all this code is serialized and the CRCE
> +		 * bit is harmless as long as it's cleared before
> +		 * the next operation.
> +		 */
> +		if ((priv->features & FEATURE_SMBUS_PEC) &&
> +		    (inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE)) {
> +			outb_p(SMBAUXSTS_CRCE, SMBAUXSTS(priv));
> +			result = -EBADMSG;
> +			dev_dbg(&priv->pci_dev->dev, "PEC error\n");
> +		} else {
> +			result = -ENXIO;
> +			dev_dbg(&priv->pci_dev->dev, "No response\n");
> +		}
>  	}
>  	if (status & SMBHSTSTS_BUS_ERR) {
>  		result = -EAGAIN;
> -- 
> 1.7.10.4
>
Jean Delvare July 1, 2016, 9:30 a.m. UTC | #2
Hi Wolfram, Ellen,

On Thu, 9 Jul 2015 18:59:39 +0200, Wolfram Sang wrote:
> On Fri, May 22, 2015 at 03:14:28PM -0700, Ellen Wang wrote:
> > On a CRC error while using hardware-supported PEC, an additional
> > error bit is set in the auxiliary status register.  If this bit
> > isn't cleared, all subsequent operations will fail, essentially
> > hanging the controller.
> > 
> > The fix is simple: check, report, and clear the bit in
> > i801_check_post().  Also, in case the driver starts with the
> > hardware in that state, clear it in i801_check_pre() as well.
> > 
> > Signed-off-by: Ellen Wang <ellen@cumulusnetworks.com>
> > ---
> > Update: fix typos in commit message, otherwise the same as v2
> > 
> > This is essentially the patch from Jean Delvare, which handles
> > the polling case while my original version didn't.  (Thank you!
> > Please add appropriate attribution if you wish.)
> > 
> > I tested all the additional code paths by selectively commenting
> > out code: with interrupts, without interrupts, relying on check_pre()
> > to clear CRCE, no clearing of CRCE at all (baseline).
> 
> Jean, any comments or tags to add?

Sorry, this one fell through the cracks. The patch is good and still
needed. I have tested it successfully on several systems. Wolfram,
please apply it.

Tested-by: Jean Delvare <jdelvare@suse.de>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Wolfram Sang July 1, 2016, 3:49 p.m. UTC | #3
> Sorry, this one fell through the cracks. The patch is good and still
> needed. I have tested it successfully on several systems. Wolfram,
> please apply it.
> 
> Tested-by: Jean Delvare <jdelvare@suse.de>
> Reviewed-by: Jean Delvare <jdelvare@suse.de>

Thanks, will do. Do you happen to have rebased it to latest
i2c/for-next?
Wolfram Sang July 1, 2016, 9:13 p.m. UTC | #4
On Fri, May 22, 2015 at 03:14:28PM -0700, Ellen Wang wrote:
> On a CRC error while using hardware-supported PEC, an additional
> error bit is set in the auxiliary status register.  If this bit
> isn't cleared, all subsequent operations will fail, essentially
> hanging the controller.
> 
> The fix is simple: check, report, and clear the bit in
> i801_check_post().  Also, in case the driver starts with the
> hardware in that state, clear it in i801_check_pre() as well.
> 
> Signed-off-by: Ellen Wang <ellen@cumulusnetworks.com>

Applied to for-next, thanks!
diff mbox

Patch

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 5ecbb3f..fa50df0 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -125,6 +125,10 @@ 
 #define SMBHSTCFG_SMB_SMI_EN	2
 #define SMBHSTCFG_I2C_EN	4
 
+/* Auxiliary status register bits, ICH4+ only */
+#define SMBAUXSTS_CRCE		1
+#define SMBAUXSTS_STCO		2
+
 /* Auxiliary control register bits, ICH4+ only */
 #define SMBAUXCTL_CRC		1
 #define SMBAUXCTL_E32B		2
@@ -273,6 +277,29 @@  static int i801_check_pre(struct i801_priv *priv)
 		}
 	}
 
+	/*
+	 * Clear CRC status if needed.
+	 * During normal operation, i801_check_post() takes care
+	 * of it after every operation.  We do it here only in case
+	 * the hardware was already in this state when the driver
+	 * started.
+	 */
+	if (priv->features & FEATURE_SMBUS_PEC) {
+		status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
+		if (status) {
+			dev_dbg(&priv->pci_dev->dev,
+				"Clearing aux status flags (%02x)\n", status);
+			outb_p(status, SMBAUXSTS(priv));
+			status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
+			if (status) {
+				dev_err(&priv->pci_dev->dev,
+					"Failed clearing aux status flags (%02x)\n",
+					status);
+				return -EBUSY;
+			}
+		}
+	}
+
 	return 0;
 }
 
@@ -316,8 +343,30 @@  static int i801_check_post(struct i801_priv *priv, int status)
 		dev_err(&priv->pci_dev->dev, "Transaction failed\n");
 	}
 	if (status & SMBHSTSTS_DEV_ERR) {
-		result = -ENXIO;
-		dev_dbg(&priv->pci_dev->dev, "No response\n");
+		/*
+		 * This may be a PEC error, check and clear it.
+		 *
+		 * AUXSTS is handled differently from HSTSTS.
+		 * For HSTSTS, i801_isr() or i801_wait_intr()
+		 * has already cleared the error bits in hardware,
+		 * and we are passed a copy of the original value
+		 * in "status".
+		 * For AUXSTS, the hardware register is left
+		 * for us to handle here.
+		 * This is asymmetric, slightly iffy, but safe,
+		 * since all this code is serialized and the CRCE
+		 * bit is harmless as long as it's cleared before
+		 * the next operation.
+		 */
+		if ((priv->features & FEATURE_SMBUS_PEC) &&
+		    (inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE)) {
+			outb_p(SMBAUXSTS_CRCE, SMBAUXSTS(priv));
+			result = -EBADMSG;
+			dev_dbg(&priv->pci_dev->dev, "PEC error\n");
+		} else {
+			result = -ENXIO;
+			dev_dbg(&priv->pci_dev->dev, "No response\n");
+		}
 	}
 	if (status & SMBHSTSTS_BUS_ERR) {
 		result = -EAGAIN;