diff mbox

i2c: designware: use enable on resume instead initialization

Message ID 1433785828-4100-1-git-send-email-lucas.de.marchi@gmail.com
State Rejected
Headers show

Commit Message

Lucas De Marchi June 8, 2015, 5:50 p.m. UTC
From: Fabio Mello <fabio.mello@intel.com>

According to documentation and tests, initialization is not
necessary on module resume, since the controller keeps its state
between disable/enable. Change the target address is also allowed.

So, this patch replaces the initialization on module resume with a
simple enable, and removes the (non required anymore) enables and
disables.

Signed-off-by: Fabio Mello <fabio.mello@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---

These pictures explain a little more the consequence of letting the
enable+disable in the code:

	http://pub.politreco.com/paste/TEK0011-before.jpg
	http://pub.politreco.com/paste/TEK0007-after.jpg

The yellow line is a GPIO toggle in userspace to mark when we start and finish
the i2c transactions.  The blue line is the SCL in that i2c bus. Take a look on
the huge pauses we have between any 2 transactions.  These pauses are removed
with this patch and we are able to read our sensor's values in 950usec rather
than 5.24msec we had before.  We are testing this using a Minnowboard Max that
has a designware i2c controller.

There's this comment in the code suggesting the designware controller might
have problems if we don't disable it after each transfer.  We left a stress
code running for 3 days to check if anything bad would happen.  This is the
test code using a PCA9685 (just because it was the easiest device we had
available to check read+write commands):

	http://pub.politreco.com/paste/test-i2c.c

 drivers/i2c/busses/i2c-designware-core.c    | 19 ++++---------------
 drivers/i2c/busses/i2c-designware-platdrv.c |  2 +-
 2 files changed, 5 insertions(+), 16 deletions(-)

Comments

Mika Westerberg June 9, 2015, 8:51 a.m. UTC | #1
On Mon, Jun 08, 2015 at 02:50:28PM -0300, lucas.de.marchi@gmail.com wrote:
> From: Fabio Mello <fabio.mello@intel.com>
> 
> According to documentation and tests, initialization is not
> necessary on module resume, since the controller keeps its state
> between disable/enable. Change the target address is also allowed.
> 
> So, this patch replaces the initialization on module resume with a
> simple enable, and removes the (non required anymore) enables and
> disables.
> 
> Signed-off-by: Fabio Mello <fabio.mello@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
> 
> These pictures explain a little more the consequence of letting the
> enable+disable in the code:
> 
> 	http://pub.politreco.com/paste/TEK0011-before.jpg
> 	http://pub.politreco.com/paste/TEK0007-after.jpg
> 
> The yellow line is a GPIO toggle in userspace to mark when we start and finish
> the i2c transactions.  The blue line is the SCL in that i2c bus. Take a look on
> the huge pauses we have between any 2 transactions.  These pauses are removed
> with this patch and we are able to read our sensor's values in 950usec rather
> than 5.24msec we had before.  We are testing this using a Minnowboard Max that
> has a designware i2c controller.

Did you test this on any other platform than Intel Baytrail?

I'm adding Christian Ruppert (keeping the context) if he has any
comments. IIRC he added the per transfer enable/disable some time ago.

> 
> There's this comment in the code suggesting the designware controller might
> have problems if we don't disable it after each transfer.  We left a stress
> code running for 3 days to check if anything bad would happen.  This is the
> test code using a PCA9685 (just because it was the easiest device we had
> available to check read+write commands):
> 
> 	http://pub.politreco.com/paste/test-i2c.c
> 
>  drivers/i2c/busses/i2c-designware-core.c    | 19 ++++---------------
>  drivers/i2c/busses/i2c-designware-platdrv.c |  2 +-
>  2 files changed, 5 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
> index 6e25c01..b386c10 100644
> --- a/drivers/i2c/busses/i2c-designware-core.c
> +++ b/drivers/i2c/busses/i2c-designware-core.c
> @@ -375,8 +375,12 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
>  	/* configure the i2c master */
>  	dw_writel(dev, dev->master_cfg , DW_IC_CON);
>  
> +	/* Enable the adapter */
> +	__i2c_dw_enable(dev, true);
> +
>  	if (dev->release_lock)
>  		dev->release_lock(dev);
> +
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(i2c_dw_init);
> @@ -405,9 +409,6 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
>  	struct i2c_msg *msgs = dev->msgs;
>  	u32 ic_con, ic_tar = 0;
>  
> -	/* Disable the adapter */
> -	__i2c_dw_enable(dev, false);
> -
>  	/* if the slave address is ten bit address, enable 10BITADDR */
>  	ic_con = dw_readl(dev, DW_IC_CON);
>  	if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) {
> @@ -434,9 +435,6 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
>  	/* enforce disabled interrupts (due to HW issues) */
>  	i2c_dw_disable_int(dev);
>  
> -	/* Enable the adapter */
> -	__i2c_dw_enable(dev, true);
> -
>  	/* Clear and enable interrupts */
>  	i2c_dw_clear_int(dev);
>  	dw_writel(dev, DW_IC_INTR_DEFAULT_MASK, DW_IC_INTR_MASK);
> @@ -665,15 +663,6 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
>  		goto done;
>  	}
>  
> -	/*
> -	 * We must disable the adapter before unlocking the &dev->lock mutex
> -	 * below. Otherwise the hardware might continue generating interrupts
> -	 * which in turn causes a race condition with the following transfer.
> -	 * Needs some more investigation if the additional interrupts are
> -	 * a hardware bug or this driver doesn't handle them correctly yet.
> -	 */
> -	__i2c_dw_enable(dev, false);
> -
>  	if (dev->msg_err) {
>  		ret = dev->msg_err;
>  		goto done;
> diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
> index c270f5f..0598695 100644
> --- a/drivers/i2c/busses/i2c-designware-platdrv.c
> +++ b/drivers/i2c/busses/i2c-designware-platdrv.c
> @@ -320,7 +320,7 @@ static int dw_i2c_resume(struct device *dev)
>  	clk_prepare_enable(i_dev->clk);
>  
>  	if (!i_dev->pm_runtime_disabled)
> -		i2c_dw_init(i_dev);
> +		i2c_dw_enable(i_dev);
>  
>  	return 0;
>  }
> -- 
> 2.4.2
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lucas De Marchi June 9, 2015, 6:29 p.m. UTC | #2
Hi Mika,

On Tue, Jun 9, 2015 at 5:51 AM, Mika Westerberg
<mika.westerberg@linux.intel.com> wrote:
> On Mon, Jun 08, 2015 at 02:50:28PM -0300, lucas.de.marchi@gmail.com wrote:
>> From: Fabio Mello <fabio.mello@intel.com>
>>
>> According to documentation and tests, initialization is not
>> necessary on module resume, since the controller keeps its state
>> between disable/enable. Change the target address is also allowed.
>>
>> So, this patch replaces the initialization on module resume with a
>> simple enable, and removes the (non required anymore) enables and
>> disables.
>>
>> Signed-off-by: Fabio Mello <fabio.mello@intel.com>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> ---
>>
>> These pictures explain a little more the consequence of letting the
>> enable+disable in the code:
>>
>>       http://pub.politreco.com/paste/TEK0011-before.jpg
>>       http://pub.politreco.com/paste/TEK0007-after.jpg
>>
>> The yellow line is a GPIO toggle in userspace to mark when we start and finish
>> the i2c transactions.  The blue line is the SCL in that i2c bus. Take a look on
>> the huge pauses we have between any 2 transactions.  These pauses are removed
>> with this patch and we are able to read our sensor's values in 950usec rather
>> than 5.24msec we had before.  We are testing this using a Minnowboard Max that
>> has a designware i2c controller.
>
> Did you test this on any other platform than Intel Baytrail?

No. The only soc we have here with this controller is the Baytrail.
Mika Westerberg June 10, 2015, 7:07 a.m. UTC | #3
On Tue, Jun 09, 2015 at 03:29:01PM -0300, Lucas De Marchi wrote:
> Hi Mika,
> 
> On Tue, Jun 9, 2015 at 5:51 AM, Mika Westerberg
> <mika.westerberg@linux.intel.com> wrote:
> > On Mon, Jun 08, 2015 at 02:50:28PM -0300, lucas.de.marchi@gmail.com wrote:
> >> From: Fabio Mello <fabio.mello@intel.com>
> >>
> >> According to documentation and tests, initialization is not
> >> necessary on module resume, since the controller keeps its state
> >> between disable/enable. Change the target address is also allowed.
> >>
> >> So, this patch replaces the initialization on module resume with a
> >> simple enable, and removes the (non required anymore) enables and
> >> disables.
> >>
> >> Signed-off-by: Fabio Mello <fabio.mello@intel.com>
> >> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> >> ---
> >>
> >> These pictures explain a little more the consequence of letting the
> >> enable+disable in the code:
> >>
> >>       http://pub.politreco.com/paste/TEK0011-before.jpg
> >>       http://pub.politreco.com/paste/TEK0007-after.jpg
> >>
> >> The yellow line is a GPIO toggle in userspace to mark when we start and finish
> >> the i2c transactions.  The blue line is the SCL in that i2c bus. Take a look on
> >> the huge pauses we have between any 2 transactions.  These pauses are removed
> >> with this patch and we are able to read our sensor's values in 950usec rather
> >> than 5.24msec we had before.  We are testing this using a Minnowboard Max that
> >> has a designware i2c controller.
> >
> > Did you test this on any other platform than Intel Baytrail?
> 
> No. The only soc we have here with this controller is the Baytrail.

My concern is that this patch might break some non-Intel platform. It
would be nice if someone (Christian?) could try this out.
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mika Westerberg June 10, 2015, 7:55 a.m. UTC | #4
On Mon, Jun 08, 2015 at 02:50:28PM -0300, lucas.de.marchi@gmail.com wrote:
> @@ -320,7 +320,7 @@ static int dw_i2c_resume(struct device *dev)
>  	clk_prepare_enable(i_dev->clk);
>  
>  	if (!i_dev->pm_runtime_disabled)
> -		i2c_dw_init(i_dev);
> +		i2c_dw_enable(i_dev);

This will not work if the device power gets removed (for example being
put to D3cold) as it looses context.
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mika Westerberg June 11, 2015, 9:38 a.m. UTC | #5
On Wed, Jun 10, 2015 at 05:05:16PM +0200, christian.ruppert@alitech.com wrote:
>    We should understand why the controller was disabled after successful
>    transfers in the first place, however. Maybe some quirk with older
>    versions of the hardware? Mika, do you have any memories about this?

I think it was in the original driver even before I did Intel specific
changes to that. I have no idea why it is done. Probably, like you said,
because some older version of the hardware needed it.
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lucas De Marchi June 11, 2015, 2:48 p.m. UTC | #6
Hi,

On 06/10, christian.ruppert@alitech.com wrote:
> Mika Westerberg <mika.westerberg@linux.intel.com> wrote on 10.06.2015 
> 09:07:22:
> > My concern is that this patch might break some non-Intel platform. It
> > would be nice if someone (Christian?) could try this out.
> 
> Ouch, this one brings back painful memories. Take a look at patch 
> 38d7fadef4973bb94e36897fcb6bb6a12fdd10c9 (
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=38d7fadef4973bb94e36897fcb6bb6a12fdd10c9
> ) for the context.
> 
> In brief:
> - Before patch 38d7fade, the driver disabled the hardware after successful 
> transfers. I do not know the reason for this and I cannot judge whether 
> this is necessary or not. I thus decided not to modify this behaviour in 
> patch 38d7fade.
> 
> - After patch 38d7fade, the driver disabled the dw controller after all 
> transfers, in particular in the case of unsuccessful transfers. This 
> modification was necessary because of a race condition triggered by an i2c 
> slave device which interrupted transfers in the middle. In this case, the 
> dw controller (at least our version) seems to send spurious interrupts 
> later if it is not disabled. The interrupt handler is not designed to be 
> called on already aborted transfers, however, which leads to undesirable 
> behaviour if the interrupt occurs at the wrong moment (system hangs in irq 
> loop).

Yeah. So before we had:

 - sucessful transfer
   -> disable the adapter after complete
 - timeout transfer
   -> disable the adapter after complete

And your patch added the disable in case there wasn't a timeout but
dev->msg_err was set.


> I will try to dig out the test setup we used to validate the patch at the 
> time but given the fact that this was two years ago this might take a 
> little while. In the meantime, do you have any means to stress test the 
> case of unexpected events on the bus (client aborts transfer, timeout 
> etc.)? An alternative might be to only disable the controller in the case 
> of errors and leave it active after successful transfers. We should 
> understand why the controller was disabled after successful transfers in 

It seems pretty rad to disable the controller after each transfer.  It
may be due to previous revisions of the hw but may well be because of
"it was the simplest way to do it" at the time.  Disabling the
controller after each transfer even if successful. For unsuccessful
transfers we may want to disable it if there's a hw bug, but it would be
good to check this indeed.


> the first place, however. Maybe some quirk with older versions of the 
> hardware? Mika, do you have any memories about this?

I'm not sure if it's a hw bug or if it's just an oversight from previous
versions of this driver.  So adding a quirk with correct versions will
be difficult IMO.  We've been only testing this with well behaved
slaves.  Do you have any idea how to test the bad guys?  Maybe
connecting a slave with a long wire to introduce errors?
Lucas De Marchi June 12, 2015, 10:45 p.m. UTC | #7
Hi Mika,

On Wed, Jun 10, 2015 at 4:55 AM, Mika Westerberg
<mika.westerberg@linux.intel.com> wrote:
> On Mon, Jun 08, 2015 at 02:50:28PM -0300, lucas.de.marchi@gmail.com wrote:
>> @@ -320,7 +320,7 @@ static int dw_i2c_resume(struct device *dev)
>>       clk_prepare_enable(i_dev->clk);
>>
>>       if (!i_dev->pm_runtime_disabled)
>> -             i2c_dw_init(i_dev);
>> +             i2c_dw_enable(i_dev);
>
> This will not work if the device power gets removed (for example being
> put to D3cold) as it looses context.

Do you mean we should keep the i2c_dw_init() here?
Mika Westerberg June 15, 2015, 9:29 a.m. UTC | #8
On Fri, Jun 12, 2015 at 07:45:00PM -0300, Lucas De Marchi wrote:
> Hi Mika,
> 
> On Wed, Jun 10, 2015 at 4:55 AM, Mika Westerberg
> <mika.westerberg@linux.intel.com> wrote:
> > On Mon, Jun 08, 2015 at 02:50:28PM -0300, lucas.de.marchi@gmail.com wrote:
> >> @@ -320,7 +320,7 @@ static int dw_i2c_resume(struct device *dev)
> >>       clk_prepare_enable(i_dev->clk);
> >>
> >>       if (!i_dev->pm_runtime_disabled)
> >> -             i2c_dw_init(i_dev);
> >> +             i2c_dw_enable(i_dev);
> >
> > This will not work if the device power gets removed (for example being
> > put to D3cold) as it looses context.
> 
> Do you mean we should keep the i2c_dw_init() here?

Yes, that's safer if the controller power gets removed.
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christian Ruppert June 23, 2015, 4:45 p.m. UTC | #9
Hello,

Christian Ruppert/ALi_GVA/ALi wrote on 10.06.2015 17:05:16:
> Mika Westerberg <mika.westerberg@linux.intel.com> wrote on 10.06.
> 2015 09:07:22:
> > On Tue, Jun 09, 2015 at 03:29:01PM -0300, Lucas De Marchi wrote:
> > > Hi Mika,
> > > 
> > > On Tue, Jun 9, 2015 at 5:51 AM, Mika Westerberg
> > > <mika.westerberg@linux.intel.com> wrote:
> > > > On Mon, Jun 08, 2015 at 02:50:28PM -0300, 
> lucas.de.marchi@gmail.com wrote:
> > > >> From: Fabio Mello <fabio.mello@intel.com>
> > > >>
> > > >> According to documentation and tests, initialization is not
> > > >> necessary on module resume, since the controller keeps its state
> > > >> between disable/enable. Change the target address is also 
allowed.
> > > >>
> > > >> So, this patch replaces the initialization on module resume with 
a
> > > >> simple enable, and removes the (non required anymore) enables and
> > > >> disables.
> > > >>
> > > >> Signed-off-by: Fabio Mello <fabio.mello@intel.com>
> > > >> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> > > >> ---
> > > >>
> > > >> These pictures explain a little more the consequence of letting 
the
> > > >> enable+disable in the code:
> > > >>
> > > >>       http://pub.politreco.com/paste/TEK0011-before.jpg
> > > >>       http://pub.politreco.com/paste/TEK0007-after.jpg
> > > >>
> > > >> The yellow line is a GPIO toggle in userspace to mark when we 
> > start and finish
> > > >> the i2c transactions.  The blue line is the SCL in that i2c 
> > bus. Take a look on
> > > >> the huge pauses we have between any 2 transactions.  These 
> > pauses are removed
> > > >> with this patch and we are able to read our sensor's values in 
> > 950usec rather
> > > >> than 5.24msec we had before.  We are testing this using a 
> > Minnowboard Max that
> > > >> has a designware i2c controller.
> > > >
> > > > Did you test this on any other platform than Intel Baytrail?
> > > 
> > > No. The only soc we have here with this controller is the Baytrail.
> > 
> > My concern is that this patch might break some non-Intel platform. It
> > would be nice if someone (Christian?) could try this out.
> 
> Ouch, this one brings back painful memories. Take a look at patch 
> 38d7fadef4973bb94e36897fcb6bb6a12fdd10c9 (https://git.kernel.org/
> cgit/linux/kernel/git/torvalds/linux.git/commit/?
> id=38d7fadef4973bb94e36897fcb6bb6a12fdd10c9) for the context.
> 
> In brief:
> - Before patch 38d7fade, the driver disabled the hardware after 
> successful transfers. I do not know the reason for this and I cannot
> judge whether this is necessary or not. I thus decided not to modify
> this behaviour in patch 38d7fade.
> 
> - After patch 38d7fade, the driver disabled the dw controller after 
> all transfers, in particular in the case of unsuccessful transfers. 
> This modification was necessary because of a race condition 
> triggered by an i2c slave device which interrupted transfers in the 
> middle. In this case, the dw controller (at least our version) seems
> to send spurious interrupts later if it is not disabled. The 
> interrupt handler is not designed to be called on already aborted 
> transfers, however, which leads to undesirable behaviour if the 
> interrupt occurs at the wrong moment (system hangs in irq loop).
> 
> I will try to dig out the test setup we used to validate the patch 
> at the time but given the fact that this was two years ago this 
> might take a little while. In the meantime, do you have any means to
> stress test the case of unexpected events on the bus (client aborts 
> transfer, timeout etc.)? An alternative might be to only disable the
> controller in the case of errors and leave it active after 
> successful transfers. We should understand why the controller was 
> disabled after successful transfers in the first place, however. 
> Maybe some quirk with older versions of the hardware? Mika, do you 
> have any memories about this?

As promised I tried to dig out the test setup we used to validate patch 
38d7fade at the time but without success. (I half expected that after such 
a long time...)

So I said to myself, let's give the patch a try nevertheless and see if it 
works in our system at least in the nominal case (no i2c bus errors).

The result is not very encouraging: Out of five (identical) designware i2c 
controllers we have on my test SOC, the first one initialises properly but 
the second one gets stuck in the famous irq loop right away when the 
module is enabled in i2c_dw_init. The system never gets around to try 
initialising the remaining three controllers due to the irq loop. I didn't 
have the time to investigate the details yet but I suspect this is 
triggered by some nastily behaved device on the bus. For example, some of 
our external devices are notorious for keeping i2c  lines tied to zero 
before being properly powered on/reset/initialised by their respective 
drivers (which in turn depend on the i2c master to be initialised first, 
obviously). I'll grab an oscilloscope and dump the waves to confirm this 
suspicion on the occasion.
However, similar situations might occur in multi-master setups (which we 
don't have). I suspect the driver/hardware to also end up in the irq loop 
after loosing arbitration with this patch. Has anybody the means to test 
this in a multi-master system?

Greetings,
  Christian


--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lucas De Marchi June 23, 2015, 5:02 p.m. UTC | #10
On Tue, Jun 23, 2015 at 1:45 PM,  <christian.ruppert@alitech.com> wrote:
> Hello,
>
> Christian Ruppert/ALi_GVA/ALi wrote on 10.06.2015 17:05:16:
>> Mika Westerberg <mika.westerberg@linux.intel.com> wrote on 10.06.
>> 2015 09:07:22:
>> > On Tue, Jun 09, 2015 at 03:29:01PM -0300, Lucas De Marchi wrote:
>> > > Hi Mika,
>> > >
>> > > On Tue, Jun 9, 2015 at 5:51 AM, Mika Westerberg
>> > > <mika.westerberg@linux.intel.com> wrote:
>> > > > On Mon, Jun 08, 2015 at 02:50:28PM -0300,
>> lucas.de.marchi@gmail.com wrote:
>> > > >> From: Fabio Mello <fabio.mello@intel.com>
>> > > >>
>> > > >> According to documentation and tests, initialization is not
>> > > >> necessary on module resume, since the controller keeps its state
>> > > >> between disable/enable. Change the target address is also
> allowed.
>> > > >>
>> > > >> So, this patch replaces the initialization on module resume with
> a
>> > > >> simple enable, and removes the (non required anymore) enables and
>> > > >> disables.
>> > > >>
>> > > >> Signed-off-by: Fabio Mello <fabio.mello@intel.com>
>> > > >> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> > > >> ---
>> > > >>
>> > > >> These pictures explain a little more the consequence of letting
> the
>> > > >> enable+disable in the code:
>> > > >>
>> > > >>       http://pub.politreco.com/paste/TEK0011-before.jpg
>> > > >>       http://pub.politreco.com/paste/TEK0007-after.jpg
>> > > >>
>> > > >> The yellow line is a GPIO toggle in userspace to mark when we
>> > start and finish
>> > > >> the i2c transactions.  The blue line is the SCL in that i2c
>> > bus. Take a look on
>> > > >> the huge pauses we have between any 2 transactions.  These
>> > pauses are removed
>> > > >> with this patch and we are able to read our sensor's values in
>> > 950usec rather
>> > > >> than 5.24msec we had before.  We are testing this using a
>> > Minnowboard Max that
>> > > >> has a designware i2c controller.
>> > > >
>> > > > Did you test this on any other platform than Intel Baytrail?
>> > >
>> > > No. The only soc we have here with this controller is the Baytrail.
>> >
>> > My concern is that this patch might break some non-Intel platform. It
>> > would be nice if someone (Christian?) could try this out.
>>
>> Ouch, this one brings back painful memories. Take a look at patch
>> 38d7fadef4973bb94e36897fcb6bb6a12fdd10c9 (https://git.kernel.org/
>> cgit/linux/kernel/git/torvalds/linux.git/commit/?
>> id=38d7fadef4973bb94e36897fcb6bb6a12fdd10c9) for the context.
>>
>> In brief:
>> - Before patch 38d7fade, the driver disabled the hardware after
>> successful transfers. I do not know the reason for this and I cannot
>> judge whether this is necessary or not. I thus decided not to modify
>> this behaviour in patch 38d7fade.
>>
>> - After patch 38d7fade, the driver disabled the dw controller after
>> all transfers, in particular in the case of unsuccessful transfers.
>> This modification was necessary because of a race condition
>> triggered by an i2c slave device which interrupted transfers in the
>> middle. In this case, the dw controller (at least our version) seems
>> to send spurious interrupts later if it is not disabled. The
>> interrupt handler is not designed to be called on already aborted
>> transfers, however, which leads to undesirable behaviour if the
>> interrupt occurs at the wrong moment (system hangs in irq loop).
>>
>> I will try to dig out the test setup we used to validate the patch
>> at the time but given the fact that this was two years ago this
>> might take a little while. In the meantime, do you have any means to
>> stress test the case of unexpected events on the bus (client aborts
>> transfer, timeout etc.)? An alternative might be to only disable the
>> controller in the case of errors and leave it active after
>> successful transfers. We should understand why the controller was
>> disabled after successful transfers in the first place, however.
>> Maybe some quirk with older versions of the hardware? Mika, do you
>> have any memories about this?
>
> As promised I tried to dig out the test setup we used to validate patch
> 38d7fade at the time but without success. (I half expected that after such
> a long time...)
>
> So I said to myself, let's give the patch a try nevertheless and see if it
> works in our system at least in the nominal case (no i2c bus errors).
>
> The result is not very encouraging: Out of five (identical) designware i2c
> controllers we have on my test SOC, the first one initialises properly but
> the second one gets stuck in the famous irq loop right away when the
> module is enabled in i2c_dw_init. The system never gets around to try

Are you using the pci or platform driver?  I noticed yesterday the pci
version is failing here with a NULL pointer dereference.

> initialising the remaining three controllers due to the irq loop. I didn't
> have the time to investigate the details yet but I suspect this is
> triggered by some nastily behaved device on the bus. For example, some of
> our external devices are notorious for keeping i2c  lines tied to zero
> before being properly powered on/reset/initialised by their respective
> drivers (which in turn depend on the i2c master to be initialised first,
> obviously). I'll grab an oscilloscope and dump the waves to confirm this
> suspicion on the occasion.

Yeah, it'd be great to have it.

thanks for testing it with your setup.
Christian Ruppert June 24, 2015, 7:36 a.m. UTC | #11
Dear Lucas,

Lucas De Marchi <lucas.de.marchi@gmail.com> wrote on 23.06.2015 19:02:03:
> On Tue, Jun 23, 2015 at 1:45 PM,  <christian.ruppert@alitech.com> wrote:
> > Hello,
> >
> > Christian Ruppert/ALi_GVA/ALi wrote on 10.06.2015 17:05:16:
> [...]
> > The result is not very encouraging: Out of five (identical) designware 
i2c
> > controllers we have on my test SOC, the first one initialises properly 
but
> > the second one gets stuck in the famous irq loop right away when the
> > module is enabled in i2c_dw_init. The system never gets around to try
> 
> Are you using the pci or platform driver?  I noticed yesterday the pci
> version is failing here with a NULL pointer dereference.

The test was performed with the platform driver (instantiated through 
device tree).
I just re-checked and the ultimate problem which hangs/kills the system in 
my case is the IRQ loop.
I haven't observed any NULL pointer dereferences on the road.

Greetings,
  Christian

--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mika Westerberg June 24, 2015, 11:27 a.m. UTC | #12
On Wed, Jun 24, 2015 at 09:36:43AM +0200, christian.ruppert@alitech.com wrote:
> Dear Lucas,
> 
> Lucas De Marchi <lucas.de.marchi@gmail.com> wrote on 23.06.2015 19:02:03:
> > On Tue, Jun 23, 2015 at 1:45 PM,  <christian.ruppert@alitech.com> wrote:
> > > Hello,
> > >
> > > Christian Ruppert/ALi_GVA/ALi wrote on 10.06.2015 17:05:16:
> > [...]
> > > The result is not very encouraging: Out of five (identical) designware 
> i2c
> > > controllers we have on my test SOC, the first one initialises properly 
> but
> > > the second one gets stuck in the famous irq loop right away when the
> > > module is enabled in i2c_dw_init. The system never gets around to try
> > 
> > Are you using the pci or platform driver?  I noticed yesterday the pci
> > version is failing here with a NULL pointer dereference.
> 
> The test was performed with the platform driver (instantiated through 
> device tree).
> I just re-checked and the ultimate problem which hangs/kills the system in 
> my case is the IRQ loop.
> I haven't observed any NULL pointer dereferences on the road.

Thanks Christian for testing.

Since the patch causes problems on your hardware, I don't think it is
good idea to merge it.
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lucas De Marchi June 24, 2015, 12:56 p.m. UTC | #13
T24gV2VkLCAyMDE1LTA2LTI0IGF0IDE0OjI3ICswMzAwLCBNaWthIFdlc3RlcmJlcmcgd3JvdGU6
DQo+IE9uIFdlZCwgSnVuIDI0LCAyMDE1IGF0IDA5OjM2OjQzQU0gKzAyMDAsIGNocmlzdGlhbi5y
dXBwZXJ0QGFsaXRlY2guY29tIHdyb3QNCj4gZToNCj4gPiBEZWFyIEx1Y2FzLA0KPiA+IA0KPiA+
IEx1Y2FzIERlIE1hcmNoaSA8bHVjYXMuZGUubWFyY2hpQGdtYWlsLmNvbT4gd3JvdGUgb24gMjMu
MDYuMjAxNSAxOTowMjowMzoNCj4gPiA+IE9uIFR1ZSwgSnVuIDIzLCAyMDE1IGF0IDE6NDUgUE0s
ICA8Y2hyaXN0aWFuLnJ1cHBlcnRAYWxpdGVjaC5jb20+IHdyb3RlOg0KPiA+ID4gPiBIZWxsbywN
Cj4gPiA+ID4gDQo+ID4gPiA+IENocmlzdGlhbiBSdXBwZXJ0L0FMaV9HVkEvQUxpIHdyb3RlIG9u
IDEwLjA2LjIwMTUgMTc6MDU6MTY6DQo+ID4gPiBbLi4uXQ0KPiA+ID4gPiBUaGUgcmVzdWx0IGlz
IG5vdCB2ZXJ5IGVuY291cmFnaW5nOiBPdXQgb2YgZml2ZSAoaWRlbnRpY2FsKSBkZXNpZ253YXJl
IA0KPiA+ID4gPiANCj4gPiBpMmMNCj4gPiA+ID4gY29udHJvbGxlcnMgd2UgaGF2ZSBvbiBteSB0
ZXN0IFNPQywgdGhlIGZpcnN0IG9uZSBpbml0aWFsaXNlcyBwcm9wZXJseSANCj4gPiA+ID4gDQo+
ID4gYnV0DQo+ID4gPiA+IHRoZSBzZWNvbmQgb25lIGdldHMgc3R1Y2sgaW4gdGhlIGZhbW91cyBp
cnEgbG9vcCByaWdodCBhd2F5IHdoZW4gdGhlDQo+ID4gPiA+IG1vZHVsZSBpcyBlbmFibGVkIGlu
IGkyY19kd19pbml0LiBUaGUgc3lzdGVtIG5ldmVyIGdldHMgYXJvdW5kIHRvIHRyeQ0KPiA+ID4g
DQo+ID4gPiBBcmUgeW91IHVzaW5nIHRoZSBwY2kgb3IgcGxhdGZvcm0gZHJpdmVyPyAgSSBub3Rp
Y2VkIHllc3RlcmRheSB0aGUgcGNpDQo+ID4gPiB2ZXJzaW9uIGlzIGZhaWxpbmcgaGVyZSB3aXRo
IGEgTlVMTCBwb2ludGVyIGRlcmVmZXJlbmNlLg0KPiA+IA0KPiA+IFRoZSB0ZXN0IHdhcyBwZXJm
b3JtZWQgd2l0aCB0aGUgcGxhdGZvcm0gZHJpdmVyIChpbnN0YW50aWF0ZWQgdGhyb3VnaCANCj4g
PiBkZXZpY2UgdHJlZSkuDQo+ID4gSSBqdXN0IHJlLWNoZWNrZWQgYW5kIHRoZSB1bHRpbWF0ZSBw
cm9ibGVtIHdoaWNoIGhhbmdzL2tpbGxzIHRoZSBzeXN0ZW0gaW4gDQo+ID4gDQo+ID4gbXkgY2Fz
ZSBpcyB0aGUgSVJRIGxvb3AuDQo+ID4gSSBoYXZlbid0IG9ic2VydmVkIGFueSBOVUxMIHBvaW50
ZXIgZGVyZWZlcmVuY2VzIG9uIHRoZSByb2FkLg0KPiANCj4gVGhhbmtzIENocmlzdGlhbiBmb3Ig
dGVzdGluZy4NCj4gDQo+IFNpbmNlIHRoZSBwYXRjaCBjYXVzZXMgcHJvYmxlbXMgb24geW91ciBo
YXJkd2FyZSwgSSBkb24ndCB0aGluayBpdCBpcw0KPiBnb29kIGlkZWEgdG8gbWVyZ2UgaXQuDQoN
Cg0KWWVhaCwgYnV0IGl0IHdvdWxkIGJlIGJhZCB0byBpZ25vcmUgdGhlIHByb2JsZW0gYXMgd2Vs
bC4gVGhlIHdheSBpdCBpcyBub3cNCmtpbGxzIGFueSBwb3NzaWJpbGl0eSBvZiB1c2luZyBEVyBj
b250cm9sbGVyIGZvciByZWFkaW5nIHNlbnNvcnMgbGlrZQ0KZ3lyb3Njb3BlLCBhY2NlbGVyb21l
dGVyLCBiYXJvbWV0ZXIgdGhhdCBoYXZlIGhpZ2hlciBzYW1wbGluZyByYXRlIGV0Yy4gIEknbGwN
CnRyeSB0byBjb21lIHVwIHdpdGggYSBuZXcgcGF0Y2ggYnV0IHNpbmNlIEkgY2FuJ3QgcmVwcm9k
dWNlIHRoZSBwcm9ibGVtIGhlcmUNCml0J2QgYmUgZ29vZCB0byBrbm93IGlmIHRoZXJlJ3MgYW55
IG1lYW5zIGZvciBtZSB0byB0ZXN0LiAgV2hhdCBkbyB5b3UgdGhpbmsNCnRoYXQgY291bGQgYmUg
ZG9uZT8gTWF5YmUgcHV0dGluZyB0aGUgY29udHJvbGxlciB0byBzbGVlcCBvbmx5IGluIGNhc2Ug
b2YNCmVycm9ycz8NCg0KdGhhbmtzDQoNCi0tIA0KTHVjYXMgRGUgTWFyY2hp
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mika Westerberg June 24, 2015, 1:18 p.m. UTC | #14
On Wed, Jun 24, 2015 at 12:56:19PM +0000, De Marchi, Lucas wrote:
> Yeah, but it would be bad to ignore the problem as well. The way it is now
> kills any possibility of using DW controller for reading sensors like
> gyroscope, accelerometer, barometer that have higher sampling rate etc.  I'll
> try to come up with a new patch but since I can't reproduce the problem here
> it'd be good to know if there's any means for me to test.  What do you think
> that could be done? Maybe putting the controller to sleep only in case of
> errors?

Instead of disabling the adapter after each transfer, I wonder if it is
enough if we just mask all interrupts? That should also prevent the
interrupt loop Christian is seeing on his hardware.
--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christian Ruppert June 24, 2015, 2:06 p.m. UTC | #15
Dear Lucas,

"De Marchi, Lucas" <lucas.demarchi@intel.com> wrote on 24.06.2015 
14:56:19:
> On Wed, 2015-06-24 at 14:27 +0300, Mika Westerberg wrote:
> > On Wed, Jun 24, 2015 at 09:36:43AM +0200, 
christian.ruppert@alitech.com wrot
> > e:
> > > Dear Lucas,
> > > 
> > > Lucas De Marchi <lucas.de.marchi@gmail.com> wrote on 23.06.2015 
19:02:03:
> > > > On Tue, Jun 23, 2015 at 1:45 PM,  <christian.ruppert@alitech.com> 
wrote:
> > > > > Hello,
> > > > > 
> > > > > Christian Ruppert/ALi_GVA/ALi wrote on 10.06.2015 17:05:16:
> > > > [...]
> > > > > The result is not very encouraging: Out of five (identical) 
> designware 
> > > > > 
> > > i2c
> > > > > controllers we have on my test SOC, the first one 
> initialises properly 
> > > > > 
> > > but
> > > > > the second one gets stuck in the famous irq loop right away when 
the
> > > > > module is enabled in i2c_dw_init. The system never gets around 
to try
> > > > 
> > > > Are you using the pci or platform driver?  I noticed yesterday the 
pci
> > > > version is failing here with a NULL pointer dereference.
> > > 
> > > The test was performed with the platform driver (instantiated 
through 
> > > device tree).
> > > I just re-checked and the ultimate problem which hangs/kills 
thesystem in 
> > > 
> > > my case is the IRQ loop.
> > > I haven't observed any NULL pointer dereferences on the road.
> > 
> > Thanks Christian for testing.
> > 
> > Since the patch causes problems on your hardware, I don't think it is
> > good idea to merge it.
> 
> 
> Yeah, but it would be bad to ignore the problem as well. The way it is 
now
> kills any possibility of using DW controller for reading sensors like
> gyroscope, accelerometer, barometer that have higher sampling rate etc. 
I'll
> try to come up with a new patch but since I can't reproduce the problem 
here
> it'd be good to know if there's any means for me to test.

I'll analyse the problem a bit further and send you a description (sda and 
scl state at enable time) as soon as I can put my hands on an oscilloscope 
one evening.

Greetings,
  Christian

--
To unsubscribe from this list: send the line "unsubscribe linux-i2c" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index 6e25c01..b386c10 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -375,8 +375,12 @@  int i2c_dw_init(struct dw_i2c_dev *dev)
 	/* configure the i2c master */
 	dw_writel(dev, dev->master_cfg , DW_IC_CON);
 
+	/* Enable the adapter */
+	__i2c_dw_enable(dev, true);
+
 	if (dev->release_lock)
 		dev->release_lock(dev);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(i2c_dw_init);
@@ -405,9 +409,6 @@  static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 	struct i2c_msg *msgs = dev->msgs;
 	u32 ic_con, ic_tar = 0;
 
-	/* Disable the adapter */
-	__i2c_dw_enable(dev, false);
-
 	/* if the slave address is ten bit address, enable 10BITADDR */
 	ic_con = dw_readl(dev, DW_IC_CON);
 	if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) {
@@ -434,9 +435,6 @@  static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 	/* enforce disabled interrupts (due to HW issues) */
 	i2c_dw_disable_int(dev);
 
-	/* Enable the adapter */
-	__i2c_dw_enable(dev, true);
-
 	/* Clear and enable interrupts */
 	i2c_dw_clear_int(dev);
 	dw_writel(dev, DW_IC_INTR_DEFAULT_MASK, DW_IC_INTR_MASK);
@@ -665,15 +663,6 @@  i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 		goto done;
 	}
 
-	/*
-	 * We must disable the adapter before unlocking the &dev->lock mutex
-	 * below. Otherwise the hardware might continue generating interrupts
-	 * which in turn causes a race condition with the following transfer.
-	 * Needs some more investigation if the additional interrupts are
-	 * a hardware bug or this driver doesn't handle them correctly yet.
-	 */
-	__i2c_dw_enable(dev, false);
-
 	if (dev->msg_err) {
 		ret = dev->msg_err;
 		goto done;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index c270f5f..0598695 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -320,7 +320,7 @@  static int dw_i2c_resume(struct device *dev)
 	clk_prepare_enable(i_dev->clk);
 
 	if (!i_dev->pm_runtime_disabled)
-		i2c_dw_init(i_dev);
+		i2c_dw_enable(i_dev);
 
 	return 0;
 }