diff mbox series

[2/5] i2c: xiic: Drop broken interrupt handler

Message ID 20200613150751.114595-2-marex@denx.de
State New
Headers show
Series [1/5] i2c: xiic: Fix broken locking on tx_msg | expand

Commit Message

Marek Vasut June 13, 2020, 3:07 p.m. UTC
The interrupt handler is missing locking when reading out registers
and is racing with other threads which might access the driver. Drop
it altogether, so that the threaded interrupt is always executed, as
that one is already serialized by the driver mutex. This also allows
dropping local_irq_save()/local_irq_restore() in xiic_start_recv().

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
Cc: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-xiic.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

Comments

Shubhrajyoti Datta June 17, 2020, 12:25 p.m. UTC | #1
Hi Marek,

On Sat, Jun 13, 2020 at 8:39 PM Marek Vasut <marex@denx.de> wrote:
>
> The interrupt handler is missing locking when reading out registers
> and is racing with other threads which might access the driver. Drop
> it altogether, so that the threaded interrupt is always executed, as
> that one is already serialized by the driver mutex. This also allows
> dropping local_irq_save()/local_irq_restore() in xiic_start_recv().
>
The idea of the local_irq_save / restore was to make it atomic in case
there are a lot
of non i2c interrupts.

so it should still be needed right?
Marek Vasut June 17, 2020, 12:31 p.m. UTC | #2
On 6/17/20 2:25 PM, Shubhrajyoti Datta wrote:
> Hi Marek,

Hi,

> On Sat, Jun 13, 2020 at 8:39 PM Marek Vasut <marex@denx.de> wrote:
>>
>> The interrupt handler is missing locking when reading out registers
>> and is racing with other threads which might access the driver. Drop
>> it altogether, so that the threaded interrupt is always executed, as
>> that one is already serialized by the driver mutex. This also allows
>> dropping local_irq_save()/local_irq_restore() in xiic_start_recv().
>>
> The idea of the local_irq_save / restore was to make it atomic in case
> there are a lot
> of non i2c interrupts.

Make what atomic ? Two consecutive register writes cannot be atomic
unless there is some hardware way to do that. The XIIC has no such way.

> so it should still be needed right?

No, if there is a mutex around both the threaded interrupt handler and
this function, the register accesses will not be interleaved between the
two functions. I think that is what this local_irq_*() was trying to
prevent, right ?
Raviteja Narayanam June 26, 2020, 12:13 p.m. UTC | #3
> -----Original Message-----
> From: linux-i2c-owner@vger.kernel.org <linux-i2c-owner@vger.kernel.org> On
> Behalf Of Marek Vasut
> Sent: Saturday, June 13, 2020 8:38 PM
> To: linux-i2c@vger.kernel.org
> Cc: Marek Vasut <marex@denx.de>; Michal Simek <michals@xilinx.com>;
> Shubhrajyoti Datta <shubhraj@xilinx.com>; Wolfram Sang <wsa@kernel.org>
> Subject: [PATCH 2/5] i2c: xiic: Drop broken interrupt handler
> 
> The interrupt handler is missing locking when reading out registers and is
> racing with other threads which might access the driver. Drop it altogether, so
> that the threaded interrupt is always executed, as that one is already serialized
> by the driver mutex. This also allows dropping
> local_irq_save()/local_irq_restore() in xiic_start_recv().
> 
> Signed-off-by: Marek Vasut <marex@denx.de>
> Cc: Michal Simek <michal.simek@xilinx.com>
> Cc: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
> Cc: Wolfram Sang <wsa@kernel.org>
> ---
>  drivers/i2c/busses/i2c-xiic.c | 25 +------------------------
>  1 file changed, 1 insertion(+), 24 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index
> 0777e577720db..6db71c0fb6583 100644
> --- a/drivers/i2c/busses/i2c-xiic.c
> +++ b/drivers/i2c/busses/i2c-xiic.c
> @@ -543,7 +543,6 @@ static void xiic_start_recv(struct xiic_i2c *i2c)  {
>  	u8 rx_watermark;
>  	struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
> -	unsigned long flags;
> 
>  	/* Clear and enable Rx full interrupt. */
>  	xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK |
> XIIC_INTR_TX_ERROR_MASK); @@ -559,7 +558,6 @@ static void
> xiic_start_recv(struct xiic_i2c *i2c)
>  		rx_watermark = IIC_RX_FIFO_DEPTH;
>  	xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
> 
> -	local_irq_save(flags);

It is added as part of (i2c: xiic: Make the start and the byte count write atomic - ae7304c3ea28a3ba47a7a8312c76c654ef24967e) commit
to make the below 2 register writes atomic so that the controller doesn't produce a wrong transaction. (If there is a delay between the 2 register
writes, controller is messing up with the transaction). It is not intended for locking between this function and isr. So, this can't be removed. 

>  	if (!(msg->flags & I2C_M_NOSTART))
>  		/* write the address */
>  		xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, @@ -569,7 +567,6
> @@ static void xiic_start_recv(struct xiic_i2c *i2c)
> 
>  	xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET,
>  		msg->len | ((i2c->nmsgs == 1) ? XIIC_TX_DYN_STOP_MASK :
> 0));
> -	local_irq_restore(flags);
> 
>  	if (i2c->nmsgs == 1)
>  		/* very last, enable bus not busy as well */ @@ -609,26 +606,6
> @@ static void xiic_start_send(struct xiic_i2c *i2c)
>  		XIIC_INTR_BNB_MASK);
>  }
> 
> -static irqreturn_t xiic_isr(int irq, void *dev_id) -{
> -	struct xiic_i2c *i2c = dev_id;
> -	u32 pend, isr, ier;
> -	irqreturn_t ret = IRQ_NONE;
> -	/* Do not processes a devices interrupts if the device has no
> -	 * interrupts pending
> -	 */
> -
> -	dev_dbg(i2c->adap.dev.parent, "%s entry\n", __func__);
> -
> -	isr = xiic_getreg32(i2c, XIIC_IISR_OFFSET);
> -	ier = xiic_getreg32(i2c, XIIC_IIER_OFFSET);
> -	pend = isr & ier;
> -	if (pend)
> -		ret = IRQ_WAKE_THREAD;
> -
> -	return ret;
> -}
> -
>  static void __xiic_start_xfer(struct xiic_i2c *i2c)  {
>  	int first = 1;
> @@ -807,7 +784,7 @@ static int xiic_i2c_probe(struct platform_device *pdev)
>  	pm_runtime_use_autosuspend(i2c->dev);
>  	pm_runtime_set_active(i2c->dev);
>  	pm_runtime_enable(i2c->dev);
> -	ret = devm_request_threaded_irq(&pdev->dev, irq, xiic_isr,
> +	ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
>  					xiic_process, IRQF_ONESHOT,
>  					pdev->name, i2c);
> 
Removal of isr is fine.

Raviteja N
Marek Vasut June 28, 2020, 11:41 p.m. UTC | #4
On 6/26/20 2:13 PM, Raviteja Narayanam wrote:

Hi,

[...]

>> diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index
>> 0777e577720db..6db71c0fb6583 100644
>> --- a/drivers/i2c/busses/i2c-xiic.c
>> +++ b/drivers/i2c/busses/i2c-xiic.c
>> @@ -543,7 +543,6 @@ static void xiic_start_recv(struct xiic_i2c *i2c)  {
>>  	u8 rx_watermark;
>>  	struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
>> -	unsigned long flags;
>>
>>  	/* Clear and enable Rx full interrupt. */
>>  	xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK |
>> XIIC_INTR_TX_ERROR_MASK); @@ -559,7 +558,6 @@ static void
>> xiic_start_recv(struct xiic_i2c *i2c)
>>  		rx_watermark = IIC_RX_FIFO_DEPTH;
>>  	xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
>>
>> -	local_irq_save(flags);
> 
> It is added as part of (i2c: xiic: Make the start and the byte count write atomic - ae7304c3ea28a3ba47a7a8312c76c654ef24967e) commit
> to make the below 2 register writes atomic so that the controller doesn't produce a wrong transaction.

Two consecutive register writes are not atomic, they are posted as two
consecutive writes on the bus and will reach the hardware IP as two
separate writes with arbitrary delay between them.

I think the intention of the patch ae7304c3ea28 ("i2c: xiic: Make the
start and the byte count write atomic") is to make sure that there will
be no read/write register access to the XIIC IP while those registers in
local_irq_save()/local_irq_restore() block are written, and that makes
sense.

However, local_irq_save()/local_irq_restore() is local to one CPU core,
it does not prevent another CPU core from posting register read/write
access to the XIIC IP (for example from xiic_process() threaded
interrupt handler, which might just be running on that other CPU core).

The &i2c->lock mutex is locked by both the xiic_start() and
xiic_process(), and therefore guarantees that no other thread can post
read/write register access to the XIIC IP while xiic_start() (and thus
xiic_recv_start(), which is called from it) is running.

And so, I think this local_irq_save()/local_irq_restore() can be removed.

> (If there is a delay between the 2 register
> writes, controller is messing up with the transaction). It is not intended for locking between this function and isr. So, this can't be removed. 

But the bus can insert arbitrary delay between two consecutive register
writes to the XIIC IP, so if the timing between the two register writes
is really critical, then I don't see how to guarantee the timing
requirement. Do you happen to have more details on what really happens
on the hardware level here , maybe some errata document for the XIIC IP?

Thanks

>>  	if (!(msg->flags & I2C_M_NOSTART))
>>  		/* write the address */
>>  		xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, @@ -569,7 +567,6

[...]
Raviteja Narayanam June 29, 2020, 12:52 p.m. UTC | #5
> -----Original Message-----
> From: Marek Vasut <marex@denx.de>
> Sent: Monday, June 29, 2020 5:11 AM
> To: Raviteja Narayanam <rna@xilinx.com>; linux-i2c@vger.kernel.org
> Cc: Michal Simek <michals@xilinx.com>; Shubhrajyoti Datta
> <shubhraj@xilinx.com>; Wolfram Sang <wsa@kernel.org>
> Subject: Re: [PATCH 2/5] i2c: xiic: Drop broken interrupt handler
> 
> On 6/26/20 2:13 PM, Raviteja Narayanam wrote:
> 
> Hi,
> 
> [...]
> 
> >> diff --git a/drivers/i2c/busses/i2c-xiic.c
> >> b/drivers/i2c/busses/i2c-xiic.c index
> >> 0777e577720db..6db71c0fb6583 100644
> >> --- a/drivers/i2c/busses/i2c-xiic.c
> >> +++ b/drivers/i2c/busses/i2c-xiic.c
> >> @@ -543,7 +543,6 @@ static void xiic_start_recv(struct xiic_i2c *i2c)  {
> >>  	u8 rx_watermark;
> >>  	struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
> >> -	unsigned long flags;
> >>
> >>  	/* Clear and enable Rx full interrupt. */
> >>  	xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK |
> >> XIIC_INTR_TX_ERROR_MASK); @@ -559,7 +558,6 @@ static void
> >> xiic_start_recv(struct xiic_i2c *i2c)
> >>  		rx_watermark = IIC_RX_FIFO_DEPTH;
> >>  	xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
> >>
> >> -	local_irq_save(flags);
> >
> > It is added as part of (i2c: xiic: Make the start and the byte count
> > write atomic - ae7304c3ea28a3ba47a7a8312c76c654ef24967e) commit to
> make the below 2 register writes atomic so that the controller doesn't produce
> a wrong transaction.
> 
> Two consecutive register writes are not atomic, they are posted as two
> consecutive writes on the bus and will reach the hardware IP as two separate
> writes with arbitrary delay between them.
> 
> I think the intention of the patch ae7304c3ea28 ("i2c: xiic: Make the start and
> the byte count write atomic") is to make sure that there will be no read/write
> register access to the XIIC IP while those registers in
> local_irq_save()/local_irq_restore() block are written, and that makes sense.
> 
> However, local_irq_save()/local_irq_restore() is local to one CPU core, it does
> not prevent another CPU core from posting register read/write access to the
> XIIC IP (for example from xiic_process() threaded interrupt handler, which
> might just be running on that other CPU core).
> 
> The &i2c->lock mutex is locked by both the xiic_start() and xiic_process(), and
> therefore guarantees that no other thread can post read/write register access
> to the XIIC IP while xiic_start() (and thus xiic_recv_start(), which is called from
> it) is running.
> 
> And so, I think this local_irq_save()/local_irq_restore() can be removed.
> 
> > (If there is a delay between the 2 register writes, controller is
> > messing up with the transaction). It is not intended for locking between this
> function and isr. So, this can't be removed.
> 
> But the bus can insert arbitrary delay between two consecutive register writes
> to the XIIC IP, so if the timing between the two register writes is really critical,
> then I don't see how to guarantee the timing requirement. Do you happen to
> have more details on what really happens on the hardware level here , maybe
> some errata document for the XIIC IP?

From the commit description of "i2c: xiic: Make the start and the byte count write atomic"(ae7304c3ea28a3ba47a7a8312c76c654ef24967e),

[Disable interrupts while configuring the transfer and enable them back.

We have below as the programming sequence
1. start and slave address
2. byte count and stop

In some customer platform there was a lot of interrupts between 1 and 2
and after slave address (around 7 clock cyles) if 2 is not executed
then the transaction is nacked.

To fix this case make the 2 writes atomic.]

So, this local_irq_save()/local_irq_restore() is not related to exclusive access in the driver (xiic_process vs xiic_start),
but to supply the byte count to controller before it completes transmitting START and slave address.

> 
> Thanks
> 
> >>  	if (!(msg->flags & I2C_M_NOSTART))
> >>  		/* write the address */
> >>  		xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, @@ -569,7 +567,6
> 
> [...]
Marek Vasut June 29, 2020, 9:50 p.m. UTC | #6
On 6/29/20 2:52 PM, Raviteja Narayanam wrote:

Hi,

[...]

>>>> diff --git a/drivers/i2c/busses/i2c-xiic.c
>>>> b/drivers/i2c/busses/i2c-xiic.c index
>>>> 0777e577720db..6db71c0fb6583 100644
>>>> --- a/drivers/i2c/busses/i2c-xiic.c
>>>> +++ b/drivers/i2c/busses/i2c-xiic.c
>>>> @@ -543,7 +543,6 @@ static void xiic_start_recv(struct xiic_i2c *i2c)  {
>>>>  	u8 rx_watermark;
>>>>  	struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
>>>> -	unsigned long flags;
>>>>
>>>>  	/* Clear and enable Rx full interrupt. */
>>>>  	xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK |
>>>> XIIC_INTR_TX_ERROR_MASK); @@ -559,7 +558,6 @@ static void
>>>> xiic_start_recv(struct xiic_i2c *i2c)
>>>>  		rx_watermark = IIC_RX_FIFO_DEPTH;
>>>>  	xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
>>>>
>>>> -	local_irq_save(flags);
>>>
>>> It is added as part of (i2c: xiic: Make the start and the byte count
>>> write atomic - ae7304c3ea28a3ba47a7a8312c76c654ef24967e) commit to
>> make the below 2 register writes atomic so that the controller doesn't produce
>> a wrong transaction.
>>
>> Two consecutive register writes are not atomic, they are posted as two
>> consecutive writes on the bus and will reach the hardware IP as two separate
>> writes with arbitrary delay between them.
>>
>> I think the intention of the patch ae7304c3ea28 ("i2c: xiic: Make the start and
>> the byte count write atomic") is to make sure that there will be no read/write
>> register access to the XIIC IP while those registers in
>> local_irq_save()/local_irq_restore() block are written, and that makes sense.
>>
>> However, local_irq_save()/local_irq_restore() is local to one CPU core, it does
>> not prevent another CPU core from posting register read/write access to the
>> XIIC IP (for example from xiic_process() threaded interrupt handler, which
>> might just be running on that other CPU core).
>>
>> The &i2c->lock mutex is locked by both the xiic_start() and xiic_process(), and
>> therefore guarantees that no other thread can post read/write register access
>> to the XIIC IP while xiic_start() (and thus xiic_recv_start(), which is called from
>> it) is running.
>>
>> And so, I think this local_irq_save()/local_irq_restore() can be removed.
>>
>>> (If there is a delay between the 2 register writes, controller is
>>> messing up with the transaction). It is not intended for locking between this
>> function and isr. So, this can't be removed.
>>
>> But the bus can insert arbitrary delay between two consecutive register writes
>> to the XIIC IP, so if the timing between the two register writes is really critical,
>> then I don't see how to guarantee the timing requirement. Do you happen to
>> have more details on what really happens on the hardware level here , maybe
>> some errata document for the XIIC IP?
> 
> From the commit description of "i2c: xiic: Make the start and the byte count write atomic"(ae7304c3ea28a3ba47a7a8312c76c654ef24967e),
> 
> [Disable interrupts while configuring the transfer and enable them back.
> 
> We have below as the programming sequence
> 1. start and slave address
> 2. byte count and stop
> 
> In some customer platform there was a lot of interrupts between 1 and 2
> and after slave address (around 7 clock cyles) if 2 is not executed
> then the transaction is nacked.

Is that a hardware property of the XIIC IP, that the transaction is
NACKed after 7 clock cycles of the XIIC IP ?

> To fix this case make the 2 writes atomic.]

But using local_irq_save()/local_irq_restore() will not make two
separate register writes atomic, they will still be two separate
consecutive register writes.

Also note that another CPU core can very well be generating a lot of
traffic on the bus between current CPU core and the XIIC IP, so the
first write from current CPU core to the XIIC IP would arrive at the
XIIC IP, then the bus will be busy for a long time with other requiests
(more than 7 cycles), and then the second write to the XIIC IP will
arrive at the XIIC IP. The local_irq_save()/local_irq_restore() can not
prevent this scenario, but this is very real on a system under load.

> So, this local_irq_save()/local_irq_restore() is not related to exclusive access in the driver (xiic_process vs xiic_start),
> but to supply the byte count to controller before it completes transmitting START and slave address.

But then shouldn't the XIIC IP be fixed / extended with support for such
an atomic update instead ?

>> Thanks
>>
>>>>  	if (!(msg->flags & I2C_M_NOSTART))
>>>>  		/* write the address */
>>>>  		xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, @@ -569,7 +567,6
>>
>> [...]
Raviteja Narayanam July 8, 2020, 3:23 p.m. UTC | #7
Hi Marek,

> -----Original Message-----
> From: Marek Vasut <marex@denx.de>
> Sent: Tuesday, June 30, 2020 3:20 AM
> To: Raviteja Narayanam <rna@xilinx.com>; linux-i2c@vger.kernel.org
> Cc: Michal Simek <michals@xilinx.com>; Shubhrajyoti Datta
> <shubhraj@xilinx.com>; Wolfram Sang <wsa@kernel.org>
> Subject: Re: [PATCH 2/5] i2c: xiic: Drop broken interrupt handler
> 
> On 6/29/20 2:52 PM, Raviteja Narayanam wrote:
> 
> Hi,
> 
> [...]
> 
> >>>> diff --git a/drivers/i2c/busses/i2c-xiic.c
> >>>> b/drivers/i2c/busses/i2c-xiic.c index
> >>>> 0777e577720db..6db71c0fb6583 100644
> >>>> --- a/drivers/i2c/busses/i2c-xiic.c
> >>>> +++ b/drivers/i2c/busses/i2c-xiic.c
> >>>> @@ -543,7 +543,6 @@ static void xiic_start_recv(struct xiic_i2c *i2c)  {
> >>>>  	u8 rx_watermark;
> >>>>  	struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
> >>>> -	unsigned long flags;
> >>>>
> >>>>  	/* Clear and enable Rx full interrupt. */
> >>>>  	xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK |
> >>>> XIIC_INTR_TX_ERROR_MASK); @@ -559,7 +558,6 @@ static void
> >>>> xiic_start_recv(struct xiic_i2c *i2c)
> >>>>  		rx_watermark = IIC_RX_FIFO_DEPTH;
> >>>>  	xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
> >>>>
> >>>> -	local_irq_save(flags);
> >>>
> >>> It is added as part of (i2c: xiic: Make the start and the byte count
> >>> write atomic - ae7304c3ea28a3ba47a7a8312c76c654ef24967e) commit to
> >> make the below 2 register writes atomic so that the controller
> >> doesn't produce a wrong transaction.
> >>
> >> Two consecutive register writes are not atomic, they are posted as
> >> two consecutive writes on the bus and will reach the hardware IP as
> >> two separate writes with arbitrary delay between them.
> >>
> >> I think the intention of the patch ae7304c3ea28 ("i2c: xiic: Make the
> >> start and the byte count write atomic") is to make sure that there
> >> will be no read/write register access to the XIIC IP while those
> >> registers in
> >> local_irq_save()/local_irq_restore() block are written, and that makes sense.
> >>
> >> However, local_irq_save()/local_irq_restore() is local to one CPU
> >> core, it does not prevent another CPU core from posting register
> >> read/write access to the XIIC IP (for example from xiic_process()
> >> threaded interrupt handler, which might just be running on that other CPU
> core).
> >>
> >> The &i2c->lock mutex is locked by both the xiic_start() and
> >> xiic_process(), and therefore guarantees that no other thread can
> >> post read/write register access to the XIIC IP while xiic_start()
> >> (and thus xiic_recv_start(), which is called from
> >> it) is running.
> >>
> >> And so, I think this local_irq_save()/local_irq_restore() can be removed.
> >>
> >>> (If there is a delay between the 2 register writes, controller is
> >>> messing up with the transaction). It is not intended for locking
> >>> between this
> >> function and isr. So, this can't be removed.
> >>
> >> But the bus can insert arbitrary delay between two consecutive
> >> register writes to the XIIC IP, so if the timing between the two
> >> register writes is really critical, then I don't see how to guarantee
> >> the timing requirement. Do you happen to have more details on what
> >> really happens on the hardware level here , maybe some errata document
> for the XIIC IP?
> >
> > From the commit description of "i2c: xiic: Make the start and the byte
> > count write atomic"(ae7304c3ea28a3ba47a7a8312c76c654ef24967e),
> >
> > [Disable interrupts while configuring the transfer and enable them back.
> >
> > We have below as the programming sequence 1. start and slave address
> > 2. byte count and stop
> >
> > In some customer platform there was a lot of interrupts between 1 and
> > 2 and after slave address (around 7 clock cyles) if 2 is not executed
> > then the transaction is nacked.
> 
> Is that a hardware property of the XIIC IP, that the transaction is NACKed after
> 7 clock cycles of the XIIC IP ?

Yes, it is from IP. 

> 
> > To fix this case make the 2 writes atomic.]
> 
> But using local_irq_save()/local_irq_restore() will not make two separate
> register writes atomic, they will still be two separate consecutive register
> writes.
> 
> Also note that another CPU core can very well be generating a lot of traffic on
> the bus between current CPU core and the XIIC IP, so the first write from
> current CPU core to the XIIC IP would arrive at the XIIC IP, then the bus will be
> busy for a long time with other requiests (more than 7 cycles), and then the
> second write to the XIIC IP will arrive at the XIIC IP. The
> local_irq_save()/local_irq_restore() can not prevent this scenario, but this is
> very real on a system under load.

Yeah, such possibility is there. local_irq_save()/local_irq_restore() can't be the right solution
as you said.

> 
> > So, this local_irq_save()/local_irq_restore() is not related to
> > exclusive access in the driver (xiic_process vs xiic_start), but to supply the
> byte count to controller before it completes transmitting START and slave
> address.
> 
> But then shouldn't the XIIC IP be fixed / extended with support for such an
> atomic update instead ?

I have started communicating with the hardware team on why the IP behavior is like this. 
But, that will take more time to get to a conclusion. We can continue this discussion here.
So, how about we remove this patch from the current series and go ahead with the rest of patches?
In that case please send a v2 with the minimal changes suggested in other patches (4 and 5)?

Thanks
Raviteja N
> 
> >> Thanks
> >>
> >>>>  	if (!(msg->flags & I2C_M_NOSTART))
> >>>>  		/* write the address */
> >>>>  		xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, @@ -569,7 +567,6
> >>
> >> [...]
Marek Vasut Aug. 19, 2020, 5:42 p.m. UTC | #8
On 7/8/20 5:23 PM, Raviteja Narayanam wrote:
> Hi Marek,

Hi,

[...]

>>> So, this local_irq_save()/local_irq_restore() is not related to
>>> exclusive access in the driver (xiic_process vs xiic_start), but to supply the
>> byte count to controller before it completes transmitting START and slave
>> address.
>>
>> But then shouldn't the XIIC IP be fixed / extended with support for such an
>> atomic update instead ?
> 
> I have started communicating with the hardware team on why the IP behavior is like this.

Any news from the hardware team ?

[...]
Raviteja Narayanam Aug. 24, 2020, 8:27 a.m. UTC | #9
> -----Original Message-----
> From: Marek Vasut <marex@denx.de>
> Sent: Wednesday, August 19, 2020 11:12 PM
> To: Raviteja Narayanam <rna@xilinx.com>; linux-i2c@vger.kernel.org
> Cc: Michal Simek <michals@xilinx.com>; Shubhrajyoti Datta
> <shubhraj@xilinx.com>; Wolfram Sang <wsa@kernel.org>
> Subject: Re: [PATCH 2/5] i2c: xiic: Drop broken interrupt handler
> 
> On 7/8/20 5:23 PM, Raviteja Narayanam wrote:
> > Hi Marek,
> 
> Hi,
> 
> [...]
> 
> >>> So, this local_irq_save()/local_irq_restore() is not related to
> >>> exclusive access in the driver (xiic_process vs xiic_start), but to
> >>> supply the
> >> byte count to controller before it completes transmitting START and
> >> slave address.
> >>
> >> But then shouldn't the XIIC IP be fixed / extended with support for
> >> such an atomic update instead ?
> >
> > I have started communicating with the hardware team on why the IP
> behavior is like this.
> 
> Any news from the hardware team ?

" The expectation from the IP is un interrupted read i.e the read command should be un interrupted and max delay expected is not more than 35-40 us provided IIC frequency is 100 Khz. Please check if we can manage this in Software. If delay is not managed enable iic control only after stop related data is received"
That was the response as is. 
The workaround suggested above is to enable the AXI I2C only after second register write(STOP bit info with read count) is completed. This is not generic, so we couldn't move forward.
Our typical application scenario is like this "START WRITE, REPEATED START READ, STOP". So, we must enable AXI I2C before read count is sent.

> 
> [...]
Marek Vasut Aug. 24, 2020, 11:58 a.m. UTC | #10
On 8/24/20 10:27 AM, Raviteja Narayanam wrote:

[...]

>>>>> So, this local_irq_save()/local_irq_restore() is not related to
>>>>> exclusive access in the driver (xiic_process vs xiic_start), but to
>>>>> supply the
>>>> byte count to controller before it completes transmitting START and
>>>> slave address.
>>>>
>>>> But then shouldn't the XIIC IP be fixed / extended with support for
>>>> such an atomic update instead ?
>>>
>>> I have started communicating with the hardware team on why the IP
>> behavior is like this.
>>
>> Any news from the hardware team ?
> 
> " The expectation from the IP is un interrupted read i.e the read command should be un interrupted and max delay expected is not more than 35-40 us provided IIC frequency is 100 Khz. Please check if we can manage this in Software. If delay is not managed enable iic control only after stop related data is received"
> That was the response as is. 
> The workaround suggested above is to enable the AXI I2C only after second register write(STOP bit info with read count) is completed. This is not generic, so we couldn't move forward.
> Our typical application scenario is like this "START WRITE, REPEATED START READ, STOP". So, we must enable AXI I2C before read count is sent.

So, if I understand it correctly, the XIIC IP is broken and cannot be
fixed in software reliably ? How shall we proceed then ?
Raviteja Narayanam Aug. 25, 2020, 9:44 a.m. UTC | #11
> -----Original Message-----
> From: Marek Vasut <marex@denx.de>
> Sent: Monday, August 24, 2020 5:28 PM
> To: Raviteja Narayanam <rna@xilinx.com>; linux-i2c@vger.kernel.org
> Cc: Michal Simek <michals@xilinx.com>; Shubhrajyoti Datta
> <shubhraj@xilinx.com>; Wolfram Sang <wsa@kernel.org>; Srinivas Goud
> <sgoud@xilinx.com>
> Subject: Re: [PATCH 2/5] i2c: xiic: Drop broken interrupt handler
> 
> On 8/24/20 10:27 AM, Raviteja Narayanam wrote:
> 
> [...]
> 
> >>>>> So, this local_irq_save()/local_irq_restore() is not related to
> >>>>> exclusive access in the driver (xiic_process vs xiic_start), but
> >>>>> to supply the
> >>>> byte count to controller before it completes transmitting START and
> >>>> slave address.
> >>>>
> >>>> But then shouldn't the XIIC IP be fixed / extended with support for
> >>>> such an atomic update instead ?
> >>>
> >>> I have started communicating with the hardware team on why the IP
> >> behavior is like this.
> >>
> >> Any news from the hardware team ?
> >
> > " The expectation from the IP is un interrupted read i.e the read command
> should be un interrupted and max delay expected is not more than 35-40 us
> provided IIC frequency is 100 Khz. Please check if we can manage this in
> Software. If delay is not managed enable iic control only after stop related
> data is received"
> > That was the response as is.
> > The workaround suggested above is to enable the AXI I2C only after
> second register write(STOP bit info with read count) is completed. This is not
> generic, so we couldn't move forward.
> > Our typical application scenario is like this "START WRITE, REPEATED START
> READ, STOP". So, we must enable AXI I2C before read count is sent.
> 
> So, if I understand it correctly, the XIIC IP is broken and cannot be fixed in
> software reliably ? How shall we proceed then ?

We are thinking of two options.
1. Trying for a SW fix to workaround this problem. Waiting on the HW IP team for any other suggestions.
2. XIIC IP has two modes of operation as stated in the user guide - Dynamic and Standard modes.
Currently, the linux driver is using only dynamic mode and this bug appears here.
The SW programming for standard mode is different and we are testing it for all possible use cases. Once
That comes out successful, we will send out the patches.
Marek Vasut Aug. 25, 2020, 8:50 p.m. UTC | #12
On 8/25/20 11:44 AM, Raviteja Narayanam wrote:

Hi,

[...]

>>>>>>> So, this local_irq_save()/local_irq_restore() is not related to
>>>>>>> exclusive access in the driver (xiic_process vs xiic_start), but
>>>>>>> to supply the
>>>>>> byte count to controller before it completes transmitting START and
>>>>>> slave address.
>>>>>>
>>>>>> But then shouldn't the XIIC IP be fixed / extended with support for
>>>>>> such an atomic update instead ?
>>>>>
>>>>> I have started communicating with the hardware team on why the IP
>>>> behavior is like this.
>>>>
>>>> Any news from the hardware team ?
>>>
>>> " The expectation from the IP is un interrupted read i.e the read command
>> should be un interrupted and max delay expected is not more than 35-40 us
>> provided IIC frequency is 100 Khz. Please check if we can manage this in
>> Software. If delay is not managed enable iic control only after stop related
>> data is received"
>>> That was the response as is.
>>> The workaround suggested above is to enable the AXI I2C only after
>> second register write(STOP bit info with read count) is completed. This is not
>> generic, so we couldn't move forward.
>>> Our typical application scenario is like this "START WRITE, REPEATED START
>> READ, STOP". So, we must enable AXI I2C before read count is sent.
>>
>> So, if I understand it correctly, the XIIC IP is broken and cannot be fixed in
>> software reliably ? How shall we proceed then ?
> 
> We are thinking of two options.
> 1. Trying for a SW fix to workaround this problem. Waiting on the HW IP team for any other suggestions.
> 2. XIIC IP has two modes of operation as stated in the user guide - Dynamic and Standard modes.
> Currently, the linux driver is using only dynamic mode and this bug appears here.
> The SW programming for standard mode is different and we are testing it for all possible use cases. Once
> That comes out successful, we will send out the patches.

Is this dynamic/standard mode switching what is already in the
downstream xilinx kernel tree ?

I think we should fix what is already upstream first, and only then add
more complexity.

[...]
Raviteja Narayanam Sept. 7, 2020, 8:51 a.m. UTC | #13
> -----Original Message-----
> From: Marek Vasut <marex@denx.de>
> Sent: Wednesday, August 26, 2020 2:21 AM
> To: Raviteja Narayanam <rna@xilinx.com>; linux-i2c@vger.kernel.org
> Cc: Michal Simek <michals@xilinx.com>; Shubhrajyoti Datta
> <shubhraj@xilinx.com>; Wolfram Sang <wsa@kernel.org>; Srinivas Goud
> <sgoud@xilinx.com>
> Subject: Re: [PATCH 2/5] i2c: xiic: Drop broken interrupt handler
> 
> On 8/25/20 11:44 AM, Raviteja Narayanam wrote:
> 
> Hi,
> 
> [...]
> 
> >>>>>>> So, this local_irq_save()/local_irq_restore() is not related to
> >>>>>>> exclusive access in the driver (xiic_process vs xiic_start), but
> >>>>>>> to supply the
> >>>>>> byte count to controller before it completes transmitting START
> >>>>>> and slave address.
> >>>>>>
> >>>>>> But then shouldn't the XIIC IP be fixed / extended with support
> >>>>>> for such an atomic update instead ?
> >>>>>
> >>>>> I have started communicating with the hardware team on why the IP
> >>>> behavior is like this.
> >>>>
> >>>> Any news from the hardware team ?
> >>>
> >>> " The expectation from the IP is un interrupted read i.e the read
> >>> command
> >> should be un interrupted and max delay expected is not more than
> >> 35-40 us provided IIC frequency is 100 Khz. Please check if we can
> >> manage this in Software. If delay is not managed enable iic control
> >> only after stop related data is received"
> >>> That was the response as is.
> >>> The workaround suggested above is to enable the AXI I2C only after
> >> second register write(STOP bit info with read count) is completed.
> >> This is not generic, so we couldn't move forward.
> >>> Our typical application scenario is like this "START WRITE, REPEATED
> >>> START
> >> READ, STOP". So, we must enable AXI I2C before read count is sent.
> >>
> >> So, if I understand it correctly, the XIIC IP is broken and cannot be
> >> fixed in software reliably ? How shall we proceed then ?
> >
> > We are thinking of two options.
> > 1. Trying for a SW fix to workaround this problem. Waiting on the HW IP
> team for any other suggestions.
> > 2. XIIC IP has two modes of operation as stated in the user guide - Dynamic
> and Standard modes.
> > Currently, the linux driver is using only dynamic mode and this bug appears
> here.
> > The SW programming for standard mode is different and we are testing
> > it for all possible use cases. Once That comes out successful, we will send
> out the patches.
> 
> Is this dynamic/standard mode switching what is already in the downstream
> xilinx kernel tree ?
> 
> I think we should fix what is already upstream first, and only then add more
> complexity.

Yes, agreed. But, that would be a hardware fix in the future IP release. Since we have
to support existing IP versions, we are planning to upstream the standard mode
and based on the DT property (IP version), we will switch.

Thanks,
Raviteja N

> 
> [...]
Marek Vasut Sept. 8, 2020, 2:04 p.m. UTC | #14
On 9/7/20 10:51 AM, Raviteja Narayanam wrote:

[...]

>> Is this dynamic/standard mode switching what is already in the downstream
>> xilinx kernel tree ?
>>
>> I think we should fix what is already upstream first, and only then add more
>> complexity.
> 
> Yes, agreed. But, that would be a hardware fix in the future IP release. Since we have
> to support existing IP versions, we are planning to upstream the standard mode
> and based on the DT property (IP version), we will switch.

Is the dynamic mode addition be backported to stable as well ?
It is a lot of code, so I have a feeling that would be difficult.

Maybe it would be better to mark the xiic driver as broken until the
hardware is fixed ?
Michal Simek Sept. 11, 2020, 10:28 a.m. UTC | #15
On 08. 09. 20 16:04, Marek Vasut wrote:
> On 9/7/20 10:51 AM, Raviteja Narayanam wrote:
> 
> [...]
> 
>>> Is this dynamic/standard mode switching what is already in the downstream
>>> xilinx kernel tree ?
>>>
>>> I think we should fix what is already upstream first, and only then add more
>>> complexity.
>>
>> Yes, agreed. But, that would be a hardware fix in the future IP release. Since we have
>> to support existing IP versions, we are planning to upstream the standard mode
>> and based on the DT property (IP version), we will switch.
> 
> Is the dynamic mode addition be backported to stable as well ?
> It is a lot of code, so I have a feeling that would be difficult.

Let's see when that changes are available.

> Maybe it would be better to mark the xiic driver as broken until the
> hardware is fixed ?

I don't have a preference here.
Wolfram: What do you think?

Thanks,
Michal
diff mbox series

Patch

diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index 0777e577720db..6db71c0fb6583 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -543,7 +543,6 @@  static void xiic_start_recv(struct xiic_i2c *i2c)
 {
 	u8 rx_watermark;
 	struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
-	unsigned long flags;
 
 	/* Clear and enable Rx full interrupt. */
 	xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK | XIIC_INTR_TX_ERROR_MASK);
@@ -559,7 +558,6 @@  static void xiic_start_recv(struct xiic_i2c *i2c)
 		rx_watermark = IIC_RX_FIFO_DEPTH;
 	xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
 
-	local_irq_save(flags);
 	if (!(msg->flags & I2C_M_NOSTART))
 		/* write the address */
 		xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET,
@@ -569,7 +567,6 @@  static void xiic_start_recv(struct xiic_i2c *i2c)
 
 	xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET,
 		msg->len | ((i2c->nmsgs == 1) ? XIIC_TX_DYN_STOP_MASK : 0));
-	local_irq_restore(flags);
 
 	if (i2c->nmsgs == 1)
 		/* very last, enable bus not busy as well */
@@ -609,26 +606,6 @@  static void xiic_start_send(struct xiic_i2c *i2c)
 		XIIC_INTR_BNB_MASK);
 }
 
-static irqreturn_t xiic_isr(int irq, void *dev_id)
-{
-	struct xiic_i2c *i2c = dev_id;
-	u32 pend, isr, ier;
-	irqreturn_t ret = IRQ_NONE;
-	/* Do not processes a devices interrupts if the device has no
-	 * interrupts pending
-	 */
-
-	dev_dbg(i2c->adap.dev.parent, "%s entry\n", __func__);
-
-	isr = xiic_getreg32(i2c, XIIC_IISR_OFFSET);
-	ier = xiic_getreg32(i2c, XIIC_IIER_OFFSET);
-	pend = isr & ier;
-	if (pend)
-		ret = IRQ_WAKE_THREAD;
-
-	return ret;
-}
-
 static void __xiic_start_xfer(struct xiic_i2c *i2c)
 {
 	int first = 1;
@@ -807,7 +784,7 @@  static int xiic_i2c_probe(struct platform_device *pdev)
 	pm_runtime_use_autosuspend(i2c->dev);
 	pm_runtime_set_active(i2c->dev);
 	pm_runtime_enable(i2c->dev);
-	ret = devm_request_threaded_irq(&pdev->dev, irq, xiic_isr,
+	ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
 					xiic_process, IRQF_ONESHOT,
 					pdev->name, i2c);