diff mbox series

[RFC] i2c: gpio: fault-injector: add 'lose_arbitration' injector

Message ID 20190121142839.19011-1-wsa+renesas@sang-engineering.com
State Superseded
Headers show
Series [RFC] i2c: gpio: fault-injector: add 'lose_arbitration' injector | expand

Commit Message

Wolfram Sang Jan. 21, 2019, 2:28 p.m. UTC
Here is a fault injector simulating 'arbitration lost' from multi-master
setups. Read the docs for its usage.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---

This is the most reliable result I came up with so far for simulating lost
arbitration (after playing a lot with falling SDA as trigger first, but SCL
seems the way to go). Works fine with the i2c-sh_mobile driver on a Renesas
Lager board. I am not super-happy with the interrupt latency causing some bits
to be non-disturbed, but for now, I don't see a way around it except for
busy-polling which I think is too excessive. RFC for now because someone still
might have a better idea :)

Needs my previous fault-injector cleanup patches, a branch for consuming is here:

git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git renesas/iic-arbitration-lost

Now let's see how to fix the sh_mobile driver...

 Documentation/i2c/gpio-fault-injection | 26 +++++++++++++++
 drivers/i2c/busses/i2c-gpio.c          | 61 ++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)

Comments

Wolfram Sang Feb. 5, 2019, 1:31 p.m. UTC | #1
On Mon, Jan 21, 2019 at 03:28:39PM +0100, Wolfram Sang wrote:
> Here is a fault injector simulating 'arbitration lost' from multi-master
> setups. Read the docs for its usage.
> 
> Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>

Geert, if you would have time for a high-level review, I'd appreciate
this very much!

> ---
> 
> This is the most reliable result I came up with so far for simulating lost
> arbitration (after playing a lot with falling SDA as trigger first, but SCL
> seems the way to go). Works fine with the i2c-sh_mobile driver on a Renesas
> Lager board. I am not super-happy with the interrupt latency causing some bits
> to be non-disturbed, but for now, I don't see a way around it except for
> busy-polling which I think is too excessive. RFC for now because someone still
> might have a better idea :)
> 
> Needs my previous fault-injector cleanup patches, a branch for consuming is here:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git renesas/iic-arbitration-lost
> 
> Now let's see how to fix the sh_mobile driver...
> 
>  Documentation/i2c/gpio-fault-injection | 26 +++++++++++++++
>  drivers/i2c/busses/i2c-gpio.c          | 61 ++++++++++++++++++++++++++++++++++
>  2 files changed, 87 insertions(+)
> 
> diff --git a/Documentation/i2c/gpio-fault-injection b/Documentation/i2c/gpio-fault-injection
> index 1a44e3edc0c4..b6f36ffe55e1 100644
> --- a/Documentation/i2c/gpio-fault-injection
> +++ b/Documentation/i2c/gpio-fault-injection
> @@ -83,3 +83,29 @@ This is why bus recovery (up to 9 clock pulses) must either check SDA or send
>  additional STOP conditions to ensure the bus has been released. Otherwise
>  random data will be written to a device!
>  
> +Lost arbitration
> +================
> +
> +Here, we want to simulate the condition where the master under tests loses the
> +bus arbitration against another master in a multi-master setup.
> +
> +"lose_arbitration"
> +------------------
> +
> +This file is write only and you need to write the number of desired lost
> +arbitrations in a row. The calling process will then sleep and interfere with
> +transfers from the master under test when they appear until that number is
> +reached. The process is interruptible, though.
> +
> +Arbitration lost is achieved by waiting for SCL going down by the master under
> +test and then pulling SDA low for some time. So, the I2C address sent out
> +should be corrupted and that should be detected properly. That means that the
> +address sent out should have a lot of '1' bits to be able to detect corruption.
> +There doesn't need to be a device at this address because arbitration lost
> +should be detected beforehand. Also note, that SCL going down is monitored
> +using interrupts, so the interrupt latency might cause the first bits to be not
> +corrupted. A good starting script for using this fault injector:
> +
> +# echo 1 > lose_arbitration &
> +# i2cget -y <bus_to_test> 0x3f
> +
> diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
> index ca04fa25a141..c630172b4787 100644
> --- a/drivers/i2c/busses/i2c-gpio.c
> +++ b/drivers/i2c/busses/i2c-gpio.c
> @@ -7,12 +7,14 @@
>   * it under the terms of the GNU General Public License version 2 as
>   * published by the Free Software Foundation.
>   */
> +#include <linux/completion.h>
>  #include <linux/debugfs.h>
>  #include <linux/delay.h>
>  #include <linux/gpio/consumer.h>
>  #include <linux/i2c-algo-bit.h>
>  #include <linux/i2c.h>
>  #include <linux/init.h>
> +#include <linux/interrupt.h>
>  #include <linux/module.h>
>  #include <linux/of.h>
>  #include <linux/platform_data/i2c-gpio.h>
> @@ -27,6 +29,7 @@ struct i2c_gpio_private_data {
>  	struct i2c_gpio_platform_data pdata;
>  #ifdef CONFIG_I2C_GPIO_FAULT_INJECTOR
>  	struct dentry *debug_dir;
> +	struct completion irq_happened;
>  #endif
>  };
>  
> @@ -162,6 +165,59 @@ static int fops_incomplete_write_byte_set(void *data, u64 addr)
>  }
>  DEFINE_DEBUGFS_ATTRIBUTE(fops_incomplete_write_byte, NULL, fops_incomplete_write_byte_set, "%llu\n");
>  
> +static irqreturn_t lose_arbitration_irq(int irq, void *dev_id)
> +{
> +	struct i2c_gpio_private_data *priv = dev_id;
> +
> +	setsda(&priv->bit_data, 0);
> +	udelay(200);
> +	setsda(&priv->bit_data, 1);
> +
> +	complete(&priv->irq_happened);
> +	return IRQ_HANDLED;
> +}
> +
> +static int fops_lose_arbitration_set(void *data, u64 num_faults)
> +{
> +	struct i2c_gpio_private_data *priv = data;
> +	int irq = gpiod_to_irq(priv->scl);
> +	int ret, i;
> +
> +	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
> +
> +	/*
> +	 * Interrupt on falling SCL. This ensures that the master under test has
> +	 * really started the transfer. Interrupt on falling SDA did only
> +	 * exercise 'bus busy' detection on some HW but not 'arbitration lost'.
> +	 * Note that the interrupt latency may cause the first bits to be
> +	 * transmitted correctly.
> +	 */
> +	ret = gpiod_direction_input(priv->scl);
> +	if (ret)
> +		goto unlock;
> +
> +	ret = request_irq(irq, lose_arbitration_irq, IRQF_TRIGGER_FALLING,
> +			  "i2c-gpio-fi", priv);
> +	if (ret)
> +		goto output;
> +
> +	for (i = 0; i < num_faults; i++) {
> +		ret = wait_for_completion_interruptible(&priv->irq_happened);
> +		if (ret)
> +			break;
> +		reinit_completion(&priv->irq_happened);
> +	}
> +
> +	free_irq(irq, priv);
> + output:
> +	ret = gpiod_direction_output(priv->scl, 1);
> + unlock:
> +	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
> +
> +	return ret;
> +}
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_lose_arbitration, NULL, fops_lose_arbitration_set, "%llu\n");
> +
>  static void i2c_gpio_fault_injector_init(struct platform_device *pdev)
>  {
>  	struct i2c_gpio_private_data *priv = platform_get_drvdata(pdev);
> @@ -181,10 +237,15 @@ static void i2c_gpio_fault_injector_init(struct platform_device *pdev)
>  	if (!priv->debug_dir)
>  		return;
>  
> +	init_completion(&priv->irq_happened);
> +
>  	debugfs_create_file_unsafe("incomplete_address_phase", 0200, priv->debug_dir,
>  				   priv, &fops_incomplete_addr_phase);
>  	debugfs_create_file_unsafe("incomplete_write_byte", 0200, priv->debug_dir,
>  				   priv, &fops_incomplete_write_byte);
> +	if (priv->bit_data.getscl)
> +		debugfs_create_file_unsafe("lose_arbitration", 0200, priv->debug_dir,
> +					   priv, &fops_lose_arbitration);
>  	debugfs_create_file_unsafe("scl", 0600, priv->debug_dir, priv, &fops_scl);
>  	debugfs_create_file_unsafe("sda", 0600, priv->debug_dir, priv, &fops_sda);
>  }
> -- 
> 2.11.0
>
Geert Uytterhoeven Feb. 11, 2019, 10:30 a.m. UTC | #2
Hi Wolfram,

On Mon, Jan 21, 2019 at 3:29 PM Wolfram Sang
<wsa+renesas@sang-engineering.com> wrote:
> Here is a fault injector simulating 'arbitration lost' from multi-master
> setups. Read the docs for its usage.
>
> Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>

Thanks, looks like a valuable injector case!

> This is the most reliable result I came up with so far for simulating lost
> arbitration (after playing a lot with falling SDA as trigger first, but SCL
> seems the way to go). Works fine with the i2c-sh_mobile driver on a Renesas

Using SCL as the trigger makes most sense to me, too.

> --- a/drivers/i2c/busses/i2c-gpio.c
> +++ b/drivers/i2c/busses/i2c-gpio.c

> @@ -162,6 +165,59 @@ static int fops_incomplete_write_byte_set(void *data, u64 addr)
>  }
>  DEFINE_DEBUGFS_ATTRIBUTE(fops_incomplete_write_byte, NULL, fops_incomplete_write_byte_set, "%llu\n");
>
> +static irqreturn_t lose_arbitration_irq(int irq, void *dev_id)
> +{
> +       struct i2c_gpio_private_data *priv = dev_id;
> +
> +       setsda(&priv->bit_data, 0);
> +       udelay(200);

Please add a #define for this value.

> +       setsda(&priv->bit_data, 1);
> +
> +       complete(&priv->irq_happened);
> +       return IRQ_HANDLED;
> +}
> +
> +static int fops_lose_arbitration_set(void *data, u64 num_faults)
> +{
> +       struct i2c_gpio_private_data *priv = data;
> +       int irq = gpiod_to_irq(priv->scl);

As request_irq() takes an unsigned int irq, please check for a negative error
code here.

> +       int ret, i;

u64 i;

Yes, 64-bit values may be a bit excessive, but that's what the caller is
passing, and truncating to int may cause unexpected behavior.

> +       ret = request_irq(irq, lose_arbitration_irq, IRQF_TRIGGER_FALLING,
> +                         "i2c-gpio-fi", priv);

"i2c-gpio-fault-injection"?

> +       if (ret)
> +               goto output;
> +
> +       for (i = 0; i < num_faults; i++) {
> +               ret = wait_for_completion_interruptible(&priv->irq_happened);
> +               if (ret)
> +                       break;
> +               reinit_completion(&priv->irq_happened);

The reinitialization may race with the interrupt handler. Probably you don't
care, though, as all of this is "best effort" anyway.

Perhaps you can do the counting in the interrupt handler instead, and only
trigger completion after the wanted number of lost arbitrations has been
performed?


> +DEFINE_DEBUGFS_ATTRIBUTE(fops_lose_arbitration, NULL, fops_lose_arbitration_set, "%llu\n");

I think you can drop the format if you don't provide a "get" method.
Or just implement the "get" method, too ;-)

Gr{oetje,eeting}s,

                        Geert
Wolfram Sang Feb. 11, 2019, 8:59 p.m. UTC | #3
Hi Geert,

On Mon, Feb 11, 2019 at 11:30:56AM +0100, Geert Uytterhoeven wrote:
> Hi Wolfram,
> 
> On Mon, Jan 21, 2019 at 3:29 PM Wolfram Sang
> <wsa+renesas@sang-engineering.com> wrote:
> > Here is a fault injector simulating 'arbitration lost' from multi-master
> > setups. Read the docs for its usage.
> >
> > Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
> 
> Thanks, looks like a valuable injector case!
> 
> > This is the most reliable result I came up with so far for simulating lost
> > arbitration (after playing a lot with falling SDA as trigger first, but SCL
> > seems the way to go). Works fine with the i2c-sh_mobile driver on a Renesas
> 
> Using SCL as the trigger makes most sense to me, too.

Thanks for the review and glad you like the use case and implementation
approach.

> > +       struct i2c_gpio_private_data *priv = dev_id;
> > +
> > +       setsda(&priv->bit_data, 0);
> > +       udelay(200);
> 
> Please add a #define for this value.

Sure thing, this was a typical RFC marker :) It is now calculated based
on bit_data.udelay and has a comment.


> > +       struct i2c_gpio_private_data *priv = data;
> > +       int irq = gpiod_to_irq(priv->scl);
> 
> As request_irq() takes an unsigned int irq, please check for a negative error
> code here.


Will do.

> > +       int ret, i;
> 
> u64 i;
> 
> Yes, 64-bit values may be a bit excessive, but that's what the caller is
> passing, and truncating to int may cause unexpected behavior.

OK, but won't be needed after my refactoring to count this in the
interrupt.

> > +       ret = request_irq(irq, lose_arbitration_irq, IRQF_TRIGGER_FALLING,
> > +                         "i2c-gpio-fi", priv);
> 
> "i2c-gpio-fault-injection"?

OK. That was RFC style, too.

> > +       if (ret)
> > +               goto output;
> > +
> > +       for (i = 0; i < num_faults; i++) {
> > +               ret = wait_for_completion_interruptible(&priv->irq_happened);
> > +               if (ret)
> > +                       break;
> > +               reinit_completion(&priv->irq_happened);
> 
> The reinitialization may race with the interrupt handler. Probably you don't
> care, though, as all of this is "best effort" anyway.
> 
> Perhaps you can do the counting in the interrupt handler instead, and only
> trigger completion after the wanted number of lost arbitrations has been
> performed?

Yes, I like this approach! Will do.

> 
> 
> > +DEFINE_DEBUGFS_ATTRIBUTE(fops_lose_arbitration, NULL, fops_lose_arbitration_set, "%llu\n");
> 
> I think you can drop the format if you don't provide a "get" method.
> Or just implement the "get" method, too ;-)

Any pointer for that information? I seem to recall that the format
string is also used to parse the user string to the set function. I
sadly can't find that documented, but all the helper macros in
fs/debugfs/file.c have a format string, too, even if WO.

I have the code ready but testing and pushing out needs to wait until
tomorrow.

Thanks again,

   Wolfram
Geert Uytterhoeven Feb. 12, 2019, 8:31 a.m. UTC | #4
Hi Wolfram,

On Mon, Feb 11, 2019 at 9:59 PM Wolfram Sang <wsa@the-dreams.de> wrote:
> On Mon, Feb 11, 2019 at 11:30:56AM +0100, Geert Uytterhoeven wrote:
> > On Mon, Jan 21, 2019 at 3:29 PM Wolfram Sang
> > <wsa+renesas@sang-engineering.com> wrote:
> > > Here is a fault injector simulating 'arbitration lost' from multi-master
> > > setups. Read the docs for its usage.

> > > +       struct i2c_gpio_private_data *priv = dev_id;
> > > +
> > > +       setsda(&priv->bit_data, 0);
> > > +       udelay(200);
> >
> > Please add a #define for this value.
>
> Sure thing, this was a typical RFC marker :) It is now calculated based
> on bit_data.udelay and has a comment.

But that value bears no direct relation to the transfer speed as used by the
master to interfere with, right?
Still, you have to guess anyway.

> > > +DEFINE_DEBUGFS_ATTRIBUTE(fops_lose_arbitration, NULL, fops_lose_arbitration_set, "%llu\n");
> >
> > I think you can drop the format if you don't provide a "get" method.
> > Or just implement the "get" method, too ;-)
>
> Any pointer for that information? I seem to recall that the format
> string is also used to parse the user string to the set function. I
> sadly can't find that documented, but all the helper macros in
> fs/debugfs/file.c have a format string, too, even if WO.

fs/libfs.c:struct simple_attr {

        const char *fmt;        /* format for read operation */

It's being parsed using simple_strtoll(), so it takes whatever format is being
passed by the writer.

Perhaps you were confused by the term "read", which is from the point of
view of userspace, not of the driver.

Gr{oetje,eeting}s,

                        Geert
Wolfram Sang Feb. 16, 2019, 1:04 p.m. UTC | #5
Hi Geert,

> > Sure thing, this was a typical RFC marker :) It is now calculated based
> > on bit_data.udelay and has a comment.
> 
> But that value bears no direct relation to the transfer speed as used by the
> master to interfere with, right?

Yeah, the assumption would have been that i2c-gpio is configured as the
same speed as the master under test. However, ...

> Still, you have to guess anyway.

... I changed the user API meanwhile. The value you write is the value
used for this delay. Because 'number of interfered arbitrations' is
vague anyhow. Because of latencies, interrupts sometimes trigger twice
per interference, so this number becomes meaningless.

> > Any pointer for that information? I seem to recall that the format
> > string is also used to parse the user string to the set function. I
> > sadly can't find that documented, but all the helper macros in
> > fs/debugfs/file.c have a format string, too, even if WO.
> 
> fs/libfs.c:struct simple_attr {
> 
>         const char *fmt;        /* format for read operation */
> 
> It's being parsed using simple_strtoll(), so it takes whatever format is being
> passed by the writer.
> 
> Perhaps you were confused by the term "read", which is from the point of
> view of userspace, not of the driver.

Because all other users (even WO) have that still set, I prefer
consistency here.

Will send out next version, including panic-fault-injector, hopefully
today.

Thanks,

   Wolfram
diff mbox series

Patch

diff --git a/Documentation/i2c/gpio-fault-injection b/Documentation/i2c/gpio-fault-injection
index 1a44e3edc0c4..b6f36ffe55e1 100644
--- a/Documentation/i2c/gpio-fault-injection
+++ b/Documentation/i2c/gpio-fault-injection
@@ -83,3 +83,29 @@  This is why bus recovery (up to 9 clock pulses) must either check SDA or send
 additional STOP conditions to ensure the bus has been released. Otherwise
 random data will be written to a device!
 
+Lost arbitration
+================
+
+Here, we want to simulate the condition where the master under tests loses the
+bus arbitration against another master in a multi-master setup.
+
+"lose_arbitration"
+------------------
+
+This file is write only and you need to write the number of desired lost
+arbitrations in a row. The calling process will then sleep and interfere with
+transfers from the master under test when they appear until that number is
+reached. The process is interruptible, though.
+
+Arbitration lost is achieved by waiting for SCL going down by the master under
+test and then pulling SDA low for some time. So, the I2C address sent out
+should be corrupted and that should be detected properly. That means that the
+address sent out should have a lot of '1' bits to be able to detect corruption.
+There doesn't need to be a device at this address because arbitration lost
+should be detected beforehand. Also note, that SCL going down is monitored
+using interrupts, so the interrupt latency might cause the first bits to be not
+corrupted. A good starting script for using this fault injector:
+
+# echo 1 > lose_arbitration &
+# i2cget -y <bus_to_test> 0x3f
+
diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index ca04fa25a141..c630172b4787 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -7,12 +7,14 @@ 
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/completion.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c-algo-bit.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_data/i2c-gpio.h>
@@ -27,6 +29,7 @@  struct i2c_gpio_private_data {
 	struct i2c_gpio_platform_data pdata;
 #ifdef CONFIG_I2C_GPIO_FAULT_INJECTOR
 	struct dentry *debug_dir;
+	struct completion irq_happened;
 #endif
 };
 
@@ -162,6 +165,59 @@  static int fops_incomplete_write_byte_set(void *data, u64 addr)
 }
 DEFINE_DEBUGFS_ATTRIBUTE(fops_incomplete_write_byte, NULL, fops_incomplete_write_byte_set, "%llu\n");
 
+static irqreturn_t lose_arbitration_irq(int irq, void *dev_id)
+{
+	struct i2c_gpio_private_data *priv = dev_id;
+
+	setsda(&priv->bit_data, 0);
+	udelay(200);
+	setsda(&priv->bit_data, 1);
+
+	complete(&priv->irq_happened);
+	return IRQ_HANDLED;
+}
+
+static int fops_lose_arbitration_set(void *data, u64 num_faults)
+{
+	struct i2c_gpio_private_data *priv = data;
+	int irq = gpiod_to_irq(priv->scl);
+	int ret, i;
+
+	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
+
+	/*
+	 * Interrupt on falling SCL. This ensures that the master under test has
+	 * really started the transfer. Interrupt on falling SDA did only
+	 * exercise 'bus busy' detection on some HW but not 'arbitration lost'.
+	 * Note that the interrupt latency may cause the first bits to be
+	 * transmitted correctly.
+	 */
+	ret = gpiod_direction_input(priv->scl);
+	if (ret)
+		goto unlock;
+
+	ret = request_irq(irq, lose_arbitration_irq, IRQF_TRIGGER_FALLING,
+			  "i2c-gpio-fi", priv);
+	if (ret)
+		goto output;
+
+	for (i = 0; i < num_faults; i++) {
+		ret = wait_for_completion_interruptible(&priv->irq_happened);
+		if (ret)
+			break;
+		reinit_completion(&priv->irq_happened);
+	}
+
+	free_irq(irq, priv);
+ output:
+	ret = gpiod_direction_output(priv->scl, 1);
+ unlock:
+	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
+
+	return ret;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_lose_arbitration, NULL, fops_lose_arbitration_set, "%llu\n");
+
 static void i2c_gpio_fault_injector_init(struct platform_device *pdev)
 {
 	struct i2c_gpio_private_data *priv = platform_get_drvdata(pdev);
@@ -181,10 +237,15 @@  static void i2c_gpio_fault_injector_init(struct platform_device *pdev)
 	if (!priv->debug_dir)
 		return;
 
+	init_completion(&priv->irq_happened);
+
 	debugfs_create_file_unsafe("incomplete_address_phase", 0200, priv->debug_dir,
 				   priv, &fops_incomplete_addr_phase);
 	debugfs_create_file_unsafe("incomplete_write_byte", 0200, priv->debug_dir,
 				   priv, &fops_incomplete_write_byte);
+	if (priv->bit_data.getscl)
+		debugfs_create_file_unsafe("lose_arbitration", 0200, priv->debug_dir,
+					   priv, &fops_lose_arbitration);
 	debugfs_create_file_unsafe("scl", 0600, priv->debug_dir, priv, &fops_scl);
 	debugfs_create_file_unsafe("sda", 0600, priv->debug_dir, priv, &fops_sda);
 }