diff mbox series

[dev-5.2,2/2] i2c: aspeed: add slave inactive timeout support

Message ID 20190904200758.5420-3-jae.hyun.yoo@linux.intel.com
State Rejected, archived
Headers show
Series i2c: aspeed: Add H/W timeout support | expand

Commit Message

Jae Hyun Yoo Sept. 4, 2019, 8:07 p.m. UTC
In case of multi-master environment, if a peer master incorrectly handles
a bus in the middle of a transaction, I2C hardware hangs in slave state
and it can't escape from the slave state, so this commit adds slave
inactive timeout support to recover the bus in the case.

Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
---
 drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 6 deletions(-)

Comments

Tao Ren Sept. 4, 2019, 10:37 p.m. UTC | #1
On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
> In case of multi-master environment, if a peer master incorrectly handles
> a bus in the middle of a transaction, I2C hardware hangs in slave state
> and it can't escape from the slave state, so this commit adds slave
> inactive timeout support to recover the bus in the case.
> 
> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>

Thank you Jae for the patch. Let me play with it and will update back with my results later this week.

Cheers,

Tao

> ---
>  drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>  1 file changed, 73 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
> index 89317929bee4..92e1a249b393 100644
> --- a/drivers/i2c/busses/i2c-aspeed.c
> +++ b/drivers/i2c/busses/i2c-aspeed.c
> @@ -70,10 +70,14 @@
>  #define ASPEED_I2CD_TIME_SCL_HIGH_MASK			GENMASK(19, 16)
>  #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT			12
>  #define ASPEED_I2CD_TIME_SCL_LOW_MASK			GENMASK(15, 12)
> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT	8
> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK	GENMASK(9, 8)
>  #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK		GENMASK(3, 0)
>  #define ASPEED_I2CD_TIME_SCL_REG_MAX			GENMASK(3, 0)
> +
>  /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
> -#define ASPEED_NO_TIMEOUT_CTRL				0
> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT		0
> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK			GENMASK(4, 0)
>  
>  /* 0x0c : I2CD Interrupt Control Register &
>   * 0x10 : I2CD Interrupt Status Register
> @@ -81,6 +85,7 @@
>   * These share bit definitions, so use the same values for the enable &
>   * status bits.
>   */
> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT		BIT(15)
>  #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT			BIT(14)
>  #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE		BIT(13)
>  #define ASPEED_I2CD_INTR_SLAVE_MATCH			BIT(7)
> @@ -96,8 +101,11 @@
>  		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
>  		 ASPEED_I2CD_INTR_ABNORMAL |				       \
>  		 ASPEED_I2CD_INTR_ARBIT_LOSS)
> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS					       \
> +		ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>  #define ASPEED_I2CD_INTR_ALL						       \
> -		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
> +		(ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |		       \
> +		 ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
>  		 ASPEED_I2CD_INTR_BUS_RECOVER_DONE |			       \
>  		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
>  		 ASPEED_I2CD_INTR_ABNORMAL |				       \
> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>  							   u32 divisor);
>  	unsigned long			parent_clk_frequency;
>  	u32				bus_frequency;
> +	u32				hw_timeout_ms;
>  	/* Transaction state. */
>  	enum aspeed_i2c_master_state	master_state;
>  	struct i2c_msg			*msgs;
> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>  }
>  
>  #if IS_ENABLED(CONFIG_I2C_SLAVE)
> +static int aspeed_i2c_check_slave_error(u32 irq_status)
> +{
> +	if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
> +		return -EIO;
> +
> +	return 0;
> +}
> +
>  static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>  {
>  	u32 command, irq_handled = 0;
> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>  	if (!slave)
>  		return 0;
>  
> +	if (aspeed_i2c_check_slave_error(irq_status)) {
> +		dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
> +			irq_status);
> +		irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
> +		bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
> +		return irq_handled;
> +	}
> +
>  	command = readl(bus->base + ASPEED_I2C_CMD_REG);
>  
>  	/* Slave was requested, restart state machine. */
> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>  	}
>  }
>  
> -static int aspeed_i2c_is_irq_error(u32 irq_status)
> +static int aspeed_i2c_check_master_error(u32 irq_status)
>  {
>  	if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>  		return -EAGAIN;
> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>  	 * should clear the command queue effectively taking us back to the
>  	 * INACTIVE state.
>  	 */
> -	ret = aspeed_i2c_is_irq_error(irq_status);
> +	ret = aspeed_i2c_check_master_error(irq_status);
>  	if (ret) {
> -		dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
> +		dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>  			irq_status);
>  		irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>  		if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>  /* precondition: bus.lock has been acquired. */
>  static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>  {
> +	u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>  	u32 divisor, clk_reg_val;
>  
>  	divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>  			ASPEED_I2CD_TIME_THDSTA_MASK |
>  			ASPEED_I2CD_TIME_TACST_MASK);
>  	clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
> +
> +	if (bus->hw_timeout_ms) {
> +		u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
> +			     ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
> +		u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
> +				ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
> +
> +		timeout_base_divisor = 0;
> +
> +		do {
> +			timeout_tick_us = 1000 * (16384 <<
> +						  (timeout_base_divisor << 1)) /
> +					  (bus->parent_clk_frequency / 1000);
> +
> +			if (timeout_base_divisor == div_max ||
> +			    timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
> +			    bus->hw_timeout_ms * 1000)
> +				break;
> +		} while (timeout_base_divisor++ < div_max);
> +
> +		if (timeout_tick_us) {
> +			timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
> +						      timeout_tick_us);
> +			if (timeout_cycles == 0)
> +				timeout_cycles = 1;
> +			else if (timeout_cycles > cycles_max)
> +				timeout_cycles = cycles_max;
> +		} else {
> +			timeout_cycles = 0;
> +		}
> +	} else {
> +		timeout_base_divisor = 0;
> +		timeout_cycles = 0;
> +	}
> +
> +	clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
> +				  timeout_base_divisor);
> +
>  	writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
> -	writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
> +	writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>  
>  	return 0;
>  }
> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>  		}
>  	}
>  
> +	device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
> +				 &bus->hw_timeout_ms);
> +
>  	/* Initialize the I2C adapter */
>  	spin_lock_init(&bus->lock);
>  	init_completion(&bus->cmd_complete);
>
Tao Ren Sept. 5, 2019, 10:28 p.m. UTC | #2
Hi Jae,

On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
> In case of multi-master environment, if a peer master incorrectly handles
> a bus in the middle of a transaction, I2C hardware hangs in slave state
> and it can't escape from the slave state, so this commit adds slave
> inactive timeout support to recover the bus in the case.
> 
> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>

I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?

I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.


Cheers,

Tao

> ---
>  drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>  1 file changed, 73 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
> index 89317929bee4..92e1a249b393 100644
> --- a/drivers/i2c/busses/i2c-aspeed.c
> +++ b/drivers/i2c/busses/i2c-aspeed.c
> @@ -70,10 +70,14 @@
>  #define ASPEED_I2CD_TIME_SCL_HIGH_MASK			GENMASK(19, 16)
>  #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT			12
>  #define ASPEED_I2CD_TIME_SCL_LOW_MASK			GENMASK(15, 12)
> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT	8
> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK	GENMASK(9, 8)
>  #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK		GENMASK(3, 0)
>  #define ASPEED_I2CD_TIME_SCL_REG_MAX			GENMASK(3, 0)
> +
>  /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
> -#define ASPEED_NO_TIMEOUT_CTRL				0
> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT		0
> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK			GENMASK(4, 0)
>  
>  /* 0x0c : I2CD Interrupt Control Register &
>   * 0x10 : I2CD Interrupt Status Register
> @@ -81,6 +85,7 @@
>   * These share bit definitions, so use the same values for the enable &
>   * status bits.
>   */
> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT		BIT(15)
>  #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT			BIT(14)
>  #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE		BIT(13)
>  #define ASPEED_I2CD_INTR_SLAVE_MATCH			BIT(7)
> @@ -96,8 +101,11 @@
>  		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
>  		 ASPEED_I2CD_INTR_ABNORMAL |				       \
>  		 ASPEED_I2CD_INTR_ARBIT_LOSS)
> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS					       \
> +		ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>  #define ASPEED_I2CD_INTR_ALL						       \
> -		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
> +		(ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |		       \
> +		 ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
>  		 ASPEED_I2CD_INTR_BUS_RECOVER_DONE |			       \
>  		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
>  		 ASPEED_I2CD_INTR_ABNORMAL |				       \
> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>  							   u32 divisor);
>  	unsigned long			parent_clk_frequency;
>  	u32				bus_frequency;
> +	u32				hw_timeout_ms;
>  	/* Transaction state. */
>  	enum aspeed_i2c_master_state	master_state;
>  	struct i2c_msg			*msgs;
> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>  }
>  
>  #if IS_ENABLED(CONFIG_I2C_SLAVE)
> +static int aspeed_i2c_check_slave_error(u32 irq_status)
> +{
> +	if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
> +		return -EIO;
> +
> +	return 0;
> +}
> +
>  static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>  {
>  	u32 command, irq_handled = 0;
> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>  	if (!slave)
>  		return 0;
>  
> +	if (aspeed_i2c_check_slave_error(irq_status)) {
> +		dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
> +			irq_status);
> +		irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
> +		bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
> +		return irq_handled;
> +	}
> +
>  	command = readl(bus->base + ASPEED_I2C_CMD_REG);
>  
>  	/* Slave was requested, restart state machine. */
> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>  	}
>  }
>  
> -static int aspeed_i2c_is_irq_error(u32 irq_status)
> +static int aspeed_i2c_check_master_error(u32 irq_status)
>  {
>  	if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>  		return -EAGAIN;
> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>  	 * should clear the command queue effectively taking us back to the
>  	 * INACTIVE state.
>  	 */
> -	ret = aspeed_i2c_is_irq_error(irq_status);
> +	ret = aspeed_i2c_check_master_error(irq_status);
>  	if (ret) {
> -		dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
> +		dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>  			irq_status);
>  		irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>  		if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>  /* precondition: bus.lock has been acquired. */
>  static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>  {
> +	u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>  	u32 divisor, clk_reg_val;
>  
>  	divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>  			ASPEED_I2CD_TIME_THDSTA_MASK |
>  			ASPEED_I2CD_TIME_TACST_MASK);
>  	clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
> +
> +	if (bus->hw_timeout_ms) {
> +		u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
> +			     ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
> +		u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
> +				ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
> +
> +		timeout_base_divisor = 0;
> +
> +		do {
> +			timeout_tick_us = 1000 * (16384 <<
> +						  (timeout_base_divisor << 1)) /
> +					  (bus->parent_clk_frequency / 1000);
> +
> +			if (timeout_base_divisor == div_max ||
> +			    timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
> +			    bus->hw_timeout_ms * 1000)
> +				break;
> +		} while (timeout_base_divisor++ < div_max);
> +
> +		if (timeout_tick_us) {
> +			timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
> +						      timeout_tick_us);
> +			if (timeout_cycles == 0)
> +				timeout_cycles = 1;
> +			else if (timeout_cycles > cycles_max)
> +				timeout_cycles = cycles_max;
> +		} else {
> +			timeout_cycles = 0;
> +		}
> +	} else {
> +		timeout_base_divisor = 0;
> +		timeout_cycles = 0;
> +	}
> +
> +	clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
> +				  timeout_base_divisor);
> +
>  	writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
> -	writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
> +	writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>  
>  	return 0;
>  }
> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>  		}
>  	}
>  
> +	device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
> +				 &bus->hw_timeout_ms);
> +
>  	/* Initialize the I2C adapter */
>  	spin_lock_init(&bus->lock);
>  	init_completion(&bus->cmd_complete);
>
Jae Hyun Yoo Sept. 5, 2019, 10:48 p.m. UTC | #3
Hi Tao,

On 9/5/2019 3:28 PM, Tao Ren wrote:
> Hi Jae,
> 
> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>> In case of multi-master environment, if a peer master incorrectly handles
>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>> and it can't escape from the slave state, so this commit adds slave
>> inactive timeout support to recover the bus in the case.
>>
>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
> 
> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
> 
> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.

Basic concept of this implementation is setting the slave state of
driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of
aspeed_i2c_reset() directly from interrupt context. Instead, when a
master xfer happens after that, it will try bus recovery
through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
if needed.

If this patch doesn't work in your case, test it again after adding
one line code into this driver. See below.

> 
>> ---
>>   drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>   1 file changed, 73 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>> index 89317929bee4..92e1a249b393 100644
>> --- a/drivers/i2c/busses/i2c-aspeed.c
>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>> @@ -70,10 +70,14 @@
>>   #define ASPEED_I2CD_TIME_SCL_HIGH_MASK			GENMASK(19, 16)
>>   #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT			12
>>   #define ASPEED_I2CD_TIME_SCL_LOW_MASK			GENMASK(15, 12)
>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT	8
>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK	GENMASK(9, 8)
>>   #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK		GENMASK(3, 0)
>>   #define ASPEED_I2CD_TIME_SCL_REG_MAX			GENMASK(3, 0)
>> +
>>   /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>> -#define ASPEED_NO_TIMEOUT_CTRL				0
>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT		0
>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK			GENMASK(4, 0)
>>   
>>   /* 0x0c : I2CD Interrupt Control Register &
>>    * 0x10 : I2CD Interrupt Status Register
>> @@ -81,6 +85,7 @@
>>    * These share bit definitions, so use the same values for the enable &
>>    * status bits.
>>    */
>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT		BIT(15)
>>   #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT			BIT(14)
>>   #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE		BIT(13)
>>   #define ASPEED_I2CD_INTR_SLAVE_MATCH			BIT(7)
>> @@ -96,8 +101,11 @@
>>   		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
>>   		 ASPEED_I2CD_INTR_ABNORMAL |				       \
>>   		 ASPEED_I2CD_INTR_ARBIT_LOSS)
>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS					       \
>> +		ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>   #define ASPEED_I2CD_INTR_ALL						       \
>> -		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
>> +		(ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |		       \
>> +		 ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
>>   		 ASPEED_I2CD_INTR_BUS_RECOVER_DONE |			       \
>>   		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
>>   		 ASPEED_I2CD_INTR_ABNORMAL |				       \
>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>   							   u32 divisor);
>>   	unsigned long			parent_clk_frequency;
>>   	u32				bus_frequency;
>> +	u32				hw_timeout_ms;
>>   	/* Transaction state. */
>>   	enum aspeed_i2c_master_state	master_state;
>>   	struct i2c_msg			*msgs;
>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>   }
>>   
>>   #if IS_ENABLED(CONFIG_I2C_SLAVE)
>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>> +{
>> +	if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>> +		return -EIO;
>> +
>> +	return 0;
>> +}
>> +
>>   static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>   {
>>   	u32 command, irq_handled = 0;
>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>   	if (!slave)
>>   		return 0;
>>   
>> +	if (aspeed_i2c_check_slave_error(irq_status)) {
>> +		dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>> +			irq_status);
>> +		irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>> +		bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;

                 aspeed_i2c_reset(bus);

I didn't add it in this patch because I wanted to avoid calling of this
reset function from interrupt context but give it a try.

Thanks,
Jae

>> +		return irq_handled;
>> +	}
>> +
>>   	command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>   
>>   	/* Slave was requested, restart state machine. */
>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>   	}
>>   }
>>   
>> -static int aspeed_i2c_is_irq_error(u32 irq_status)
>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>   {
>>   	if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>   		return -EAGAIN;
>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>   	 * should clear the command queue effectively taking us back to the
>>   	 * INACTIVE state.
>>   	 */
>> -	ret = aspeed_i2c_is_irq_error(irq_status);
>> +	ret = aspeed_i2c_check_master_error(irq_status);
>>   	if (ret) {
>> -		dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>> +		dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>   			irq_status);
>>   		irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>   		if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>   /* precondition: bus.lock has been acquired. */
>>   static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>   {
>> +	u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>   	u32 divisor, clk_reg_val;
>>   
>>   	divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>   			ASPEED_I2CD_TIME_THDSTA_MASK |
>>   			ASPEED_I2CD_TIME_TACST_MASK);
>>   	clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>> +
>> +	if (bus->hw_timeout_ms) {
>> +		u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>> +			     ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>> +		u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>> +				ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>> +
>> +		timeout_base_divisor = 0;
>> +
>> +		do {
>> +			timeout_tick_us = 1000 * (16384 <<
>> +						  (timeout_base_divisor << 1)) /
>> +					  (bus->parent_clk_frequency / 1000);
>> +
>> +			if (timeout_base_divisor == div_max ||
>> +			    timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>> +			    bus->hw_timeout_ms * 1000)
>> +				break;
>> +		} while (timeout_base_divisor++ < div_max);
>> +
>> +		if (timeout_tick_us) {
>> +			timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>> +						      timeout_tick_us);
>> +			if (timeout_cycles == 0)
>> +				timeout_cycles = 1;
>> +			else if (timeout_cycles > cycles_max)
>> +				timeout_cycles = cycles_max;
>> +		} else {
>> +			timeout_cycles = 0;
>> +		}
>> +	} else {
>> +		timeout_base_divisor = 0;
>> +		timeout_cycles = 0;
>> +	}
>> +
>> +	clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>> +				  timeout_base_divisor);
>> +
>>   	writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>> -	writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>> +	writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>   
>>   	return 0;
>>   }
>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>   		}
>>   	}
>>   
>> +	device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>> +				 &bus->hw_timeout_ms);
>> +
>>   	/* Initialize the I2C adapter */
>>   	spin_lock_init(&bus->lock);
>>   	init_completion(&bus->cmd_complete);
>>
Tao Ren Sept. 5, 2019, 11:19 p.m. UTC | #4
On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
> Hi Tao,
> 
> On 9/5/2019 3:28 PM, Tao Ren wrote:
>> Hi Jae,
>>
>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>> In case of multi-master environment, if a peer master incorrectly handles
>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>> and it can't escape from the slave state, so this commit adds slave
>>> inactive timeout support to recover the bus in the case.
>>>
>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>
>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>
>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.
> 
> Basic concept of this implementation is setting the slave state of
> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of
> aspeed_i2c_reset() directly from interrupt context. Instead, when a
> master xfer happens after that, it will try bus recovery
> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
> if needed.
> 
> If this patch doesn't work in your case, test it again after adding
> one line code into this driver. See below.

If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being:

1) we don't know when userspace starts next master transfer.
2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0).


Thanks,

Tao

> 
>>
>>> ---
>>>   drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>>   1 file changed, 73 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>>> index 89317929bee4..92e1a249b393 100644
>>> --- a/drivers/i2c/busses/i2c-aspeed.c
>>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>>> @@ -70,10 +70,14 @@
>>>   #define ASPEED_I2CD_TIME_SCL_HIGH_MASK            GENMASK(19, 16)
>>>   #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT            12
>>>   #define ASPEED_I2CD_TIME_SCL_LOW_MASK            GENMASK(15, 12)
>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT    8
>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK    GENMASK(9, 8)
>>>   #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK        GENMASK(3, 0)
>>>   #define ASPEED_I2CD_TIME_SCL_REG_MAX            GENMASK(3, 0)
>>> +
>>>   /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>>> -#define ASPEED_NO_TIMEOUT_CTRL                0
>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT        0
>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK            GENMASK(4, 0)
>>>     /* 0x0c : I2CD Interrupt Control Register &
>>>    * 0x10 : I2CD Interrupt Status Register
>>> @@ -81,6 +85,7 @@
>>>    * These share bit definitions, so use the same values for the enable &
>>>    * status bits.
>>>    */
>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT        BIT(15)
>>>   #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT            BIT(14)
>>>   #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE        BIT(13)
>>>   #define ASPEED_I2CD_INTR_SLAVE_MATCH            BIT(7)
>>> @@ -96,8 +101,11 @@
>>>            ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>            ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>            ASPEED_I2CD_INTR_ARBIT_LOSS)
>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS                           \
>>> +        ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>>   #define ASPEED_I2CD_INTR_ALL                               \
>>> -        (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>> +        (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |               \
>>> +         ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>            ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                   \
>>>            ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>            ASPEED_I2CD_INTR_ABNORMAL |                       \
>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>>                                  u32 divisor);
>>>       unsigned long            parent_clk_frequency;
>>>       u32                bus_frequency;
>>> +    u32                hw_timeout_ms;
>>>       /* Transaction state. */
>>>       enum aspeed_i2c_master_state    master_state;
>>>       struct i2c_msg            *msgs;
>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>>   }
>>>     #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>>> +{
>>> +    if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>>> +        return -EIO;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>   {
>>>       u32 command, irq_handled = 0;
>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>       if (!slave)
>>>           return 0;
>>>   +    if (aspeed_i2c_check_slave_error(irq_status)) {
>>> +        dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>>> +            irq_status);
>>> +        irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>>> +        bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
> 
>                 aspeed_i2c_reset(bus);
> 
> I didn't add it in this patch because I wanted to avoid calling of this
> reset function from interrupt context but give it a try.
> 
> Thanks,
> Jae

I believe this will solve my problem, but let me test it and will share you results later.

>>> +        return irq_handled;
>>> +    }
>>> +
>>>       command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>>         /* Slave was requested, restart state machine. */
>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>>       }
>>>   }
>>>   -static int aspeed_i2c_is_irq_error(u32 irq_status)
>>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>>   {
>>>       if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>           return -EAGAIN;
>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>        * should clear the command queue effectively taking us back to the
>>>        * INACTIVE state.
>>>        */
>>> -    ret = aspeed_i2c_is_irq_error(irq_status);
>>> +    ret = aspeed_i2c_check_master_error(irq_status);
>>>       if (ret) {
>>> -        dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>>> +        dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>>               irq_status);
>>>           irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>>           if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>>   /* precondition: bus.lock has been acquired. */
>>>   static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>   {
>>> +    u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>>       u32 divisor, clk_reg_val;
>>>         divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>               ASPEED_I2CD_TIME_THDSTA_MASK |
>>>               ASPEED_I2CD_TIME_TACST_MASK);
>>>       clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>>> +
>>> +    if (bus->hw_timeout_ms) {
>>> +        u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>>> +                 ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>>> +        u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>>> +                ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>>> +
>>> +        timeout_base_divisor = 0;
>>> +
>>> +        do {
>>> +            timeout_tick_us = 1000 * (16384 <<
>>> +                          (timeout_base_divisor << 1)) /
>>> +                      (bus->parent_clk_frequency / 1000);
>>> +
>>> +            if (timeout_base_divisor == div_max ||
>>> +                timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>>> +                bus->hw_timeout_ms * 1000)
>>> +                break;
>>> +        } while (timeout_base_divisor++ < div_max);
>>> +
>>> +        if (timeout_tick_us) {
>>> +            timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>>> +                              timeout_tick_us);
>>> +            if (timeout_cycles == 0)
>>> +                timeout_cycles = 1;
>>> +            else if (timeout_cycles > cycles_max)
>>> +                timeout_cycles = cycles_max;
>>> +        } else {
>>> +            timeout_cycles = 0;
>>> +        }
>>> +    } else {
>>> +        timeout_base_divisor = 0;
>>> +        timeout_cycles = 0;
>>> +    }
>>> +
>>> +    clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>>> +                  timeout_base_divisor);
>>> +
>>>       writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>>> -    writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>> +    writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>         return 0;
>>>   }
>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>>           }
>>>       }
>>>   +    device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>>> +                 &bus->hw_timeout_ms);
>>> +
>>>       /* Initialize the I2C adapter */
>>>       spin_lock_init(&bus->lock);
>>>       init_completion(&bus->cmd_complete);
>>>
Jae Hyun Yoo Sept. 5, 2019, 11:35 p.m. UTC | #5
On 9/5/2019 4:19 PM, Tao Ren wrote:
> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
>> Hi Tao,
>>
>> On 9/5/2019 3:28 PM, Tao Ren wrote:
>>> Hi Jae,
>>>
>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>>> In case of multi-master environment, if a peer master incorrectly handles
>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>>> and it can't escape from the slave state, so this commit adds slave
>>>> inactive timeout support to recover the bus in the case.
>>>>
>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>
>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>>
>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.
>>
>> Basic concept of this implementation is setting the slave state of
>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of
>> aspeed_i2c_reset() directly from interrupt context. Instead, when a
>> master xfer happens after that, it will try bus recovery
>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
>> if needed.
>>
>> If this patch doesn't work in your case, test it again after adding
>> one line code into this driver. See below.
> 
> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being:
> 
> 1) we don't know when userspace starts next master transfer.
> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0).
> 

Oh, so to support the case as well, probably we need to add a flag for
indicating recovery needs when a master xfer comes then it could
forcibly recover and reset the bus even if the bus is idle. Can you
please test that with making code changes? Unfortunately, I can't
reproduce the case in my system.

>>
>>>
>>>> ---
>>>>    drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>>>    1 file changed, 73 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>>>> index 89317929bee4..92e1a249b393 100644
>>>> --- a/drivers/i2c/busses/i2c-aspeed.c
>>>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>>>> @@ -70,10 +70,14 @@
>>>>    #define ASPEED_I2CD_TIME_SCL_HIGH_MASK            GENMASK(19, 16)
>>>>    #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT            12
>>>>    #define ASPEED_I2CD_TIME_SCL_LOW_MASK            GENMASK(15, 12)
>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT    8
>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK    GENMASK(9, 8)
>>>>    #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK        GENMASK(3, 0)
>>>>    #define ASPEED_I2CD_TIME_SCL_REG_MAX            GENMASK(3, 0)
>>>> +
>>>>    /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>>>> -#define ASPEED_NO_TIMEOUT_CTRL                0
>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT        0
>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK            GENMASK(4, 0)
>>>>      /* 0x0c : I2CD Interrupt Control Register &
>>>>     * 0x10 : I2CD Interrupt Status Register
>>>> @@ -81,6 +85,7 @@
>>>>     * These share bit definitions, so use the same values for the enable &
>>>>     * status bits.
>>>>     */
>>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT        BIT(15)
>>>>    #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT            BIT(14)
>>>>    #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE        BIT(13)
>>>>    #define ASPEED_I2CD_INTR_SLAVE_MATCH            BIT(7)
>>>> @@ -96,8 +101,11 @@
>>>>             ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>>             ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>>             ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS                           \
>>>> +        ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>>>    #define ASPEED_I2CD_INTR_ALL                               \
>>>> -        (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>> +        (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |               \
>>>> +         ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>>             ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                   \
>>>>             ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>>             ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>>>                                   u32 divisor);
>>>>        unsigned long            parent_clk_frequency;
>>>>        u32                bus_frequency;
>>>> +    u32                hw_timeout_ms;
>>>>        /* Transaction state. */
>>>>        enum aspeed_i2c_master_state    master_state;
>>>>        struct i2c_msg            *msgs;
>>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>>>    }
>>>>      #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>>>> +{
>>>> +    if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>>>> +        return -EIO;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>    static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>    {
>>>>        u32 command, irq_handled = 0;
>>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>        if (!slave)
>>>>            return 0;
>>>>    +    if (aspeed_i2c_check_slave_error(irq_status)) {
>>>> +        dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>>>> +            irq_status);
>>>> +        irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>>>> +        bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
>>
>>                  aspeed_i2c_reset(bus);
>>
>> I didn't add it in this patch because I wanted to avoid calling of this
>> reset function from interrupt context but give it a try.
>>
>> Thanks,
>> Jae
> 
> I believe this will solve my problem, but let me test it and will share you results later.

Please share this result too. It would be useful to complete making this
patch.

Thanks,
Jae

>>>> +        return irq_handled;
>>>> +    }
>>>> +
>>>>        command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>>>          /* Slave was requested, restart state machine. */
>>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>>>        }
>>>>    }
>>>>    -static int aspeed_i2c_is_irq_error(u32 irq_status)
>>>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>>>    {
>>>>        if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>>            return -EAGAIN;
>>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>         * should clear the command queue effectively taking us back to the
>>>>         * INACTIVE state.
>>>>         */
>>>> -    ret = aspeed_i2c_is_irq_error(irq_status);
>>>> +    ret = aspeed_i2c_check_master_error(irq_status);
>>>>        if (ret) {
>>>> -        dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>>>> +        dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>>>                irq_status);
>>>>            irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>>>            if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>>>    /* precondition: bus.lock has been acquired. */
>>>>    static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>    {
>>>> +    u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>>>        u32 divisor, clk_reg_val;
>>>>          divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>                ASPEED_I2CD_TIME_THDSTA_MASK |
>>>>                ASPEED_I2CD_TIME_TACST_MASK);
>>>>        clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>>>> +
>>>> +    if (bus->hw_timeout_ms) {
>>>> +        u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>>>> +                 ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>>>> +        u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>>>> +                ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>>>> +
>>>> +        timeout_base_divisor = 0;
>>>> +
>>>> +        do {
>>>> +            timeout_tick_us = 1000 * (16384 <<
>>>> +                          (timeout_base_divisor << 1)) /
>>>> +                      (bus->parent_clk_frequency / 1000);
>>>> +
>>>> +            if (timeout_base_divisor == div_max ||
>>>> +                timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>>>> +                bus->hw_timeout_ms * 1000)
>>>> +                break;
>>>> +        } while (timeout_base_divisor++ < div_max);
>>>> +
>>>> +        if (timeout_tick_us) {
>>>> +            timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>>>> +                              timeout_tick_us);
>>>> +            if (timeout_cycles == 0)
>>>> +                timeout_cycles = 1;
>>>> +            else if (timeout_cycles > cycles_max)
>>>> +                timeout_cycles = cycles_max;
>>>> +        } else {
>>>> +            timeout_cycles = 0;
>>>> +        }
>>>> +    } else {
>>>> +        timeout_base_divisor = 0;
>>>> +        timeout_cycles = 0;
>>>> +    }
>>>> +
>>>> +    clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>>>> +                  timeout_base_divisor);
>>>> +
>>>>        writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>>>> -    writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>> +    writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>>          return 0;
>>>>    }
>>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>>>            }
>>>>        }
>>>>    +    device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>>>> +                 &bus->hw_timeout_ms);
>>>> +
>>>>        /* Initialize the I2C adapter */
>>>>        spin_lock_init(&bus->lock);
>>>>        init_completion(&bus->cmd_complete);
>>>>
Tao Ren Sept. 6, 2019, 1:16 a.m. UTC | #6
Hi Jae,

On 9/5/19 4:35 PM, Jae Hyun Yoo wrote:
> On 9/5/2019 4:19 PM, Tao Ren wrote:
>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
>>> Hi Tao,
>>>
>>> On 9/5/2019 3:28 PM, Tao Ren wrote:
>>>> Hi Jae,
>>>>
>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>>>> In case of multi-master environment, if a peer master incorrectly handles
>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>>>> and it can't escape from the slave state, so this commit adds slave
>>>>> inactive timeout support to recover the bus in the case.
>>>>>
>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>>
>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>>>
>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.
>>>
>>> Basic concept of this implementation is setting the slave state of
>>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of
>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a
>>> master xfer happens after that, it will try bus recovery
>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
>>> if needed.
>>>
>>> If this patch doesn't work in your case, test it again after adding
>>> one line code into this driver. See below.
>>
>> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being:
>>
>> 1) we don't know when userspace starts next master transfer.
>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0).
>>
> 
> Oh, so to support the case as well, probably we need to add a flag for
> indicating recovery needs when a master xfer comes then it could
> forcibly recover and reset the bus even if the bus is idle. Can you
> please test that with making code changes? Unfortunately, I can't
> reproduce the case in my system.

Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed?

>>>
>>>>
>>>>> ---
>>>>>    drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>>>>    1 file changed, 73 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>>>>> index 89317929bee4..92e1a249b393 100644
>>>>> --- a/drivers/i2c/busses/i2c-aspeed.c
>>>>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>>>>> @@ -70,10 +70,14 @@
>>>>>    #define ASPEED_I2CD_TIME_SCL_HIGH_MASK            GENMASK(19, 16)
>>>>>    #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT            12
>>>>>    #define ASPEED_I2CD_TIME_SCL_LOW_MASK            GENMASK(15, 12)
>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT    8
>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK    GENMASK(9, 8)
>>>>>    #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK        GENMASK(3, 0)
>>>>>    #define ASPEED_I2CD_TIME_SCL_REG_MAX            GENMASK(3, 0)
>>>>> +
>>>>>    /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>>>>> -#define ASPEED_NO_TIMEOUT_CTRL                0
>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT        0
>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK            GENMASK(4, 0)
>>>>>      /* 0x0c : I2CD Interrupt Control Register &
>>>>>     * 0x10 : I2CD Interrupt Status Register
>>>>> @@ -81,6 +85,7 @@
>>>>>     * These share bit definitions, so use the same values for the enable &
>>>>>     * status bits.
>>>>>     */
>>>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT        BIT(15)
>>>>>    #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT            BIT(14)
>>>>>    #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE        BIT(13)
>>>>>    #define ASPEED_I2CD_INTR_SLAVE_MATCH            BIT(7)
>>>>> @@ -96,8 +101,11 @@
>>>>>             ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>>>             ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>>>             ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS                           \
>>>>> +        ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>>>>    #define ASPEED_I2CD_INTR_ALL                               \
>>>>> -        (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>>> +        (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |               \
>>>>> +         ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>>>             ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                   \
>>>>>             ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>>>             ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>>>>                                   u32 divisor);
>>>>>        unsigned long            parent_clk_frequency;
>>>>>        u32                bus_frequency;
>>>>> +    u32                hw_timeout_ms;
>>>>>        /* Transaction state. */
>>>>>        enum aspeed_i2c_master_state    master_state;
>>>>>        struct i2c_msg            *msgs;
>>>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>>>>    }
>>>>>      #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>>>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>>>>> +{
>>>>> +    if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>>>>> +        return -EIO;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>>    static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>>    {
>>>>>        u32 command, irq_handled = 0;
>>>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>>        if (!slave)
>>>>>            return 0;
>>>>>    +    if (aspeed_i2c_check_slave_error(irq_status)) {
>>>>> +        dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>>>>> +            irq_status);
>>>>> +        irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>>>>> +        bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
>>>
>>>                  aspeed_i2c_reset(bus);
>>>
>>> I didn't add it in this patch because I wanted to avoid calling of this
>>> reset function from interrupt context but give it a try.
>>>
>>> Thanks,
>>> Jae
>>
>> I believe this will solve my problem, but let me test it and will share you results later.
> 
> Please share this result too. It would be useful to complete making this
> patch.
> 
> Thanks,
> Jae

Adding aspeed_i2c_reset(bus) does fix my problem. Thank you Jae.


Cheers,

Tao

>>>>> +        return irq_handled;
>>>>> +    }
>>>>> +
>>>>>        command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>>>>          /* Slave was requested, restart state machine. */
>>>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>>>>        }
>>>>>    }
>>>>>    -static int aspeed_i2c_is_irq_error(u32 irq_status)
>>>>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>>>>    {
>>>>>        if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>>>            return -EAGAIN;
>>>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>>         * should clear the command queue effectively taking us back to the
>>>>>         * INACTIVE state.
>>>>>         */
>>>>> -    ret = aspeed_i2c_is_irq_error(irq_status);
>>>>> +    ret = aspeed_i2c_check_master_error(irq_status);
>>>>>        if (ret) {
>>>>> -        dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>>>>> +        dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>>>>                irq_status);
>>>>>            irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>>>>            if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>>>>    /* precondition: bus.lock has been acquired. */
>>>>>    static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>>    {
>>>>> +    u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>>>>        u32 divisor, clk_reg_val;
>>>>>          divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>>>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>>                ASPEED_I2CD_TIME_THDSTA_MASK |
>>>>>                ASPEED_I2CD_TIME_TACST_MASK);
>>>>>        clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>>>>> +
>>>>> +    if (bus->hw_timeout_ms) {
>>>>> +        u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>>>>> +                 ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>>>>> +        u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>>>>> +                ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>>>>> +
>>>>> +        timeout_base_divisor = 0;
>>>>> +
>>>>> +        do {
>>>>> +            timeout_tick_us = 1000 * (16384 <<
>>>>> +                          (timeout_base_divisor << 1)) /
>>>>> +                      (bus->parent_clk_frequency / 1000);
>>>>> +
>>>>> +            if (timeout_base_divisor == div_max ||
>>>>> +                timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>>>>> +                bus->hw_timeout_ms * 1000)
>>>>> +                break;
>>>>> +        } while (timeout_base_divisor++ < div_max);
>>>>> +
>>>>> +        if (timeout_tick_us) {
>>>>> +            timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>>>>> +                              timeout_tick_us);
>>>>> +            if (timeout_cycles == 0)
>>>>> +                timeout_cycles = 1;
>>>>> +            else if (timeout_cycles > cycles_max)
>>>>> +                timeout_cycles = cycles_max;
>>>>> +        } else {
>>>>> +            timeout_cycles = 0;
>>>>> +        }
>>>>> +    } else {
>>>>> +        timeout_base_divisor = 0;
>>>>> +        timeout_cycles = 0;
>>>>> +    }
>>>>> +
>>>>> +    clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>>>>> +                  timeout_base_divisor);
>>>>> +
>>>>>        writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>>>>> -    writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>>> +    writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>>>          return 0;
>>>>>    }
>>>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>>>>            }
>>>>>        }
>>>>>    +    device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>>>>> +                 &bus->hw_timeout_ms);
>>>>> +
>>>>>        /* Initialize the I2C adapter */
>>>>>        spin_lock_init(&bus->lock);
>>>>>        init_completion(&bus->cmd_complete);
>>>>>
Tao Ren Sept. 6, 2019, 1:56 a.m. UTC | #7
On 9/5/19 6:16 PM, Tao Ren wrote:
> Hi Jae,
> 
> On 9/5/19 4:35 PM, Jae Hyun Yoo wrote:
>> On 9/5/2019 4:19 PM, Tao Ren wrote:
>>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
>>>> Hi Tao,
>>>>
>>>> On 9/5/2019 3:28 PM, Tao Ren wrote:
>>>>> Hi Jae,
>>>>>
>>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>>>>> In case of multi-master environment, if a peer master incorrectly handles
>>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>>>>> and it can't escape from the slave state, so this commit adds slave
>>>>>> inactive timeout support to recover the bus in the case.
>>>>>>
>>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>>>
>>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>>>>
>>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.
>>>>
>>>> Basic concept of this implementation is setting the slave state of
>>>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of
>>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a
>>>> master xfer happens after that, it will try bus recovery
>>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
>>>> if needed.
>>>>
>>>> If this patch doesn't work in your case, test it again after adding
>>>> one line code into this driver. See below.
>>>
>>> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being:
>>>
>>> 1) we don't know when userspace starts next master transfer.
>>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0).
>>>
>>
>> Oh, so to support the case as well, probably we need to add a flag for
>> indicating recovery needs when a master xfer comes then it could
>> forcibly recover and reset the bus even if the bus is idle. Can you
>> please test that with making code changes? Unfortunately, I can't
>> reproduce the case in my system.
> 
> Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed?

One of my colleagues reminded me to enable bus auto release (I2CD00[17]) which could avoid explicit bus reset in interrupt handler.

Let me try and will update back soon.


Thanks,

Tao

>>>>
>>>>>
>>>>>> ---
>>>>>>    drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++---
>>>>>>    1 file changed, 73 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>>>>>> index 89317929bee4..92e1a249b393 100644
>>>>>> --- a/drivers/i2c/busses/i2c-aspeed.c
>>>>>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>>>>>> @@ -70,10 +70,14 @@
>>>>>>    #define ASPEED_I2CD_TIME_SCL_HIGH_MASK            GENMASK(19, 16)
>>>>>>    #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT            12
>>>>>>    #define ASPEED_I2CD_TIME_SCL_LOW_MASK            GENMASK(15, 12)
>>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT    8
>>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK    GENMASK(9, 8)
>>>>>>    #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK        GENMASK(3, 0)
>>>>>>    #define ASPEED_I2CD_TIME_SCL_REG_MAX            GENMASK(3, 0)
>>>>>> +
>>>>>>    /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
>>>>>> -#define ASPEED_NO_TIMEOUT_CTRL                0
>>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT        0
>>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK            GENMASK(4, 0)
>>>>>>      /* 0x0c : I2CD Interrupt Control Register &
>>>>>>     * 0x10 : I2CD Interrupt Status Register
>>>>>> @@ -81,6 +85,7 @@
>>>>>>     * These share bit definitions, so use the same values for the enable &
>>>>>>     * status bits.
>>>>>>     */
>>>>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT        BIT(15)
>>>>>>    #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT            BIT(14)
>>>>>>    #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE        BIT(13)
>>>>>>    #define ASPEED_I2CD_INTR_SLAVE_MATCH            BIT(7)
>>>>>> @@ -96,8 +101,11 @@
>>>>>>             ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>>>>             ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>>>>             ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS                           \
>>>>>> +        ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
>>>>>>    #define ASPEED_I2CD_INTR_ALL                               \
>>>>>> -        (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>>>> +        (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |               \
>>>>>> +         ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                   \
>>>>>>             ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                   \
>>>>>>             ASPEED_I2CD_INTR_SCL_TIMEOUT |                       \
>>>>>>             ASPEED_I2CD_INTR_ABNORMAL |                       \
>>>>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus {
>>>>>>                                   u32 divisor);
>>>>>>        unsigned long            parent_clk_frequency;
>>>>>>        u32                bus_frequency;
>>>>>> +    u32                hw_timeout_ms;
>>>>>>        /* Transaction state. */
>>>>>>        enum aspeed_i2c_master_state    master_state;
>>>>>>        struct i2c_msg            *msgs;
>>>>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>>>>>    }
>>>>>>      #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>>>>> +static int aspeed_i2c_check_slave_error(u32 irq_status)
>>>>>> +{
>>>>>> +    if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
>>>>>> +        return -EIO;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>>    static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>>>    {
>>>>>>        u32 command, irq_handled = 0;
>>>>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>>>        if (!slave)
>>>>>>            return 0;
>>>>>>    +    if (aspeed_i2c_check_slave_error(irq_status)) {
>>>>>> +        dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
>>>>>> +            irq_status);
>>>>>> +        irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
>>>>>> +        bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
>>>>
>>>>                  aspeed_i2c_reset(bus);
>>>>
>>>> I didn't add it in this patch because I wanted to avoid calling of this
>>>> reset function from interrupt context but give it a try.
>>>>
>>>> Thanks,
>>>> Jae
>>>
>>> I believe this will solve my problem, but let me test it and will share you results later.
>>
>> Please share this result too. It would be useful to complete making this
>> patch.
>>
>> Thanks,
>> Jae
> 
> Adding aspeed_i2c_reset(bus) does fix my problem. Thank you Jae.
> 
> 
> Cheers,
> 
> Tao
> 
>>>>>> +        return irq_handled;
>>>>>> +    }
>>>>>> +
>>>>>>        command = readl(bus->base + ASPEED_I2C_CMD_REG);
>>>>>>          /* Slave was requested, restart state machine. */
>>>>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
>>>>>>        }
>>>>>>    }
>>>>>>    -static int aspeed_i2c_is_irq_error(u32 irq_status)
>>>>>> +static int aspeed_i2c_check_master_error(u32 irq_status)
>>>>>>    {
>>>>>>        if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
>>>>>>            return -EAGAIN;
>>>>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
>>>>>>         * should clear the command queue effectively taking us back to the
>>>>>>         * INACTIVE state.
>>>>>>         */
>>>>>> -    ret = aspeed_i2c_is_irq_error(irq_status);
>>>>>> +    ret = aspeed_i2c_check_master_error(irq_status);
>>>>>>        if (ret) {
>>>>>> -        dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>>>>>> +        dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
>>>>>>                irq_status);
>>>>>>            irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
>>>>>>            if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>>>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
>>>>>>    /* precondition: bus.lock has been acquired. */
>>>>>>    static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>>>    {
>>>>>> +    u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
>>>>>>        u32 divisor, clk_reg_val;
>>>>>>          divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
>>>>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
>>>>>>                ASPEED_I2CD_TIME_THDSTA_MASK |
>>>>>>                ASPEED_I2CD_TIME_TACST_MASK);
>>>>>>        clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
>>>>>> +
>>>>>> +    if (bus->hw_timeout_ms) {
>>>>>> +        u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
>>>>>> +                 ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
>>>>>> +        u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
>>>>>> +                ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
>>>>>> +
>>>>>> +        timeout_base_divisor = 0;
>>>>>> +
>>>>>> +        do {
>>>>>> +            timeout_tick_us = 1000 * (16384 <<
>>>>>> +                          (timeout_base_divisor << 1)) /
>>>>>> +                      (bus->parent_clk_frequency / 1000);
>>>>>> +
>>>>>> +            if (timeout_base_divisor == div_max ||
>>>>>> +                timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
>>>>>> +                bus->hw_timeout_ms * 1000)
>>>>>> +                break;
>>>>>> +        } while (timeout_base_divisor++ < div_max);
>>>>>> +
>>>>>> +        if (timeout_tick_us) {
>>>>>> +            timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
>>>>>> +                              timeout_tick_us);
>>>>>> +            if (timeout_cycles == 0)
>>>>>> +                timeout_cycles = 1;
>>>>>> +            else if (timeout_cycles > cycles_max)
>>>>>> +                timeout_cycles = cycles_max;
>>>>>> +        } else {
>>>>>> +            timeout_cycles = 0;
>>>>>> +        }
>>>>>> +    } else {
>>>>>> +        timeout_base_divisor = 0;
>>>>>> +        timeout_cycles = 0;
>>>>>> +    }
>>>>>> +
>>>>>> +    clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
>>>>>> +                  timeout_base_divisor);
>>>>>> +
>>>>>>        writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
>>>>>> -    writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>>>> +    writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
>>>>>>          return 0;
>>>>>>    }
>>>>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>>>>>            }
>>>>>>        }
>>>>>>    +    device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
>>>>>> +                 &bus->hw_timeout_ms);
>>>>>> +
>>>>>>        /* Initialize the I2C adapter */
>>>>>>        spin_lock_init(&bus->lock);
>>>>>>        init_completion(&bus->cmd_complete);
>>>>>>
Tao Ren Sept. 6, 2019, 2:35 a.m. UTC | #8
Hi Jae,

On 9/5/19 6:56 PM, Tao Ren wrote:
> On 9/5/19 6:16 PM, Tao Ren wrote:
>> Hi Jae,
>>
>> On 9/5/19 4:35 PM, Jae Hyun Yoo wrote:
>>> On 9/5/2019 4:19 PM, Tao Ren wrote:
>>>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
>>>>> Hi Tao,
>>>>>
>>>>> On 9/5/2019 3:28 PM, Tao Ren wrote:
>>>>>> Hi Jae,
>>>>>>
>>>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>>>>>> In case of multi-master environment, if a peer master incorrectly handles
>>>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>>>>>> and it can't escape from the slave state, so this commit adds slave
>>>>>>> inactive timeout support to recover the bus in the case.
>>>>>>>
>>>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>>>>
>>>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>>>>>
>>>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.
>>>>>
>>>>> Basic concept of this implementation is setting the slave state of
>>>>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of
>>>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a
>>>>> master xfer happens after that, it will try bus recovery
>>>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
>>>>> if needed.
>>>>>
>>>>> If this patch doesn't work in your case, test it again after adding
>>>>> one line code into this driver. See below.
>>>>
>>>> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being:
>>>>
>>>> 1) we don't know when userspace starts next master transfer.
>>>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0).
>>>>
>>>
>>> Oh, so to support the case as well, probably we need to add a flag for
>>> indicating recovery needs when a master xfer comes then it could
>>> forcibly recover and reset the bus even if the bus is idle. Can you
>>> please test that with making code changes? Unfortunately, I can't
>>> reproduce the case in my system.
>>
>> Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed?
> 
> One of my colleagues reminded me to enable bus auto release (I2CD00[17]) which could avoid explicit bus reset in interrupt handler.
> 
> Let me try and will update back soon.

Let me summarize my testing result on Minipack BMC:

BMC i2c controller cannot exit slave state by enabling slave_inactive_timeout interrupt only: in this case, i2c controller stays in SRXD state and slave_inactive_timeout interrupt will be generated again and again until bus is explicitly reset.

I've tried below 2 approaches and both works on my platform:
  - calling aspeed_i2c_reset() in interrupt handler when slave_inacive_timeout interrupt is delivered.
  - enabling bus auto release feature (I2CD00[17]) in probe function.


Cheers,

Tao
Jae Hyun Yoo Sept. 6, 2019, 4:20 p.m. UTC | #9
Hi Tao,

On 9/5/2019 7:35 PM, Tao Ren wrote:
> Hi Jae,
> 
> On 9/5/19 6:56 PM, Tao Ren wrote:
>> On 9/5/19 6:16 PM, Tao Ren wrote:
>>> Hi Jae,
>>>
>>> On 9/5/19 4:35 PM, Jae Hyun Yoo wrote:
>>>> On 9/5/2019 4:19 PM, Tao Ren wrote:
>>>>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote:
>>>>>> Hi Tao,
>>>>>>
>>>>>> On 9/5/2019 3:28 PM, Tao Ren wrote:
>>>>>>> Hi Jae,
>>>>>>>
>>>>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote:
>>>>>>>> In case of multi-master environment, if a peer master incorrectly handles
>>>>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state
>>>>>>>> and it can't escape from the slave state, so this commit adds slave
>>>>>>>> inactive timeout support to recover the bus in the case.
>>>>>>>>
>>>>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>>>>>
>>>>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state?
>>>>>>>
>>>>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again.
>>>>>>
>>>>>> Basic concept of this implementation is setting the slave state of
>>>>>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of
>>>>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a
>>>>>> master xfer happens after that, it will try bus recovery
>>>>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset()
>>>>>> if needed.
>>>>>>
>>>>>> If this patch doesn't work in your case, test it again after adding
>>>>>> one line code into this driver. See below.
>>>>>
>>>>> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being:
>>>>>
>>>>> 1) we don't know when userspace starts next master transfer.
>>>>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0).
>>>>>
>>>>
>>>> Oh, so to support the case as well, probably we need to add a flag for
>>>> indicating recovery needs when a master xfer comes then it could
>>>> forcibly recover and reset the bus even if the bus is idle. Can you
>>>> please test that with making code changes? Unfortunately, I can't
>>>> reproduce the case in my system.
>>>
>>> Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed?
>>
>> One of my colleagues reminded me to enable bus auto release (I2CD00[17]) which could avoid explicit bus reset in interrupt handler.
>>
>> Let me try and will update back soon.
> 
> Let me summarize my testing result on Minipack BMC:
> 
> BMC i2c controller cannot exit slave state by enabling slave_inactive_timeout interrupt only: in this case, i2c controller stays in SRXD state and slave_inactive_timeout interrupt will be generated again and again until bus is explicitly reset.
> 
> I've tried below 2 approaches and both works on my platform:
>    - calling aspeed_i2c_reset() in interrupt handler when slave_inacive_timeout interrupt is delivered.
>    - enabling bus auto release feature (I2CD00[17]) in probe function.

Thanks a lot for your testing result and suggestions. It seems that the
latter way - enabling bus auto release would be better for this
implementation.

I'll submit a next spin today.

Cheers,
Jae
diff mbox series

Patch

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 89317929bee4..92e1a249b393 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -70,10 +70,14 @@ 
 #define ASPEED_I2CD_TIME_SCL_HIGH_MASK			GENMASK(19, 16)
 #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT			12
 #define ASPEED_I2CD_TIME_SCL_LOW_MASK			GENMASK(15, 12)
+#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT	8
+#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK	GENMASK(9, 8)
 #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK		GENMASK(3, 0)
 #define ASPEED_I2CD_TIME_SCL_REG_MAX			GENMASK(3, 0)
+
 /* 0x08 : I2CD Clock and AC Timing Control Register #2 */
-#define ASPEED_NO_TIMEOUT_CTRL				0
+#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT		0
+#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK			GENMASK(4, 0)
 
 /* 0x0c : I2CD Interrupt Control Register &
  * 0x10 : I2CD Interrupt Status Register
@@ -81,6 +85,7 @@ 
  * These share bit definitions, so use the same values for the enable &
  * status bits.
  */
+#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT		BIT(15)
 #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT			BIT(14)
 #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE		BIT(13)
 #define ASPEED_I2CD_INTR_SLAVE_MATCH			BIT(7)
@@ -96,8 +101,11 @@ 
 		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
 		 ASPEED_I2CD_INTR_ABNORMAL |				       \
 		 ASPEED_I2CD_INTR_ARBIT_LOSS)
+#define ASPEED_I2CD_INTR_SLAVE_ERRORS					       \
+		ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT
 #define ASPEED_I2CD_INTR_ALL						       \
-		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
+		(ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT |		       \
+		 ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
 		 ASPEED_I2CD_INTR_BUS_RECOVER_DONE |			       \
 		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
 		 ASPEED_I2CD_INTR_ABNORMAL |				       \
@@ -176,6 +184,7 @@  struct aspeed_i2c_bus {
 							   u32 divisor);
 	unsigned long			parent_clk_frequency;
 	u32				bus_frequency;
+	u32				hw_timeout_ms;
 	/* Transaction state. */
 	enum aspeed_i2c_master_state	master_state;
 	struct i2c_msg			*msgs;
@@ -276,6 +285,14 @@  static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
 }
 
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
+static int aspeed_i2c_check_slave_error(u32 irq_status)
+{
+	if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT)
+		return -EIO;
+
+	return 0;
+}
+
 static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
 {
 	u32 command, irq_handled = 0;
@@ -286,6 +303,14 @@  static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
 	if (!slave)
 		return 0;
 
+	if (aspeed_i2c_check_slave_error(irq_status)) {
+		dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n",
+			irq_status);
+		irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS);
+		bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE;
+		return irq_handled;
+	}
+
 	command = readl(bus->base + ASPEED_I2C_CMD_REG);
 
 	/* Slave was requested, restart state machine. */
@@ -602,7 +627,7 @@  static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
 	}
 }
 
-static int aspeed_i2c_is_irq_error(u32 irq_status)
+static int aspeed_i2c_check_master_error(u32 irq_status)
 {
 	if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
 		return -EAGAIN;
@@ -633,9 +658,9 @@  static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
 	 * should clear the command queue effectively taking us back to the
 	 * INACTIVE state.
 	 */
-	ret = aspeed_i2c_is_irq_error(irq_status);
+	ret = aspeed_i2c_check_master_error(irq_status);
 	if (ret) {
-		dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
+		dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n",
 			irq_status);
 		irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS);
 		if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
@@ -1194,6 +1219,7 @@  static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor)
 /* precondition: bus.lock has been acquired. */
 static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
 {
+	u32 timeout_base_divisor, timeout_tick_us, timeout_cycles;
 	u32 divisor, clk_reg_val;
 
 	divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency);
@@ -1202,8 +1228,46 @@  static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
 			ASPEED_I2CD_TIME_THDSTA_MASK |
 			ASPEED_I2CD_TIME_TACST_MASK);
 	clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor);
+
+	if (bus->hw_timeout_ms) {
+		u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >>
+			     ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT;
+		u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >>
+				ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT;
+
+		timeout_base_divisor = 0;
+
+		do {
+			timeout_tick_us = 1000 * (16384 <<
+						  (timeout_base_divisor << 1)) /
+					  (bus->parent_clk_frequency / 1000);
+
+			if (timeout_base_divisor == div_max ||
+			    timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >=
+			    bus->hw_timeout_ms * 1000)
+				break;
+		} while (timeout_base_divisor++ < div_max);
+
+		if (timeout_tick_us) {
+			timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000,
+						      timeout_tick_us);
+			if (timeout_cycles == 0)
+				timeout_cycles = 1;
+			else if (timeout_cycles > cycles_max)
+				timeout_cycles = cycles_max;
+		} else {
+			timeout_cycles = 0;
+		}
+	} else {
+		timeout_base_divisor = 0;
+		timeout_cycles = 0;
+	}
+
+	clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK,
+				  timeout_base_divisor);
+
 	writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
-	writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
+	writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2);
 
 	return 0;
 }
@@ -1404,6 +1468,9 @@  static int aspeed_i2c_probe_bus(struct platform_device *pdev)
 		}
 	}
 
+	device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms",
+				 &bus->hw_timeout_ms);
+
 	/* Initialize the I2C adapter */
 	spin_lock_init(&bus->lock);
 	init_completion(&bus->cmd_complete);