Message ID | 20190904200758.5420-3-jae.hyun.yoo@linux.intel.com |
---|---|
State | Rejected, archived |
Headers | show |
Series | i2c: aspeed: Add H/W timeout support | expand |
On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: > In case of multi-master environment, if a peer master incorrectly handles > a bus in the middle of a transaction, I2C hardware hangs in slave state > and it can't escape from the slave state, so this commit adds slave > inactive timeout support to recover the bus in the case. > > Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> Thank you Jae for the patch. Let me play with it and will update back with my results later this week. Cheers, Tao > --- > drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- > 1 file changed, 73 insertions(+), 6 deletions(-) > > diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c > index 89317929bee4..92e1a249b393 100644 > --- a/drivers/i2c/busses/i2c-aspeed.c > +++ b/drivers/i2c/busses/i2c-aspeed.c > @@ -70,10 +70,14 @@ > #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) > #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 > #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) > +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 > +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) > #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) > #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) > + > /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ > -#define ASPEED_NO_TIMEOUT_CTRL 0 > +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 > +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) > > /* 0x0c : I2CD Interrupt Control Register & > * 0x10 : I2CD Interrupt Status Register > @@ -81,6 +85,7 @@ > * These share bit definitions, so use the same values for the enable & > * status bits. > */ > +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) > #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) > #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) > #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) > @@ -96,8 +101,11 @@ > ASPEED_I2CD_INTR_SCL_TIMEOUT | \ > ASPEED_I2CD_INTR_ABNORMAL | \ > ASPEED_I2CD_INTR_ARBIT_LOSS) > +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ > + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT > #define ASPEED_I2CD_INTR_ALL \ > - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ > + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ > + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ > ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ > ASPEED_I2CD_INTR_SCL_TIMEOUT | \ > ASPEED_I2CD_INTR_ABNORMAL | \ > @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { > u32 divisor); > unsigned long parent_clk_frequency; > u32 bus_frequency; > + u32 hw_timeout_ms; > /* Transaction state. */ > enum aspeed_i2c_master_state master_state; > struct i2c_msg *msgs; > @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) > } > > #if IS_ENABLED(CONFIG_I2C_SLAVE) > +static int aspeed_i2c_check_slave_error(u32 irq_status) > +{ > + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) > + return -EIO; > + > + return 0; > +} > + > static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) > { > u32 command, irq_handled = 0; > @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) > if (!slave) > return 0; > > + if (aspeed_i2c_check_slave_error(irq_status)) { > + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", > + irq_status); > + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); > + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; > + return irq_handled; > + } > + > command = readl(bus->base + ASPEED_I2C_CMD_REG); > > /* Slave was requested, restart state machine. */ > @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) > } > } > > -static int aspeed_i2c_is_irq_error(u32 irq_status) > +static int aspeed_i2c_check_master_error(u32 irq_status) > { > if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) > return -EAGAIN; > @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) > * should clear the command queue effectively taking us back to the > * INACTIVE state. > */ > - ret = aspeed_i2c_is_irq_error(irq_status); > + ret = aspeed_i2c_check_master_error(irq_status); > if (ret) { > - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", > + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", > irq_status); > irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); > if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { > @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) > /* precondition: bus.lock has been acquired. */ > static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) > { > + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; > u32 divisor, clk_reg_val; > > divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); > @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) > ASPEED_I2CD_TIME_THDSTA_MASK | > ASPEED_I2CD_TIME_TACST_MASK); > clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); > + > + if (bus->hw_timeout_ms) { > + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> > + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; > + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> > + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; > + > + timeout_base_divisor = 0; > + > + do { > + timeout_tick_us = 1000 * (16384 << > + (timeout_base_divisor << 1)) / > + (bus->parent_clk_frequency / 1000); > + > + if (timeout_base_divisor == div_max || > + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= > + bus->hw_timeout_ms * 1000) > + break; > + } while (timeout_base_divisor++ < div_max); > + > + if (timeout_tick_us) { > + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, > + timeout_tick_us); > + if (timeout_cycles == 0) > + timeout_cycles = 1; > + else if (timeout_cycles > cycles_max) > + timeout_cycles = cycles_max; > + } else { > + timeout_cycles = 0; > + } > + } else { > + timeout_base_divisor = 0; > + timeout_cycles = 0; > + } > + > + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, > + timeout_base_divisor); > + > writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); > - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); > + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); > > return 0; > } > @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) > } > } > > + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", > + &bus->hw_timeout_ms); > + > /* Initialize the I2C adapter */ > spin_lock_init(&bus->lock); > init_completion(&bus->cmd_complete); >
Hi Jae, On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: > In case of multi-master environment, if a peer master incorrectly handles > a bus in the middle of a transaction, I2C hardware hangs in slave state > and it can't escape from the slave state, so this commit adds slave > inactive timeout support to recover the bus in the case. > > Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. Cheers, Tao > --- > drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- > 1 file changed, 73 insertions(+), 6 deletions(-) > > diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c > index 89317929bee4..92e1a249b393 100644 > --- a/drivers/i2c/busses/i2c-aspeed.c > +++ b/drivers/i2c/busses/i2c-aspeed.c > @@ -70,10 +70,14 @@ > #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) > #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 > #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) > +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 > +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) > #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) > #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) > + > /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ > -#define ASPEED_NO_TIMEOUT_CTRL 0 > +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 > +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) > > /* 0x0c : I2CD Interrupt Control Register & > * 0x10 : I2CD Interrupt Status Register > @@ -81,6 +85,7 @@ > * These share bit definitions, so use the same values for the enable & > * status bits. > */ > +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) > #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) > #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) > #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) > @@ -96,8 +101,11 @@ > ASPEED_I2CD_INTR_SCL_TIMEOUT | \ > ASPEED_I2CD_INTR_ABNORMAL | \ > ASPEED_I2CD_INTR_ARBIT_LOSS) > +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ > + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT > #define ASPEED_I2CD_INTR_ALL \ > - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ > + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ > + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ > ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ > ASPEED_I2CD_INTR_SCL_TIMEOUT | \ > ASPEED_I2CD_INTR_ABNORMAL | \ > @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { > u32 divisor); > unsigned long parent_clk_frequency; > u32 bus_frequency; > + u32 hw_timeout_ms; > /* Transaction state. */ > enum aspeed_i2c_master_state master_state; > struct i2c_msg *msgs; > @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) > } > > #if IS_ENABLED(CONFIG_I2C_SLAVE) > +static int aspeed_i2c_check_slave_error(u32 irq_status) > +{ > + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) > + return -EIO; > + > + return 0; > +} > + > static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) > { > u32 command, irq_handled = 0; > @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) > if (!slave) > return 0; > > + if (aspeed_i2c_check_slave_error(irq_status)) { > + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", > + irq_status); > + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); > + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; > + return irq_handled; > + } > + > command = readl(bus->base + ASPEED_I2C_CMD_REG); > > /* Slave was requested, restart state machine. */ > @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) > } > } > > -static int aspeed_i2c_is_irq_error(u32 irq_status) > +static int aspeed_i2c_check_master_error(u32 irq_status) > { > if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) > return -EAGAIN; > @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) > * should clear the command queue effectively taking us back to the > * INACTIVE state. > */ > - ret = aspeed_i2c_is_irq_error(irq_status); > + ret = aspeed_i2c_check_master_error(irq_status); > if (ret) { > - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", > + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", > irq_status); > irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); > if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { > @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) > /* precondition: bus.lock has been acquired. */ > static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) > { > + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; > u32 divisor, clk_reg_val; > > divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); > @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) > ASPEED_I2CD_TIME_THDSTA_MASK | > ASPEED_I2CD_TIME_TACST_MASK); > clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); > + > + if (bus->hw_timeout_ms) { > + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> > + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; > + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> > + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; > + > + timeout_base_divisor = 0; > + > + do { > + timeout_tick_us = 1000 * (16384 << > + (timeout_base_divisor << 1)) / > + (bus->parent_clk_frequency / 1000); > + > + if (timeout_base_divisor == div_max || > + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= > + bus->hw_timeout_ms * 1000) > + break; > + } while (timeout_base_divisor++ < div_max); > + > + if (timeout_tick_us) { > + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, > + timeout_tick_us); > + if (timeout_cycles == 0) > + timeout_cycles = 1; > + else if (timeout_cycles > cycles_max) > + timeout_cycles = cycles_max; > + } else { > + timeout_cycles = 0; > + } > + } else { > + timeout_base_divisor = 0; > + timeout_cycles = 0; > + } > + > + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, > + timeout_base_divisor); > + > writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); > - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); > + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); > > return 0; > } > @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) > } > } > > + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", > + &bus->hw_timeout_ms); > + > /* Initialize the I2C adapter */ > spin_lock_init(&bus->lock); > init_completion(&bus->cmd_complete); >
Hi Tao, On 9/5/2019 3:28 PM, Tao Ren wrote: > Hi Jae, > > On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: >> In case of multi-master environment, if a peer master incorrectly handles >> a bus in the middle of a transaction, I2C hardware hangs in slave state >> and it can't escape from the slave state, so this commit adds slave >> inactive timeout support to recover the bus in the case. >> >> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> > > I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? > > I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. Basic concept of this implementation is setting the slave state of driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of aspeed_i2c_reset() directly from interrupt context. Instead, when a master xfer happens after that, it will try bus recovery through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset() if needed. If this patch doesn't work in your case, test it again after adding one line code into this driver. See below. > >> --- >> drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- >> 1 file changed, 73 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c >> index 89317929bee4..92e1a249b393 100644 >> --- a/drivers/i2c/busses/i2c-aspeed.c >> +++ b/drivers/i2c/busses/i2c-aspeed.c >> @@ -70,10 +70,14 @@ >> #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) >> #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 >> #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) >> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 >> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) >> #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) >> #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) >> + >> /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ >> -#define ASPEED_NO_TIMEOUT_CTRL 0 >> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 >> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) >> >> /* 0x0c : I2CD Interrupt Control Register & >> * 0x10 : I2CD Interrupt Status Register >> @@ -81,6 +85,7 @@ >> * These share bit definitions, so use the same values for the enable & >> * status bits. >> */ >> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) >> #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) >> #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) >> #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) >> @@ -96,8 +101,11 @@ >> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >> ASPEED_I2CD_INTR_ABNORMAL | \ >> ASPEED_I2CD_INTR_ARBIT_LOSS) >> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ >> + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT >> #define ASPEED_I2CD_INTR_ALL \ >> - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >> + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ >> + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >> ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ >> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >> ASPEED_I2CD_INTR_ABNORMAL | \ >> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { >> u32 divisor); >> unsigned long parent_clk_frequency; >> u32 bus_frequency; >> + u32 hw_timeout_ms; >> /* Transaction state. */ >> enum aspeed_i2c_master_state master_state; >> struct i2c_msg *msgs; >> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) >> } >> >> #if IS_ENABLED(CONFIG_I2C_SLAVE) >> +static int aspeed_i2c_check_slave_error(u32 irq_status) >> +{ >> + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) >> + return -EIO; >> + >> + return 0; >> +} >> + >> static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >> { >> u32 command, irq_handled = 0; >> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >> if (!slave) >> return 0; >> >> + if (aspeed_i2c_check_slave_error(irq_status)) { >> + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", >> + irq_status); >> + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); >> + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; aspeed_i2c_reset(bus); I didn't add it in this patch because I wanted to avoid calling of this reset function from interrupt context but give it a try. Thanks, Jae >> + return irq_handled; >> + } >> + >> command = readl(bus->base + ASPEED_I2C_CMD_REG); >> >> /* Slave was requested, restart state machine. */ >> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) >> } >> } >> >> -static int aspeed_i2c_is_irq_error(u32 irq_status) >> +static int aspeed_i2c_check_master_error(u32 irq_status) >> { >> if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) >> return -EAGAIN; >> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >> * should clear the command queue effectively taking us back to the >> * INACTIVE state. >> */ >> - ret = aspeed_i2c_is_irq_error(irq_status); >> + ret = aspeed_i2c_check_master_error(irq_status); >> if (ret) { >> - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", >> + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", >> irq_status); >> irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); >> if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { >> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) >> /* precondition: bus.lock has been acquired. */ >> static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >> { >> + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; >> u32 divisor, clk_reg_val; >> >> divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); >> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >> ASPEED_I2CD_TIME_THDSTA_MASK | >> ASPEED_I2CD_TIME_TACST_MASK); >> clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); >> + >> + if (bus->hw_timeout_ms) { >> + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> >> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; >> + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> >> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; >> + >> + timeout_base_divisor = 0; >> + >> + do { >> + timeout_tick_us = 1000 * (16384 << >> + (timeout_base_divisor << 1)) / >> + (bus->parent_clk_frequency / 1000); >> + >> + if (timeout_base_divisor == div_max || >> + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= >> + bus->hw_timeout_ms * 1000) >> + break; >> + } while (timeout_base_divisor++ < div_max); >> + >> + if (timeout_tick_us) { >> + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, >> + timeout_tick_us); >> + if (timeout_cycles == 0) >> + timeout_cycles = 1; >> + else if (timeout_cycles > cycles_max) >> + timeout_cycles = cycles_max; >> + } else { >> + timeout_cycles = 0; >> + } >> + } else { >> + timeout_base_divisor = 0; >> + timeout_cycles = 0; >> + } >> + >> + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, >> + timeout_base_divisor); >> + >> writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); >> - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); >> + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); >> >> return 0; >> } >> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) >> } >> } >> >> + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", >> + &bus->hw_timeout_ms); >> + >> /* Initialize the I2C adapter */ >> spin_lock_init(&bus->lock); >> init_completion(&bus->cmd_complete); >>
On 9/5/19 3:48 PM, Jae Hyun Yoo wrote: > Hi Tao, > > On 9/5/2019 3:28 PM, Tao Ren wrote: >> Hi Jae, >> >> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: >>> In case of multi-master environment, if a peer master incorrectly handles >>> a bus in the middle of a transaction, I2C hardware hangs in slave state >>> and it can't escape from the slave state, so this commit adds slave >>> inactive timeout support to recover the bus in the case. >>> >>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> >> >> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? >> >> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. > > Basic concept of this implementation is setting the slave state of > driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of > aspeed_i2c_reset() directly from interrupt context. Instead, when a > master xfer happens after that, it will try bus recovery > through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset() > if needed. > > If this patch doesn't work in your case, test it again after adding > one line code into this driver. See below. If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being: 1) we don't know when userspace starts next master transfer. 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0). Thanks, Tao > >> >>> --- >>> drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- >>> 1 file changed, 73 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c >>> index 89317929bee4..92e1a249b393 100644 >>> --- a/drivers/i2c/busses/i2c-aspeed.c >>> +++ b/drivers/i2c/busses/i2c-aspeed.c >>> @@ -70,10 +70,14 @@ >>> #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) >>> #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 >>> #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) >>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 >>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) >>> #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) >>> #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) >>> + >>> /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ >>> -#define ASPEED_NO_TIMEOUT_CTRL 0 >>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 >>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) >>> /* 0x0c : I2CD Interrupt Control Register & >>> * 0x10 : I2CD Interrupt Status Register >>> @@ -81,6 +85,7 @@ >>> * These share bit definitions, so use the same values for the enable & >>> * status bits. >>> */ >>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) >>> #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) >>> #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) >>> #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) >>> @@ -96,8 +101,11 @@ >>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>> ASPEED_I2CD_INTR_ABNORMAL | \ >>> ASPEED_I2CD_INTR_ARBIT_LOSS) >>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ >>> + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT >>> #define ASPEED_I2CD_INTR_ALL \ >>> - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>> + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ >>> + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>> ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ >>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>> ASPEED_I2CD_INTR_ABNORMAL | \ >>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { >>> u32 divisor); >>> unsigned long parent_clk_frequency; >>> u32 bus_frequency; >>> + u32 hw_timeout_ms; >>> /* Transaction state. */ >>> enum aspeed_i2c_master_state master_state; >>> struct i2c_msg *msgs; >>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) >>> } >>> #if IS_ENABLED(CONFIG_I2C_SLAVE) >>> +static int aspeed_i2c_check_slave_error(u32 irq_status) >>> +{ >>> + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) >>> + return -EIO; >>> + >>> + return 0; >>> +} >>> + >>> static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>> { >>> u32 command, irq_handled = 0; >>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>> if (!slave) >>> return 0; >>> + if (aspeed_i2c_check_slave_error(irq_status)) { >>> + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", >>> + irq_status); >>> + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); >>> + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; > > aspeed_i2c_reset(bus); > > I didn't add it in this patch because I wanted to avoid calling of this > reset function from interrupt context but give it a try. > > Thanks, > Jae I believe this will solve my problem, but let me test it and will share you results later. >>> + return irq_handled; >>> + } >>> + >>> command = readl(bus->base + ASPEED_I2C_CMD_REG); >>> /* Slave was requested, restart state machine. */ >>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) >>> } >>> } >>> -static int aspeed_i2c_is_irq_error(u32 irq_status) >>> +static int aspeed_i2c_check_master_error(u32 irq_status) >>> { >>> if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) >>> return -EAGAIN; >>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>> * should clear the command queue effectively taking us back to the >>> * INACTIVE state. >>> */ >>> - ret = aspeed_i2c_is_irq_error(irq_status); >>> + ret = aspeed_i2c_check_master_error(irq_status); >>> if (ret) { >>> - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", >>> + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", >>> irq_status); >>> irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); >>> if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { >>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) >>> /* precondition: bus.lock has been acquired. */ >>> static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>> { >>> + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; >>> u32 divisor, clk_reg_val; >>> divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); >>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>> ASPEED_I2CD_TIME_THDSTA_MASK | >>> ASPEED_I2CD_TIME_TACST_MASK); >>> clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); >>> + >>> + if (bus->hw_timeout_ms) { >>> + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> >>> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; >>> + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> >>> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; >>> + >>> + timeout_base_divisor = 0; >>> + >>> + do { >>> + timeout_tick_us = 1000 * (16384 << >>> + (timeout_base_divisor << 1)) / >>> + (bus->parent_clk_frequency / 1000); >>> + >>> + if (timeout_base_divisor == div_max || >>> + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= >>> + bus->hw_timeout_ms * 1000) >>> + break; >>> + } while (timeout_base_divisor++ < div_max); >>> + >>> + if (timeout_tick_us) { >>> + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, >>> + timeout_tick_us); >>> + if (timeout_cycles == 0) >>> + timeout_cycles = 1; >>> + else if (timeout_cycles > cycles_max) >>> + timeout_cycles = cycles_max; >>> + } else { >>> + timeout_cycles = 0; >>> + } >>> + } else { >>> + timeout_base_divisor = 0; >>> + timeout_cycles = 0; >>> + } >>> + >>> + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, >>> + timeout_base_divisor); >>> + >>> writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); >>> - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>> + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>> return 0; >>> } >>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) >>> } >>> } >>> + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", >>> + &bus->hw_timeout_ms); >>> + >>> /* Initialize the I2C adapter */ >>> spin_lock_init(&bus->lock); >>> init_completion(&bus->cmd_complete); >>>
On 9/5/2019 4:19 PM, Tao Ren wrote: > On 9/5/19 3:48 PM, Jae Hyun Yoo wrote: >> Hi Tao, >> >> On 9/5/2019 3:28 PM, Tao Ren wrote: >>> Hi Jae, >>> >>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: >>>> In case of multi-master environment, if a peer master incorrectly handles >>>> a bus in the middle of a transaction, I2C hardware hangs in slave state >>>> and it can't escape from the slave state, so this commit adds slave >>>> inactive timeout support to recover the bus in the case. >>>> >>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> >>> >>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? >>> >>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. >> >> Basic concept of this implementation is setting the slave state of >> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of >> aspeed_i2c_reset() directly from interrupt context. Instead, when a >> master xfer happens after that, it will try bus recovery >> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset() >> if needed. >> >> If this patch doesn't work in your case, test it again after adding >> one line code into this driver. See below. > > If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being: > > 1) we don't know when userspace starts next master transfer. > 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0). > Oh, so to support the case as well, probably we need to add a flag for indicating recovery needs when a master xfer comes then it could forcibly recover and reset the bus even if the bus is idle. Can you please test that with making code changes? Unfortunately, I can't reproduce the case in my system. >> >>> >>>> --- >>>> drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- >>>> 1 file changed, 73 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c >>>> index 89317929bee4..92e1a249b393 100644 >>>> --- a/drivers/i2c/busses/i2c-aspeed.c >>>> +++ b/drivers/i2c/busses/i2c-aspeed.c >>>> @@ -70,10 +70,14 @@ >>>> #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) >>>> #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 >>>> #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) >>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 >>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) >>>> #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) >>>> #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) >>>> + >>>> /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ >>>> -#define ASPEED_NO_TIMEOUT_CTRL 0 >>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 >>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) >>>> /* 0x0c : I2CD Interrupt Control Register & >>>> * 0x10 : I2CD Interrupt Status Register >>>> @@ -81,6 +85,7 @@ >>>> * These share bit definitions, so use the same values for the enable & >>>> * status bits. >>>> */ >>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) >>>> #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) >>>> #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) >>>> #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) >>>> @@ -96,8 +101,11 @@ >>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>>> ASPEED_I2CD_INTR_ABNORMAL | \ >>>> ASPEED_I2CD_INTR_ARBIT_LOSS) >>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ >>>> + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT >>>> #define ASPEED_I2CD_INTR_ALL \ >>>> - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>>> + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ >>>> + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>>> ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ >>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>>> ASPEED_I2CD_INTR_ABNORMAL | \ >>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { >>>> u32 divisor); >>>> unsigned long parent_clk_frequency; >>>> u32 bus_frequency; >>>> + u32 hw_timeout_ms; >>>> /* Transaction state. */ >>>> enum aspeed_i2c_master_state master_state; >>>> struct i2c_msg *msgs; >>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) >>>> } >>>> #if IS_ENABLED(CONFIG_I2C_SLAVE) >>>> +static int aspeed_i2c_check_slave_error(u32 irq_status) >>>> +{ >>>> + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) >>>> + return -EIO; >>>> + >>>> + return 0; >>>> +} >>>> + >>>> static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>> { >>>> u32 command, irq_handled = 0; >>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>> if (!slave) >>>> return 0; >>>> + if (aspeed_i2c_check_slave_error(irq_status)) { >>>> + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", >>>> + irq_status); >>>> + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); >>>> + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; >> >> aspeed_i2c_reset(bus); >> >> I didn't add it in this patch because I wanted to avoid calling of this >> reset function from interrupt context but give it a try. >> >> Thanks, >> Jae > > I believe this will solve my problem, but let me test it and will share you results later. Please share this result too. It would be useful to complete making this patch. Thanks, Jae >>>> + return irq_handled; >>>> + } >>>> + >>>> command = readl(bus->base + ASPEED_I2C_CMD_REG); >>>> /* Slave was requested, restart state machine. */ >>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) >>>> } >>>> } >>>> -static int aspeed_i2c_is_irq_error(u32 irq_status) >>>> +static int aspeed_i2c_check_master_error(u32 irq_status) >>>> { >>>> if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) >>>> return -EAGAIN; >>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>> * should clear the command queue effectively taking us back to the >>>> * INACTIVE state. >>>> */ >>>> - ret = aspeed_i2c_is_irq_error(irq_status); >>>> + ret = aspeed_i2c_check_master_error(irq_status); >>>> if (ret) { >>>> - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", >>>> + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", >>>> irq_status); >>>> irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); >>>> if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { >>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) >>>> /* precondition: bus.lock has been acquired. */ >>>> static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>>> { >>>> + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; >>>> u32 divisor, clk_reg_val; >>>> divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); >>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>>> ASPEED_I2CD_TIME_THDSTA_MASK | >>>> ASPEED_I2CD_TIME_TACST_MASK); >>>> clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); >>>> + >>>> + if (bus->hw_timeout_ms) { >>>> + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> >>>> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; >>>> + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> >>>> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; >>>> + >>>> + timeout_base_divisor = 0; >>>> + >>>> + do { >>>> + timeout_tick_us = 1000 * (16384 << >>>> + (timeout_base_divisor << 1)) / >>>> + (bus->parent_clk_frequency / 1000); >>>> + >>>> + if (timeout_base_divisor == div_max || >>>> + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= >>>> + bus->hw_timeout_ms * 1000) >>>> + break; >>>> + } while (timeout_base_divisor++ < div_max); >>>> + >>>> + if (timeout_tick_us) { >>>> + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, >>>> + timeout_tick_us); >>>> + if (timeout_cycles == 0) >>>> + timeout_cycles = 1; >>>> + else if (timeout_cycles > cycles_max) >>>> + timeout_cycles = cycles_max; >>>> + } else { >>>> + timeout_cycles = 0; >>>> + } >>>> + } else { >>>> + timeout_base_divisor = 0; >>>> + timeout_cycles = 0; >>>> + } >>>> + >>>> + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, >>>> + timeout_base_divisor); >>>> + >>>> writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); >>>> - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>>> + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>>> return 0; >>>> } >>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) >>>> } >>>> } >>>> + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", >>>> + &bus->hw_timeout_ms); >>>> + >>>> /* Initialize the I2C adapter */ >>>> spin_lock_init(&bus->lock); >>>> init_completion(&bus->cmd_complete); >>>>
Hi Jae, On 9/5/19 4:35 PM, Jae Hyun Yoo wrote: > On 9/5/2019 4:19 PM, Tao Ren wrote: >> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote: >>> Hi Tao, >>> >>> On 9/5/2019 3:28 PM, Tao Ren wrote: >>>> Hi Jae, >>>> >>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: >>>>> In case of multi-master environment, if a peer master incorrectly handles >>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state >>>>> and it can't escape from the slave state, so this commit adds slave >>>>> inactive timeout support to recover the bus in the case. >>>>> >>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> >>>> >>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? >>>> >>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. >>> >>> Basic concept of this implementation is setting the slave state of >>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of >>> aspeed_i2c_reset() directly from interrupt context. Instead, when a >>> master xfer happens after that, it will try bus recovery >>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset() >>> if needed. >>> >>> If this patch doesn't work in your case, test it again after adding >>> one line code into this driver. See below. >> >> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being: >> >> 1) we don't know when userspace starts next master transfer. >> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0). >> > > Oh, so to support the case as well, probably we need to add a flag for > indicating recovery needs when a master xfer comes then it could > forcibly recover and reset the bus even if the bus is idle. Can you > please test that with making code changes? Unfortunately, I can't > reproduce the case in my system. Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed? >>> >>>> >>>>> --- >>>>> drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- >>>>> 1 file changed, 73 insertions(+), 6 deletions(-) >>>>> >>>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c >>>>> index 89317929bee4..92e1a249b393 100644 >>>>> --- a/drivers/i2c/busses/i2c-aspeed.c >>>>> +++ b/drivers/i2c/busses/i2c-aspeed.c >>>>> @@ -70,10 +70,14 @@ >>>>> #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) >>>>> #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 >>>>> #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) >>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 >>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) >>>>> #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) >>>>> #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) >>>>> + >>>>> /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ >>>>> -#define ASPEED_NO_TIMEOUT_CTRL 0 >>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 >>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) >>>>> /* 0x0c : I2CD Interrupt Control Register & >>>>> * 0x10 : I2CD Interrupt Status Register >>>>> @@ -81,6 +85,7 @@ >>>>> * These share bit definitions, so use the same values for the enable & >>>>> * status bits. >>>>> */ >>>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) >>>>> #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) >>>>> #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) >>>>> #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) >>>>> @@ -96,8 +101,11 @@ >>>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>>>> ASPEED_I2CD_INTR_ABNORMAL | \ >>>>> ASPEED_I2CD_INTR_ARBIT_LOSS) >>>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ >>>>> + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT >>>>> #define ASPEED_I2CD_INTR_ALL \ >>>>> - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>>>> + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ >>>>> + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>>>> ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ >>>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>>>> ASPEED_I2CD_INTR_ABNORMAL | \ >>>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { >>>>> u32 divisor); >>>>> unsigned long parent_clk_frequency; >>>>> u32 bus_frequency; >>>>> + u32 hw_timeout_ms; >>>>> /* Transaction state. */ >>>>> enum aspeed_i2c_master_state master_state; >>>>> struct i2c_msg *msgs; >>>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) >>>>> } >>>>> #if IS_ENABLED(CONFIG_I2C_SLAVE) >>>>> +static int aspeed_i2c_check_slave_error(u32 irq_status) >>>>> +{ >>>>> + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) >>>>> + return -EIO; >>>>> + >>>>> + return 0; >>>>> +} >>>>> + >>>>> static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>>> { >>>>> u32 command, irq_handled = 0; >>>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>>> if (!slave) >>>>> return 0; >>>>> + if (aspeed_i2c_check_slave_error(irq_status)) { >>>>> + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", >>>>> + irq_status); >>>>> + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); >>>>> + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; >>> >>> aspeed_i2c_reset(bus); >>> >>> I didn't add it in this patch because I wanted to avoid calling of this >>> reset function from interrupt context but give it a try. >>> >>> Thanks, >>> Jae >> >> I believe this will solve my problem, but let me test it and will share you results later. > > Please share this result too. It would be useful to complete making this > patch. > > Thanks, > Jae Adding aspeed_i2c_reset(bus) does fix my problem. Thank you Jae. Cheers, Tao >>>>> + return irq_handled; >>>>> + } >>>>> + >>>>> command = readl(bus->base + ASPEED_I2C_CMD_REG); >>>>> /* Slave was requested, restart state machine. */ >>>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) >>>>> } >>>>> } >>>>> -static int aspeed_i2c_is_irq_error(u32 irq_status) >>>>> +static int aspeed_i2c_check_master_error(u32 irq_status) >>>>> { >>>>> if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) >>>>> return -EAGAIN; >>>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>>> * should clear the command queue effectively taking us back to the >>>>> * INACTIVE state. >>>>> */ >>>>> - ret = aspeed_i2c_is_irq_error(irq_status); >>>>> + ret = aspeed_i2c_check_master_error(irq_status); >>>>> if (ret) { >>>>> - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", >>>>> + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", >>>>> irq_status); >>>>> irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); >>>>> if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { >>>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) >>>>> /* precondition: bus.lock has been acquired. */ >>>>> static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>>>> { >>>>> + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; >>>>> u32 divisor, clk_reg_val; >>>>> divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); >>>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>>>> ASPEED_I2CD_TIME_THDSTA_MASK | >>>>> ASPEED_I2CD_TIME_TACST_MASK); >>>>> clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); >>>>> + >>>>> + if (bus->hw_timeout_ms) { >>>>> + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> >>>>> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; >>>>> + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> >>>>> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; >>>>> + >>>>> + timeout_base_divisor = 0; >>>>> + >>>>> + do { >>>>> + timeout_tick_us = 1000 * (16384 << >>>>> + (timeout_base_divisor << 1)) / >>>>> + (bus->parent_clk_frequency / 1000); >>>>> + >>>>> + if (timeout_base_divisor == div_max || >>>>> + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= >>>>> + bus->hw_timeout_ms * 1000) >>>>> + break; >>>>> + } while (timeout_base_divisor++ < div_max); >>>>> + >>>>> + if (timeout_tick_us) { >>>>> + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, >>>>> + timeout_tick_us); >>>>> + if (timeout_cycles == 0) >>>>> + timeout_cycles = 1; >>>>> + else if (timeout_cycles > cycles_max) >>>>> + timeout_cycles = cycles_max; >>>>> + } else { >>>>> + timeout_cycles = 0; >>>>> + } >>>>> + } else { >>>>> + timeout_base_divisor = 0; >>>>> + timeout_cycles = 0; >>>>> + } >>>>> + >>>>> + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, >>>>> + timeout_base_divisor); >>>>> + >>>>> writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); >>>>> - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>>>> + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>>>> return 0; >>>>> } >>>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) >>>>> } >>>>> } >>>>> + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", >>>>> + &bus->hw_timeout_ms); >>>>> + >>>>> /* Initialize the I2C adapter */ >>>>> spin_lock_init(&bus->lock); >>>>> init_completion(&bus->cmd_complete); >>>>>
On 9/5/19 6:16 PM, Tao Ren wrote: > Hi Jae, > > On 9/5/19 4:35 PM, Jae Hyun Yoo wrote: >> On 9/5/2019 4:19 PM, Tao Ren wrote: >>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote: >>>> Hi Tao, >>>> >>>> On 9/5/2019 3:28 PM, Tao Ren wrote: >>>>> Hi Jae, >>>>> >>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: >>>>>> In case of multi-master environment, if a peer master incorrectly handles >>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state >>>>>> and it can't escape from the slave state, so this commit adds slave >>>>>> inactive timeout support to recover the bus in the case. >>>>>> >>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> >>>>> >>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? >>>>> >>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. >>>> >>>> Basic concept of this implementation is setting the slave state of >>>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of >>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a >>>> master xfer happens after that, it will try bus recovery >>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset() >>>> if needed. >>>> >>>> If this patch doesn't work in your case, test it again after adding >>>> one line code into this driver. See below. >>> >>> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being: >>> >>> 1) we don't know when userspace starts next master transfer. >>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0). >>> >> >> Oh, so to support the case as well, probably we need to add a flag for >> indicating recovery needs when a master xfer comes then it could >> forcibly recover and reset the bus even if the bus is idle. Can you >> please test that with making code changes? Unfortunately, I can't >> reproduce the case in my system. > > Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed? One of my colleagues reminded me to enable bus auto release (I2CD00[17]) which could avoid explicit bus reset in interrupt handler. Let me try and will update back soon. Thanks, Tao >>>> >>>>> >>>>>> --- >>>>>> drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- >>>>>> 1 file changed, 73 insertions(+), 6 deletions(-) >>>>>> >>>>>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c >>>>>> index 89317929bee4..92e1a249b393 100644 >>>>>> --- a/drivers/i2c/busses/i2c-aspeed.c >>>>>> +++ b/drivers/i2c/busses/i2c-aspeed.c >>>>>> @@ -70,10 +70,14 @@ >>>>>> #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) >>>>>> #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 >>>>>> #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) >>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 >>>>>> +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) >>>>>> #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) >>>>>> #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) >>>>>> + >>>>>> /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ >>>>>> -#define ASPEED_NO_TIMEOUT_CTRL 0 >>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 >>>>>> +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) >>>>>> /* 0x0c : I2CD Interrupt Control Register & >>>>>> * 0x10 : I2CD Interrupt Status Register >>>>>> @@ -81,6 +85,7 @@ >>>>>> * These share bit definitions, so use the same values for the enable & >>>>>> * status bits. >>>>>> */ >>>>>> +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) >>>>>> #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) >>>>>> #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) >>>>>> #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) >>>>>> @@ -96,8 +101,11 @@ >>>>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>>>>> ASPEED_I2CD_INTR_ABNORMAL | \ >>>>>> ASPEED_I2CD_INTR_ARBIT_LOSS) >>>>>> +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ >>>>>> + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT >>>>>> #define ASPEED_I2CD_INTR_ALL \ >>>>>> - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>>>>> + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ >>>>>> + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ >>>>>> ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ >>>>>> ASPEED_I2CD_INTR_SCL_TIMEOUT | \ >>>>>> ASPEED_I2CD_INTR_ABNORMAL | \ >>>>>> @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { >>>>>> u32 divisor); >>>>>> unsigned long parent_clk_frequency; >>>>>> u32 bus_frequency; >>>>>> + u32 hw_timeout_ms; >>>>>> /* Transaction state. */ >>>>>> enum aspeed_i2c_master_state master_state; >>>>>> struct i2c_msg *msgs; >>>>>> @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) >>>>>> } >>>>>> #if IS_ENABLED(CONFIG_I2C_SLAVE) >>>>>> +static int aspeed_i2c_check_slave_error(u32 irq_status) >>>>>> +{ >>>>>> + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) >>>>>> + return -EIO; >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>>>> { >>>>>> u32 command, irq_handled = 0; >>>>>> @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>>>> if (!slave) >>>>>> return 0; >>>>>> + if (aspeed_i2c_check_slave_error(irq_status)) { >>>>>> + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", >>>>>> + irq_status); >>>>>> + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); >>>>>> + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; >>>> >>>> aspeed_i2c_reset(bus); >>>> >>>> I didn't add it in this patch because I wanted to avoid calling of this >>>> reset function from interrupt context but give it a try. >>>> >>>> Thanks, >>>> Jae >>> >>> I believe this will solve my problem, but let me test it and will share you results later. >> >> Please share this result too. It would be useful to complete making this >> patch. >> >> Thanks, >> Jae > > Adding aspeed_i2c_reset(bus) does fix my problem. Thank you Jae. > > > Cheers, > > Tao > >>>>>> + return irq_handled; >>>>>> + } >>>>>> + >>>>>> command = readl(bus->base + ASPEED_I2C_CMD_REG); >>>>>> /* Slave was requested, restart state machine. */ >>>>>> @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) >>>>>> } >>>>>> } >>>>>> -static int aspeed_i2c_is_irq_error(u32 irq_status) >>>>>> +static int aspeed_i2c_check_master_error(u32 irq_status) >>>>>> { >>>>>> if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) >>>>>> return -EAGAIN; >>>>>> @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) >>>>>> * should clear the command queue effectively taking us back to the >>>>>> * INACTIVE state. >>>>>> */ >>>>>> - ret = aspeed_i2c_is_irq_error(irq_status); >>>>>> + ret = aspeed_i2c_check_master_error(irq_status); >>>>>> if (ret) { >>>>>> - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", >>>>>> + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", >>>>>> irq_status); >>>>>> irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); >>>>>> if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { >>>>>> @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) >>>>>> /* precondition: bus.lock has been acquired. */ >>>>>> static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>>>>> { >>>>>> + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; >>>>>> u32 divisor, clk_reg_val; >>>>>> divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); >>>>>> @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) >>>>>> ASPEED_I2CD_TIME_THDSTA_MASK | >>>>>> ASPEED_I2CD_TIME_TACST_MASK); >>>>>> clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); >>>>>> + >>>>>> + if (bus->hw_timeout_ms) { >>>>>> + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> >>>>>> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; >>>>>> + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> >>>>>> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; >>>>>> + >>>>>> + timeout_base_divisor = 0; >>>>>> + >>>>>> + do { >>>>>> + timeout_tick_us = 1000 * (16384 << >>>>>> + (timeout_base_divisor << 1)) / >>>>>> + (bus->parent_clk_frequency / 1000); >>>>>> + >>>>>> + if (timeout_base_divisor == div_max || >>>>>> + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= >>>>>> + bus->hw_timeout_ms * 1000) >>>>>> + break; >>>>>> + } while (timeout_base_divisor++ < div_max); >>>>>> + >>>>>> + if (timeout_tick_us) { >>>>>> + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, >>>>>> + timeout_tick_us); >>>>>> + if (timeout_cycles == 0) >>>>>> + timeout_cycles = 1; >>>>>> + else if (timeout_cycles > cycles_max) >>>>>> + timeout_cycles = cycles_max; >>>>>> + } else { >>>>>> + timeout_cycles = 0; >>>>>> + } >>>>>> + } else { >>>>>> + timeout_base_divisor = 0; >>>>>> + timeout_cycles = 0; >>>>>> + } >>>>>> + >>>>>> + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, >>>>>> + timeout_base_divisor); >>>>>> + >>>>>> writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); >>>>>> - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>>>>> + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); >>>>>> return 0; >>>>>> } >>>>>> @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) >>>>>> } >>>>>> } >>>>>> + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", >>>>>> + &bus->hw_timeout_ms); >>>>>> + >>>>>> /* Initialize the I2C adapter */ >>>>>> spin_lock_init(&bus->lock); >>>>>> init_completion(&bus->cmd_complete); >>>>>>
Hi Jae, On 9/5/19 6:56 PM, Tao Ren wrote: > On 9/5/19 6:16 PM, Tao Ren wrote: >> Hi Jae, >> >> On 9/5/19 4:35 PM, Jae Hyun Yoo wrote: >>> On 9/5/2019 4:19 PM, Tao Ren wrote: >>>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote: >>>>> Hi Tao, >>>>> >>>>> On 9/5/2019 3:28 PM, Tao Ren wrote: >>>>>> Hi Jae, >>>>>> >>>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: >>>>>>> In case of multi-master environment, if a peer master incorrectly handles >>>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state >>>>>>> and it can't escape from the slave state, so this commit adds slave >>>>>>> inactive timeout support to recover the bus in the case. >>>>>>> >>>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> >>>>>> >>>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? >>>>>> >>>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. >>>>> >>>>> Basic concept of this implementation is setting the slave state of >>>>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of >>>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a >>>>> master xfer happens after that, it will try bus recovery >>>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset() >>>>> if needed. >>>>> >>>>> If this patch doesn't work in your case, test it again after adding >>>>> one line code into this driver. See below. >>>> >>>> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being: >>>> >>>> 1) we don't know when userspace starts next master transfer. >>>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0). >>>> >>> >>> Oh, so to support the case as well, probably we need to add a flag for >>> indicating recovery needs when a master xfer comes then it could >>> forcibly recover and reset the bus even if the bus is idle. Can you >>> please test that with making code changes? Unfortunately, I can't >>> reproduce the case in my system. >> >> Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed? > > One of my colleagues reminded me to enable bus auto release (I2CD00[17]) which could avoid explicit bus reset in interrupt handler. > > Let me try and will update back soon. Let me summarize my testing result on Minipack BMC: BMC i2c controller cannot exit slave state by enabling slave_inactive_timeout interrupt only: in this case, i2c controller stays in SRXD state and slave_inactive_timeout interrupt will be generated again and again until bus is explicitly reset. I've tried below 2 approaches and both works on my platform: - calling aspeed_i2c_reset() in interrupt handler when slave_inacive_timeout interrupt is delivered. - enabling bus auto release feature (I2CD00[17]) in probe function. Cheers, Tao
Hi Tao, On 9/5/2019 7:35 PM, Tao Ren wrote: > Hi Jae, > > On 9/5/19 6:56 PM, Tao Ren wrote: >> On 9/5/19 6:16 PM, Tao Ren wrote: >>> Hi Jae, >>> >>> On 9/5/19 4:35 PM, Jae Hyun Yoo wrote: >>>> On 9/5/2019 4:19 PM, Tao Ren wrote: >>>>> On 9/5/19 3:48 PM, Jae Hyun Yoo wrote: >>>>>> Hi Tao, >>>>>> >>>>>> On 9/5/2019 3:28 PM, Tao Ren wrote: >>>>>>> Hi Jae, >>>>>>> >>>>>>> On 9/4/19 1:07 PM, Jae Hyun Yoo wrote: >>>>>>>> In case of multi-master environment, if a peer master incorrectly handles >>>>>>>> a bus in the middle of a transaction, I2C hardware hangs in slave state >>>>>>>> and it can't escape from the slave state, so this commit adds slave >>>>>>>> inactive timeout support to recover the bus in the case. >>>>>>>> >>>>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> >>>>>>> >>>>>>> I have a generic question on the patch: is it good enough to just enable slave_inactive_timeout and acknowledge the interrupt when it happens? Or do we need to reset the i2c controller to bring it out of slave state? >>>>>>> >>>>>>> I'm asking this because I hit an issue on my Minipack BMC where the slave_inactive_timeout interrupt was generated once every ~0.1 seconds (I set aspeed,hw-timeout-ms to 1000), and a few seconds later my BMC was rebooted (I guess watchdog timed out). Given the issue happened only once, I don't have chance to collect more information (such as why the repetitive interrupt was generated, why BMC rebooted, and etc.). Will share more if I met the problem again. >>>>>> >>>>>> Basic concept of this implementation is setting the slave state of >>>>>> driver to ASPEED_I2C_SLAVE_INACTIVE to avoid calling of >>>>>> aspeed_i2c_reset() directly from interrupt context. Instead, when a >>>>>> master xfer happens after that, it will try bus recovery >>>>>> through aspeed_i2c_recover_bus() and it will call aspeed_i2c_reset() >>>>>> if needed. >>>>>> >>>>>> If this patch doesn't work in your case, test it again after adding >>>>>> one line code into this driver. See below. >>>>> >>>>> If bus_reset is really needed in this case, then I'd prefer to do it immediately or in a threaded_irq_handler if it's bad idea to call aspeed_i2c_reset() in hardware interrupt context. The reasons being: >>>>> >>>>> 1) we don't know when userspace starts next master transfer. >>>>> 2) aspeed_i2c_recover_bus() is not triggered in aspeed_i2c_master_xfer() in my environment because bus is "idle" (I2CD14[16] == 0). >>>>> >>>> >>>> Oh, so to support the case as well, probably we need to add a flag for >>>> indicating recovery needs when a master xfer comes then it could >>>> forcibly recover and reset the bus even if the bus is idle. Can you >>>> please test that with making code changes? Unfortunately, I can't >>>> reproduce the case in my system. >>> >>> Not sure if I understand it correctly, but given we already reset the bus in interrupt handler, the extra flag should not be needed? >> >> One of my colleagues reminded me to enable bus auto release (I2CD00[17]) which could avoid explicit bus reset in interrupt handler. >> >> Let me try and will update back soon. > > Let me summarize my testing result on Minipack BMC: > > BMC i2c controller cannot exit slave state by enabling slave_inactive_timeout interrupt only: in this case, i2c controller stays in SRXD state and slave_inactive_timeout interrupt will be generated again and again until bus is explicitly reset. > > I've tried below 2 approaches and both works on my platform: > - calling aspeed_i2c_reset() in interrupt handler when slave_inacive_timeout interrupt is delivered. > - enabling bus auto release feature (I2CD00[17]) in probe function. Thanks a lot for your testing result and suggestions. It seems that the latter way - enabling bus auto release would be better for this implementation. I'll submit a next spin today. Cheers, Jae
diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 89317929bee4..92e1a249b393 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -70,10 +70,14 @@ #define ASPEED_I2CD_TIME_SCL_HIGH_MASK GENMASK(19, 16) #define ASPEED_I2CD_TIME_SCL_LOW_SHIFT 12 #define ASPEED_I2CD_TIME_SCL_LOW_MASK GENMASK(15, 12) +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT 8 +#define ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK GENMASK(9, 8) #define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK GENMASK(3, 0) #define ASPEED_I2CD_TIME_SCL_REG_MAX GENMASK(3, 0) + /* 0x08 : I2CD Clock and AC Timing Control Register #2 */ -#define ASPEED_NO_TIMEOUT_CTRL 0 +#define ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT 0 +#define ASPEED_I2CD_TIMEOUT_CYCLES_MASK GENMASK(4, 0) /* 0x0c : I2CD Interrupt Control Register & * 0x10 : I2CD Interrupt Status Register @@ -81,6 +85,7 @@ * These share bit definitions, so use the same values for the enable & * status bits. */ +#define ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT BIT(15) #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) @@ -96,8 +101,11 @@ ASPEED_I2CD_INTR_SCL_TIMEOUT | \ ASPEED_I2CD_INTR_ABNORMAL | \ ASPEED_I2CD_INTR_ARBIT_LOSS) +#define ASPEED_I2CD_INTR_SLAVE_ERRORS \ + ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT #define ASPEED_I2CD_INTR_ALL \ - (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ + (ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT | \ + ASPEED_I2CD_INTR_SDA_DL_TIMEOUT | \ ASPEED_I2CD_INTR_BUS_RECOVER_DONE | \ ASPEED_I2CD_INTR_SCL_TIMEOUT | \ ASPEED_I2CD_INTR_ABNORMAL | \ @@ -176,6 +184,7 @@ struct aspeed_i2c_bus { u32 divisor); unsigned long parent_clk_frequency; u32 bus_frequency; + u32 hw_timeout_ms; /* Transaction state. */ enum aspeed_i2c_master_state master_state; struct i2c_msg *msgs; @@ -276,6 +285,14 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus) } #if IS_ENABLED(CONFIG_I2C_SLAVE) +static int aspeed_i2c_check_slave_error(u32 irq_status) +{ + if (irq_status & ASPEED_I2CD_INTR_SLAVE_INACTIVE_TIMEOUT) + return -EIO; + + return 0; +} + static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) { u32 command, irq_handled = 0; @@ -286,6 +303,14 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) if (!slave) return 0; + if (aspeed_i2c_check_slave_error(irq_status)) { + dev_dbg(bus->dev, "received slave error interrupt: 0x%08x\n", + irq_status); + irq_handled |= (irq_status & ASPEED_I2CD_INTR_SLAVE_ERRORS); + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; + return irq_handled; + } + command = readl(bus->base + ASPEED_I2C_CMD_REG); /* Slave was requested, restart state machine. */ @@ -602,7 +627,7 @@ static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus) } } -static int aspeed_i2c_is_irq_error(u32 irq_status) +static int aspeed_i2c_check_master_error(u32 irq_status) { if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS) return -EAGAIN; @@ -633,9 +658,9 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) * should clear the command queue effectively taking us back to the * INACTIVE state. */ - ret = aspeed_i2c_is_irq_error(irq_status); + ret = aspeed_i2c_check_master_error(irq_status); if (ret) { - dev_dbg(bus->dev, "received error interrupt: 0x%08x\n", + dev_dbg(bus->dev, "received master error interrupt: 0x%08x\n", irq_status); irq_handled |= (irq_status & ASPEED_I2CD_INTR_MASTER_ERRORS); if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) { @@ -1194,6 +1219,7 @@ static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) /* precondition: bus.lock has been acquired. */ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) { + u32 timeout_base_divisor, timeout_tick_us, timeout_cycles; u32 divisor, clk_reg_val; divisor = DIV_ROUND_UP(bus->parent_clk_frequency, bus->bus_frequency); @@ -1202,8 +1228,46 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) ASPEED_I2CD_TIME_THDSTA_MASK | ASPEED_I2CD_TIME_TACST_MASK); clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); + + if (bus->hw_timeout_ms) { + u8 div_max = ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK >> + ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_SHIFT; + u8 cycles_max = ASPEED_I2CD_TIMEOUT_CYCLES_MASK >> + ASPEED_I2CD_TIMEOUT_CYCLES_SHIFT; + + timeout_base_divisor = 0; + + do { + timeout_tick_us = 1000 * (16384 << + (timeout_base_divisor << 1)) / + (bus->parent_clk_frequency / 1000); + + if (timeout_base_divisor == div_max || + timeout_tick_us * ASPEED_I2CD_TIMEOUT_CYCLES_MASK >= + bus->hw_timeout_ms * 1000) + break; + } while (timeout_base_divisor++ < div_max); + + if (timeout_tick_us) { + timeout_cycles = DIV_ROUND_UP(bus->hw_timeout_ms * 1000, + timeout_tick_us); + if (timeout_cycles == 0) + timeout_cycles = 1; + else if (timeout_cycles > cycles_max) + timeout_cycles = cycles_max; + } else { + timeout_cycles = 0; + } + } else { + timeout_base_divisor = 0; + timeout_cycles = 0; + } + + clk_reg_val |= FIELD_PREP(ASPEED_I2CD_TIME_TIMEOUT_BASE_DIVISOR_MASK, + timeout_base_divisor); + writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); - writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); + writel(timeout_cycles, bus->base + ASPEED_I2C_AC_TIMING_REG2); return 0; } @@ -1404,6 +1468,9 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) } } + device_property_read_u32(&pdev->dev, "aspeed,hw-timeout-ms", + &bus->hw_timeout_ms); + /* Initialize the I2C adapter */ spin_lock_init(&bus->lock); init_completion(&bus->cmd_complete);
In case of multi-master environment, if a peer master incorrectly handles a bus in the middle of a transaction, I2C hardware hangs in slave state and it can't escape from the slave state, so this commit adds slave inactive timeout support to recover the bus in the case. Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> --- drivers/i2c/busses/i2c-aspeed.c | 79 ++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 6 deletions(-)