Message ID | 20201127130731.99270-1-nixiaoming@huawei.com |
---|---|
State | Rejected |
Delegated to: | Vignesh R |
Headers | show |
Series | mtd:cfi_cmdset_0002: fix atomic sleep bug when CONFIG_MTD_XIP=y | expand |
ping On 2020/11/27 21:07, Xiaoming Ni wrote: > When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable(). > To avoid sleep in interrupt context, we need to call local_irq_enable() > before schedule(). > > The problem call stack is as follows: > bug1: > do_write_oneword_retry() > xip_disable() > local_irq_disable() > do_write_oneword_once() > schedule() > bug2: > do_write_buffer() > xip_disable() > local_irq_disable() > do_write_buffer_wait() > schedule() > bug3: > do_erase_chip() > xip_disable() > local_irq_disable() > schedule() > bug4: > do_erase_oneblock() > xip_disable() > local_irq_disable() > schedule() > > Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.") > Cc: stable@vger.kernel.org # v2.6.13 > Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com> > --- > drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++ > 1 file changed, 16 insertions(+) > > diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c > index a1f3e1031c3d..12c3776f093a 100644 > --- a/drivers/mtd/chips/cfi_cmdset_0002.c > +++ b/drivers/mtd/chips/cfi_cmdset_0002.c > @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map, > set_current_state(TASK_UNINTERRUPTIBLE); > add_wait_queue(&chip->wq, &wait); > mutex_unlock(&chip->mutex); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_enable(); > schedule(); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_disable(); > remove_wait_queue(&chip->wq, &wait); > timeo = jiffies + (HZ / 2); /* FIXME */ > mutex_lock(&chip->mutex); > @@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info *map, > set_current_state(TASK_UNINTERRUPTIBLE); > add_wait_queue(&chip->wq, &wait); > mutex_unlock(&chip->mutex); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_enable(); > schedule(); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_disable(); > remove_wait_queue(&chip->wq, &wait); > timeo = jiffies + (HZ / 2); /* FIXME */ > mutex_lock(&chip->mutex); > @@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) > set_current_state(TASK_UNINTERRUPTIBLE); > add_wait_queue(&chip->wq, &wait); > mutex_unlock(&chip->mutex); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_enable(); > schedule(); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_disable(); > remove_wait_queue(&chip->wq, &wait); > mutex_lock(&chip->mutex); > continue; > @@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, > set_current_state(TASK_UNINTERRUPTIBLE); > add_wait_queue(&chip->wq, &wait); > mutex_unlock(&chip->mutex); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_enable(); > schedule(); > + if (IS_ENABLED(CONFIG_MTD_XIP)) > + local_irq_disable(); > remove_wait_queue(&chip->wq, &wait); > mutex_lock(&chip->mutex); > continue; >
Hi Xiaoming, Xiaoming Ni <nixiaoming@huawei.com> wrote on Mon, 7 Dec 2020 18:48:33 +0800: > ping > > On 2020/11/27 21:07, Xiaoming Ni wrote: > > When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable(). > > To avoid sleep in interrupt context, we need to call local_irq_enable() > > before schedule(). > > > > The problem call stack is as follows: > > bug1: > > do_write_oneword_retry() > > xip_disable() > > local_irq_disable() > > do_write_oneword_once() > > schedule() > > bug2: > > do_write_buffer() > > xip_disable() > > local_irq_disable() > > do_write_buffer_wait() > > schedule() > > bug3: > > do_erase_chip() > > xip_disable() > > local_irq_disable() > > schedule() > > bug4: > > do_erase_oneblock() > > xip_disable() > > local_irq_disable() > > schedule() > > > > Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.") > > Cc: stable@vger.kernel.org # v2.6.13 > > Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com> > > --- > > drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++ > > 1 file changed, 16 insertions(+) > > > > diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c > > index a1f3e1031c3d..12c3776f093a 100644 > > --- a/drivers/mtd/chips/cfi_cmdset_0002.c > > +++ b/drivers/mtd/chips/cfi_cmdset_0002.c > > @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map, > > set_current_state(TASK_UNINTERRUPTIBLE); > > add_wait_queue(&chip->wq, &wait); > > mutex_unlock(&chip->mutex); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_enable(); > > schedule(); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_disable(); The fix really seems strange to me. I will let Vignesh decide but I think we should consider updating/fixing xip_disable instead. > > remove_wait_queue(&chip->wq, &wait); > > timeo = jiffies + (HZ / 2); /* FIXME */ > > mutex_lock(&chip->mutex); > > @@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info *map, > > set_current_state(TASK_UNINTERRUPTIBLE); > > add_wait_queue(&chip->wq, &wait); > > mutex_unlock(&chip->mutex); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_enable(); > > schedule(); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_disable(); > > remove_wait_queue(&chip->wq, &wait); > > timeo = jiffies + (HZ / 2); /* FIXME */ > > mutex_lock(&chip->mutex); > > @@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) > > set_current_state(TASK_UNINTERRUPTIBLE); > > add_wait_queue(&chip->wq, &wait); > > mutex_unlock(&chip->mutex); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_enable(); > > schedule(); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_disable(); > > remove_wait_queue(&chip->wq, &wait); > > mutex_lock(&chip->mutex); > > continue; > > @@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, > > set_current_state(TASK_UNINTERRUPTIBLE); > > add_wait_queue(&chip->wq, &wait); > > mutex_unlock(&chip->mutex); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_enable(); > > schedule(); > > + if (IS_ENABLED(CONFIG_MTD_XIP)) > > + local_irq_disable(); > > remove_wait_queue(&chip->wq, &wait); > > mutex_lock(&chip->mutex); > > continue; > > > Thanks, Miquèl
Hi Xiaoming, On 12/7/20 4:23 PM, Miquel Raynal wrote: > Hi Xiaoming, > > Xiaoming Ni <nixiaoming@huawei.com> wrote on Mon, 7 Dec 2020 18:48:33 > +0800: > >> ping >> >> On 2020/11/27 21:07, Xiaoming Ni wrote: >>> When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable(). >>> To avoid sleep in interrupt context, we need to call local_irq_enable() >>> before schedule(). >>> >>> The problem call stack is as follows: >>> bug1: >>> do_write_oneword_retry() >>> xip_disable() >>> local_irq_disable() >>> do_write_oneword_once() >>> schedule() >>> bug2: >>> do_write_buffer() >>> xip_disable() >>> local_irq_disable() >>> do_write_buffer_wait() >>> schedule() >>> bug3: >>> do_erase_chip() >>> xip_disable() >>> local_irq_disable() >>> schedule() >>> bug4: >>> do_erase_oneblock() >>> xip_disable() >>> local_irq_disable() >>> schedule() >>> >>> Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.") >>> Cc: stable@vger.kernel.org # v2.6.13 >>> Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com> >>> --- >>> drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++ >>> 1 file changed, 16 insertions(+) >>> >>> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c >>> index a1f3e1031c3d..12c3776f093a 100644 >>> --- a/drivers/mtd/chips/cfi_cmdset_0002.c >>> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c >>> @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map, >>> set_current_state(TASK_UNINTERRUPTIBLE); >>> add_wait_queue(&chip->wq, &wait); >>> mutex_unlock(&chip->mutex); >>> + if (IS_ENABLED(CONFIG_MTD_XIP)) >>> + local_irq_enable(); >>> schedule(); >>> + if (IS_ENABLED(CONFIG_MTD_XIP)) >>> + local_irq_disable(); > > The fix really seems strange to me. I will let Vignesh decide but I > think we should consider updating/fixing xip_disable instead. Agree with Miquel. Have you done any testing or is this purely based on code inspection? What about comment before xip_disable() function: /* * No interrupt what so ever can be serviced while the flash isn't in array * mode. This is ensured by the xip_disable() and xip_enable() functions * enclosing any code path where the flash is known not to be in array mode. * And within a XIP disabled code path, only functions marked with __xipram * may be called and nothing else (it's a good thing to inspect generated * assembly to make sure inline functions were actually inlined and that gcc * didn't emit calls to its own support functions). Also configuring MTD CFI * support to a single buswidth and a single interleave is also recommended. */ So, I don't think the fix is as simple as this patch. Regards Vignesh
On 2020/12/8 2:59, Vignesh Raghavendra wrote: > Hi Xiaoming, > > On 12/7/20 4:23 PM, Miquel Raynal wrote: >> Hi Xiaoming, >> >> Xiaoming Ni <nixiaoming@huawei.com> wrote on Mon, 7 Dec 2020 18:48:33 >> +0800: >> >>> ping >>> >>> On 2020/11/27 21:07, Xiaoming Ni wrote: >>>> When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable(). >>>> To avoid sleep in interrupt context, we need to call local_irq_enable() >>>> before schedule(). >>>> >>>> The problem call stack is as follows: >>>> bug1: >>>> do_write_oneword_retry() >>>> xip_disable() >>>> local_irq_disable() >>>> do_write_oneword_once() >>>> schedule() >>>> bug2: >>>> do_write_buffer() >>>> xip_disable() >>>> local_irq_disable() >>>> do_write_buffer_wait() >>>> schedule() >>>> bug3: >>>> do_erase_chip() >>>> xip_disable() >>>> local_irq_disable() >>>> schedule() >>>> bug4: >>>> do_erase_oneblock() >>>> xip_disable() >>>> local_irq_disable() >>>> schedule() >>>> >>>> Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.") >>>> Cc: stable@vger.kernel.org # v2.6.13 >>>> Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com> >>>> --- >>>> drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++ >>>> 1 file changed, 16 insertions(+) >>>> >>>> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c >>>> index a1f3e1031c3d..12c3776f093a 100644 >>>> --- a/drivers/mtd/chips/cfi_cmdset_0002.c >>>> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c >>>> @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map, >>>> set_current_state(TASK_UNINTERRUPTIBLE); >>>> add_wait_queue(&chip->wq, &wait); >>>> mutex_unlock(&chip->mutex); >>>> + if (IS_ENABLED(CONFIG_MTD_XIP)) >>>> + local_irq_enable(); >>>> schedule(); >>>> + if (IS_ENABLED(CONFIG_MTD_XIP)) >>>> + local_irq_disable(); >> >> The fix really seems strange to me. I will let Vignesh decide but I >> think we should consider updating/fixing xip_disable instead. > > Agree with Miquel. Have you done any testing > or is this purely based on code inspection? > I don't have the corresponding device test environment. I found the problem through code review. > What about comment before xip_disable() function: > > /* > * No interrupt what so ever can be serviced while the flash isn't in array > * mode. This is ensured by the xip_disable() and xip_enable() functions > * enclosing any code path where the flash is known not to be in array mode. > * And within a XIP disabled code path, only functions marked with __xipram > * may be called and nothing else (it's a good thing to inspect generated > * assembly to make sure inline functions were actually inlined and that gcc > * didn't emit calls to its own support functions). Also configuring MTD CFI > * support to a single buswidth and a single interleave is also recommended. > */ > > So, I don't think the fix is as simple as this patch. > +xip_enable(); schedule(); +xip_disable(); Do I need to change it to this? > Regards > Vignesh > . > Thanks Xiaoming Ni
On 12/8/20 6:53 AM, Xiaoming Ni wrote: > On 2020/12/8 2:59, Vignesh Raghavendra wrote: >> Hi Xiaoming, >> [...] >>>>> --- >>>>> drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++ >>>>> 1 file changed, 16 insertions(+) >>>>> >>>>> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c >>>>> b/drivers/mtd/chips/cfi_cmdset_0002.c >>>>> index a1f3e1031c3d..12c3776f093a 100644 >>>>> --- a/drivers/mtd/chips/cfi_cmdset_0002.c >>>>> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c >>>>> @@ -1682,7 +1682,11 @@ static int __xipram >>>>> do_write_oneword_once(struct map_info *map, >>>>> set_current_state(TASK_UNINTERRUPTIBLE); >>>>> add_wait_queue(&chip->wq, &wait); >>>>> mutex_unlock(&chip->mutex); >>>>> + if (IS_ENABLED(CONFIG_MTD_XIP)) >>>>> + local_irq_enable(); >>>>> schedule(); >>>>> + if (IS_ENABLED(CONFIG_MTD_XIP)) >>>>> + local_irq_disable(); >>> >>> The fix really seems strange to me. I will let Vignesh decide but I >>> think we should consider updating/fixing xip_disable instead. >> >> Agree with Miquel. Have you done any testing >> or is this purely based on code inspection? >> > I don't have the corresponding device test environment. > I found the problem through code review. > Sorry, I am not comfortable applying this patch without proper testing that given the unknowns and legacy nature of the code. > >> What about comment before xip_disable() function: >> >> /* >> * No interrupt what so ever can be serviced while the flash isn't in >> array >> * mode. This is ensured by the xip_disable() and xip_enable() >> functions >> * enclosing any code path where the flash is known not to be in >> array mode. >> * And within a XIP disabled code path, only functions marked with >> __xipram >> * may be called and nothing else (it's a good thing to inspect >> generated >> * assembly to make sure inline functions were actually inlined and >> that gcc >> * didn't emit calls to its own support functions). Also configuring >> MTD CFI >> * support to a single buswidth and a single interleave is also >> recommended. >> */ >> >> So, I don't think the fix is as simple as this patch. >> > +xip_enable(); > schedule(); > +xip_disable(); > > Do I need to change it to this? > This just narrows the window, but an IRQ is still possible just after xip_enable() but before schedule(). Regards Vignesh
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index a1f3e1031c3d..12c3776f093a 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map, set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); mutex_unlock(&chip->mutex); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_enable(); schedule(); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_disable(); remove_wait_queue(&chip->wq, &wait); timeo = jiffies + (HZ / 2); /* FIXME */ mutex_lock(&chip->mutex); @@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info *map, set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); mutex_unlock(&chip->mutex); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_enable(); schedule(); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_disable(); remove_wait_queue(&chip->wq, &wait); timeo = jiffies + (HZ / 2); /* FIXME */ mutex_lock(&chip->mutex); @@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); mutex_unlock(&chip->mutex); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_enable(); schedule(); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_disable(); remove_wait_queue(&chip->wq, &wait); mutex_lock(&chip->mutex); continue; @@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&chip->wq, &wait); mutex_unlock(&chip->mutex); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_enable(); schedule(); + if (IS_ENABLED(CONFIG_MTD_XIP)) + local_irq_disable(); remove_wait_queue(&chip->wq, &wait); mutex_lock(&chip->mutex); continue;
When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable(). To avoid sleep in interrupt context, we need to call local_irq_enable() before schedule(). The problem call stack is as follows: bug1: do_write_oneword_retry() xip_disable() local_irq_disable() do_write_oneword_once() schedule() bug2: do_write_buffer() xip_disable() local_irq_disable() do_write_buffer_wait() schedule() bug3: do_erase_chip() xip_disable() local_irq_disable() schedule() bug4: do_erase_oneblock() xip_disable() local_irq_disable() schedule() Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.") Cc: stable@vger.kernel.org # v2.6.13 Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com> --- drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)