diff mbox series

[U-Boot,v3,11/11] mtd: sf: Make sf_mtd.c more robust

Message ID 20181119205955.32042-12-boris.brezillon@bootlin.com
State Superseded
Delegated to: Jagannadha Sutradharudu Teki
Headers show
Series mtd/sf: Various fixes | expand

Commit Message

Boris Brezillon Nov. 19, 2018, 8:59 p.m. UTC
SPI flash based MTD devs can be registered/unregistered at any time
through the sf probe command or the spi_flash_free() function.

This commit does not try to fix the root cause as it would probably
require rewriting most of the code and have an mtd_info object
instance per spi_flash object (not to mention that the the spi-flash
layer is likely to be replaced by a spi-nor layer ported from Linux).

Instead, we try to be as safe as can be by checking the code returned
by del_mtd_device() and complain loudly when there's nothing we can
do about the deregistration failure. When that happens we also reset
sf_mtd_info.priv to NULL, and check for NULL pointer in the mtd hooks
so that -ENODEV is returned instead of hitting a NULL pointer
dereference exception when the MTD instance is later accessed by a user.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
Changes in v3:
- New patch
---
 drivers/mtd/spi/sf_mtd.c | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

Comments

Heiko Schocher Nov. 21, 2018, 6:47 a.m. UTC | #1
Hello Boris,

Am 19.11.2018 um 21:59 schrieb Boris Brezillon:
> SPI flash based MTD devs can be registered/unregistered at any time
> through the sf probe command or the spi_flash_free() function.
> 
> This commit does not try to fix the root cause as it would probably
> require rewriting most of the code and have an mtd_info object
> instance per spi_flash object (not to mention that the the spi-flash
> layer is likely to be replaced by a spi-nor layer ported from Linux).
> 
> Instead, we try to be as safe as can be by checking the code returned
> by del_mtd_device() and complain loudly when there's nothing we can
> do about the deregistration failure. When that happens we also reset
> sf_mtd_info.priv to NULL, and check for NULL pointer in the mtd hooks
> so that -ENODEV is returned instead of hitting a NULL pointer
> dereference exception when the MTD instance is later accessed by a user.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
> ---
> Changes in v3:
> - New patch
> ---
>   drivers/mtd/spi/sf_mtd.c | 39 ++++++++++++++++++++++++++++++++++++---
>   1 file changed, 36 insertions(+), 3 deletions(-)

Tested-by: Heiko Schocher <hs@denx.de>

bye,
Heiko
Jagan Teki Nov. 22, 2018, 7:10 a.m. UTC | #2
On Tue, Nov 20, 2018 at 2:33 AM Boris Brezillon
<boris.brezillon@bootlin.com> wrote:
>
> SPI flash based MTD devs can be registered/unregistered at any time
> through the sf probe command or the spi_flash_free() function.
>
> This commit does not try to fix the root cause as it would probably
> require rewriting most of the code and have an mtd_info object
> instance per spi_flash object (not to mention that the the spi-flash
> layer is likely to be replaced by a spi-nor layer ported from Linux).
>
> Instead, we try to be as safe as can be by checking the code returned
> by del_mtd_device() and complain loudly when there's nothing we can
> do about the deregistration failure. When that happens we also reset
> sf_mtd_info.priv to NULL, and check for NULL pointer in the mtd hooks
> so that -ENODEV is returned instead of hitting a NULL pointer
> dereference exception when the MTD instance is later accessed by a user.
>
> Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
> ---
> Changes in v3:
> - New patch
> ---
>  drivers/mtd/spi/sf_mtd.c | 39 ++++++++++++++++++++++++++++++++++++---
>  1 file changed, 36 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/mtd/spi/sf_mtd.c b/drivers/mtd/spi/sf_mtd.c
> index aabbc3589435..68c36002bee2 100644
> --- a/drivers/mtd/spi/sf_mtd.c
> +++ b/drivers/mtd/spi/sf_mtd.c
> @@ -18,6 +18,9 @@ static int spi_flash_mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
>         struct spi_flash *flash = mtd->priv;
>         int err;
>
> +       if (!flash)
> +               return -ENODEV;
> +
>         instr->state = MTD_ERASING;
>
>         err = spi_flash_erase(flash, instr->addr, instr->len);
> @@ -39,6 +42,9 @@ static int spi_flash_mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
>         struct spi_flash *flash = mtd->priv;
>         int err;
>
> +       if (!flash)
> +               return -ENODEV;
> +
>         err = spi_flash_read(flash, from, len, buf);
>         if (!err)
>                 *retlen = len;
> @@ -52,6 +58,9 @@ static int spi_flash_mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
>         struct spi_flash *flash = mtd->priv;
>         int err;
>
> +       if (!flash)
> +               return -ENODEV;
> +
>         err = spi_flash_write(flash, to, len, buf);
>         if (!err)
>                 *retlen = len;
> @@ -76,8 +85,13 @@ int spi_flash_mtd_register(struct spi_flash *flash)
>  {
>         int ret;
>
> -       if (sf_mtd_registered)
> -               del_mtd_device(&sf_mtd_info);
> +       if (sf_mtd_registered) {
> +               ret = del_mtd_device(&sf_mtd_info);
> +               if (ret)
> +                       return ret;
> +
> +               sf_mtd_registered = false;
> +       }
>
>         sf_mtd_registered = false;
>         memset(&sf_mtd_info, 0, sizeof(sf_mtd_info));
> @@ -110,5 +124,24 @@ int spi_flash_mtd_register(struct spi_flash *flash)
>
>  void spi_flash_mtd_unregister(void)
>  {
> -       del_mtd_device(&sf_mtd_info);
> +       int ret;
> +
> +       if (!sf_mtd_registered)
> +               return;
> +
> +       ret = del_mtd_device(&sf_mtd_info);
> +       if (!ret) {
> +               sf_mtd_registered = false;
> +               return;
> +       }
> +
> +       /*
> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> +        * the MTD layer can still call mtd hooks without risking a
> +        * use-after-free bug. Still, things should be fixed to prevent the
> +        * spi_flash object from being destroyed when del_mtd_device() fails.
> +        */
> +       sf_mtd_info.priv = NULL;
> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> +              sf_mtd_info.name);

Why do we need this print? can't we do the same thing in MTD core
itself, so-that it can be generic for all flash objects.
Boris Brezillon Nov. 22, 2018, 8:40 a.m. UTC | #3
On Thu, 22 Nov 2018 12:40:57 +0530
Jagan Teki <jagan@amarulasolutions.com> wrote:

> On Tue, Nov 20, 2018 at 2:33 AM Boris Brezillon
> <boris.brezillon@bootlin.com> wrote:
> >
> > SPI flash based MTD devs can be registered/unregistered at any time
> > through the sf probe command or the spi_flash_free() function.
> >
> > This commit does not try to fix the root cause as it would probably
> > require rewriting most of the code and have an mtd_info object
> > instance per spi_flash object (not to mention that the the spi-flash
> > layer is likely to be replaced by a spi-nor layer ported from Linux).
> >
> > Instead, we try to be as safe as can be by checking the code returned
> > by del_mtd_device() and complain loudly when there's nothing we can
> > do about the deregistration failure. When that happens we also reset
> > sf_mtd_info.priv to NULL, and check for NULL pointer in the mtd hooks
> > so that -ENODEV is returned instead of hitting a NULL pointer
> > dereference exception when the MTD instance is later accessed by a user.
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
> > ---
> > Changes in v3:
> > - New patch
> > ---
> >  drivers/mtd/spi/sf_mtd.c | 39 ++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 36 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/mtd/spi/sf_mtd.c b/drivers/mtd/spi/sf_mtd.c
> > index aabbc3589435..68c36002bee2 100644
> > --- a/drivers/mtd/spi/sf_mtd.c
> > +++ b/drivers/mtd/spi/sf_mtd.c
> > @@ -18,6 +18,9 @@ static int spi_flash_mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
> >         struct spi_flash *flash = mtd->priv;
> >         int err;
> >
> > +       if (!flash)
> > +               return -ENODEV;
> > +
> >         instr->state = MTD_ERASING;
> >
> >         err = spi_flash_erase(flash, instr->addr, instr->len);
> > @@ -39,6 +42,9 @@ static int spi_flash_mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
> >         struct spi_flash *flash = mtd->priv;
> >         int err;
> >
> > +       if (!flash)
> > +               return -ENODEV;
> > +
> >         err = spi_flash_read(flash, from, len, buf);
> >         if (!err)
> >                 *retlen = len;
> > @@ -52,6 +58,9 @@ static int spi_flash_mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
> >         struct spi_flash *flash = mtd->priv;
> >         int err;
> >
> > +       if (!flash)
> > +               return -ENODEV;
> > +
> >         err = spi_flash_write(flash, to, len, buf);
> >         if (!err)
> >                 *retlen = len;
> > @@ -76,8 +85,13 @@ int spi_flash_mtd_register(struct spi_flash *flash)
> >  {
> >         int ret;
> >
> > -       if (sf_mtd_registered)
> > -               del_mtd_device(&sf_mtd_info);
> > +       if (sf_mtd_registered) {
> > +               ret = del_mtd_device(&sf_mtd_info);
> > +               if (ret)
> > +                       return ret;
> > +
> > +               sf_mtd_registered = false;
> > +       }
> >
> >         sf_mtd_registered = false;
> >         memset(&sf_mtd_info, 0, sizeof(sf_mtd_info));
> > @@ -110,5 +124,24 @@ int spi_flash_mtd_register(struct spi_flash *flash)
> >
> >  void spi_flash_mtd_unregister(void)
> >  {
> > -       del_mtd_device(&sf_mtd_info);
> > +       int ret;
> > +
> > +       if (!sf_mtd_registered)
> > +               return;
> > +
> > +       ret = del_mtd_device(&sf_mtd_info);
> > +       if (!ret) {
> > +               sf_mtd_registered = false;
> > +               return;
> > +       }
> > +
> > +       /*
> > +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> > +        * the MTD layer can still call mtd hooks without risking a
> > +        * use-after-free bug. Still, things should be fixed to prevent the
> > +        * spi_flash object from being destroyed when del_mtd_device() fails.
> > +        */
> > +       sf_mtd_info.priv = NULL;
> > +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> > +              sf_mtd_info.name);  
> 
> Why do we need this print?

Yes we do, just to keep the user informed that something bad happened
and its spi-flash is no longer usable (at least through the MTD layer).

> can't we do the same thing in MTD core
> itself, so-that it can be generic for all flash objects.

del_mtd_device() can fail, so it's the caller responsibility to decide
what to do when that happens. Some users will propagate the error to
the upper layer and maybe cancel the device removal (AFAICT,
driver->remove() can return an error, not sure what happens in this
case though). For others, like spi-flash, the device will go away, and
all subsequent accesses will fail.
Boris Brezillon Nov. 26, 2018, 8:42 a.m. UTC | #4
Hi Jagan,

On Thu, 22 Nov 2018 09:40:56 +0100
Boris Brezillon <boris.brezillon@bootlin.com> wrote:

> > > +       /*
> > > +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> > > +        * the MTD layer can still call mtd hooks without risking a
> > > +        * use-after-free bug. Still, things should be fixed to prevent the
> > > +        * spi_flash object from being destroyed when del_mtd_device() fails.
> > > +        */
> > > +       sf_mtd_info.priv = NULL;
> > > +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> > > +              sf_mtd_info.name);    
> > 
> > Why do we need this print?  
> 
> Yes we do, just to keep the user informed that something bad happened
> and its spi-flash is no longer usable (at least through the MTD layer).
> 
> > can't we do the same thing in MTD core
> > itself, so-that it can be generic for all flash objects.  
> 
> del_mtd_device() can fail, so it's the caller responsibility to decide
> what to do when that happens. Some users will propagate the error to
> the upper layer and maybe cancel the device removal (AFAICT,
> driver->remove() can return an error, not sure what happens in this
> case though). For others, like spi-flash, the device will go away, and
> all subsequent accesses will fail.

I'm about to send a new version fixing the problem I mentioned in patch
3, but before doing that, I'd like to know if my answer convinced you or
if you'd still prefer this message to go away (or be placed in
mtdcore/mtdpart.c).

Regards,

Boris
Jagan Teki Nov. 26, 2018, 11:12 a.m. UTC | #5
On 26/11/18 2:12 PM, Boris Brezillon wrote:
> Hi Jagan,
> 
> On Thu, 22 Nov 2018 09:40:56 +0100
> Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> 
>>>> +       /*
>>>> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
>>>> +        * the MTD layer can still call mtd hooks without risking a
>>>> +        * use-after-free bug. Still, things should be fixed to prevent the
>>>> +        * spi_flash object from being destroyed when del_mtd_device() fails.
>>>> +        */
>>>> +       sf_mtd_info.priv = NULL;
>>>> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
>>>> +              sf_mtd_info.name);
>>>
>>> Why do we need this print?
>>
>> Yes we do, just to keep the user informed that something bad happened
>> and its spi-flash is no longer usable (at least through the MTD layer).
>>
>>> can't we do the same thing in MTD core
>>> itself, so-that it can be generic for all flash objects.
>>
>> del_mtd_device() can fail, so it's the caller responsibility to decide
>> what to do when that happens. Some users will propagate the error to
>> the upper layer and maybe cancel the device removal (AFAICT,
>> driver->remove() can return an error, not sure what happens in this
>> case though). For others, like spi-flash, the device will go away, and
>> all subsequent accesses will fail.
> 
> I'm about to send a new version fixing the problem I mentioned in patch
> 3, but before doing that, I'd like to know if my answer convinced you or
> if you'd still prefer this message to go away (or be placed in
> mtdcore/mtdpart.c).


I'm thinking of having the message still in MTD by showing which 
interface it would belongs, along with the details.
Boris Brezillon Nov. 26, 2018, 12:37 p.m. UTC | #6
On Mon, 26 Nov 2018 16:42:48 +0530
Jagan Teki <jagan@openedev.com> wrote:

> On 26/11/18 2:12 PM, Boris Brezillon wrote:
> > Hi Jagan,
> > 
> > On Thu, 22 Nov 2018 09:40:56 +0100
> > Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> >   
> >>>> +       /*
> >>>> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> >>>> +        * the MTD layer can still call mtd hooks without risking a
> >>>> +        * use-after-free bug. Still, things should be fixed to prevent the
> >>>> +        * spi_flash object from being destroyed when del_mtd_device() fails.
> >>>> +        */
> >>>> +       sf_mtd_info.priv = NULL;
> >>>> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> >>>> +              sf_mtd_info.name);  
> >>>
> >>> Why do we need this print?  
> >>
> >> Yes we do, just to keep the user informed that something bad happened
> >> and its spi-flash is no longer usable (at least through the MTD layer).
> >>  
> >>> can't we do the same thing in MTD core
> >>> itself, so-that it can be generic for all flash objects.  
> >>
> >> del_mtd_device() can fail, so it's the caller responsibility to decide
> >> what to do when that happens. Some users will propagate the error to
> >> the upper layer and maybe cancel the device removal (AFAICT,
> >> driver->remove() can return an error, not sure what happens in this
> >> case though). For others, like spi-flash, the device will go away, and
> >> all subsequent accesses will fail.  
> > 
> > I'm about to send a new version fixing the problem I mentioned in patch
> > 3, but before doing that, I'd like to know if my answer convinced you or
> > if you'd still prefer this message to go away (or be placed in
> > mtdcore/mtdpart.c).  
> 
> 
> I'm thinking of having the message still in MTD by showing which 
> interface it would belongs, along with the details.

Then we'd need something less
Boris Brezillon Nov. 26, 2018, 12:42 p.m. UTC | #7
On Mon, 26 Nov 2018 13:37:46 +0100
Boris Brezillon <boris.brezillon@bootlin.com> wrote:

> On Mon, 26 Nov 2018 16:42:48 +0530
> Jagan Teki <jagan@openedev.com> wrote:
> 
> > On 26/11/18 2:12 PM, Boris Brezillon wrote:  
> > > Hi Jagan,
> > > 
> > > On Thu, 22 Nov 2018 09:40:56 +0100
> > > Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> > >     
> > >>>> +       /*
> > >>>> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> > >>>> +        * the MTD layer can still call mtd hooks without risking a
> > >>>> +        * use-after-free bug. Still, things should be fixed to prevent the
> > >>>> +        * spi_flash object from being destroyed when del_mtd_device() fails.
> > >>>> +        */
> > >>>> +       sf_mtd_info.priv = NULL;
> > >>>> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> > >>>> +              sf_mtd_info.name);    
> > >>>
> > >>> Why do we need this print?    
> > >>
> > >> Yes we do, just to keep the user informed that something bad happened
> > >> and its spi-flash is no longer usable (at least through the MTD layer).
> > >>    
> > >>> can't we do the same thing in MTD core
> > >>> itself, so-that it can be generic for all flash objects.    
> > >>
> > >> del_mtd_device() can fail, so it's the caller responsibility to decide
> > >> what to do when that happens. Some users will propagate the error to
> > >> the upper layer and maybe cancel the device removal (AFAICT,
> > >> driver->remove() can return an error, not sure what happens in this
> > >> case though). For others, like spi-flash, the device will go away, and
> > >> all subsequent accesses will fail.    
> > > 
> > > I'm about to send a new version fixing the problem I mentioned in patch
> > > 3, but before doing that, I'd like to know if my answer convinced you or
> > > if you'd still prefer this message to go away (or be placed in
> > > mtdcore/mtdpart.c).    
> > 
> > 
> > I'm thinking of having the message still in MTD by showing which 
> > interface it would belongs, along with the details.  
> 
> Then we'd need something less 

Sorry, I inadvertently hit the send button :-/.

So, I was about to say that we need something less worrisome than the
message I added in the SF layer if we move it to the MTD layer because
some drivers might actually do the right thing when del_mtd_device()
returns an error. I keep thinking that putting an error message in
mtdcore.c is not the right thing to do, but if you insist, I'll add
one (maybe a debug()). In any case I'd like to keep the one we have
here, because in this specific case, there's simply nothing we can do
when MTD dev removal fails.
Jagan Teki Nov. 26, 2018, 1:05 p.m. UTC | #8
On 26/11/18 6:12 PM, Boris Brezillon wrote:
> On Mon, 26 Nov 2018 13:37:46 +0100
> Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> 
>> On Mon, 26 Nov 2018 16:42:48 +0530
>> Jagan Teki <jagan@openedev.com> wrote:
>>
>>> On 26/11/18 2:12 PM, Boris Brezillon wrote:
>>>> Hi Jagan,
>>>>
>>>> On Thu, 22 Nov 2018 09:40:56 +0100
>>>> Boris Brezillon <boris.brezillon@bootlin.com> wrote:
>>>>      
>>>>>>> +       /*
>>>>>>> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
>>>>>>> +        * the MTD layer can still call mtd hooks without risking a
>>>>>>> +        * use-after-free bug. Still, things should be fixed to prevent the
>>>>>>> +        * spi_flash object from being destroyed when del_mtd_device() fails.
>>>>>>> +        */
>>>>>>> +       sf_mtd_info.priv = NULL;
>>>>>>> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
>>>>>>> +              sf_mtd_info.name);
>>>>>>
>>>>>> Why do we need this print?
>>>>>
>>>>> Yes we do, just to keep the user informed that something bad happened
>>>>> and its spi-flash is no longer usable (at least through the MTD layer).
>>>>>     
>>>>>> can't we do the same thing in MTD core
>>>>>> itself, so-that it can be generic for all flash objects.
>>>>>
>>>>> del_mtd_device() can fail, so it's the caller responsibility to decide
>>>>> what to do when that happens. Some users will propagate the error to
>>>>> the upper layer and maybe cancel the device removal (AFAICT,
>>>>> driver->remove() can return an error, not sure what happens in this
>>>>> case though). For others, like spi-flash, the device will go away, and
>>>>> all subsequent accesses will fail.
>>>>
>>>> I'm about to send a new version fixing the problem I mentioned in patch
>>>> 3, but before doing that, I'd like to know if my answer convinced you or
>>>> if you'd still prefer this message to go away (or be placed in
>>>> mtdcore/mtdpart.c).
>>>
>>>
>>> I'm thinking of having the message still in MTD by showing which
>>> interface it would belongs, along with the details.
>>
>> Then we'd need something less
> 
> Sorry, I inadvertently hit the send button :-/.
> 
> So, I was about to say that we need something less worrisome than the
> message I added in the SF layer if we move it to the MTD layer because
> some drivers might actually do the right thing when del_mtd_device()
> returns an error. I keep thinking that putting an error message in
> mtdcore.c is not the right thing to do, but if you insist, I'll add
> one (maybe a debug()). In any case I'd like to keep the one we have
> here, because in this specific case, there's simply nothing we can do
> when MTD dev removal fails.
> 

Look like other flashes were deleting only via mtd_partitions. how would 
they know and does it not need for them to print the same information?
Miquel Raynal Nov. 26, 2018, 1:25 p.m. UTC | #9
Hi Jagan,

Jagan Teki <jagan@openedev.com> wrote on Mon, 26 Nov 2018 18:35:01
+0530:

> On 26/11/18 6:12 PM, Boris Brezillon wrote:
> > On Mon, 26 Nov 2018 13:37:46 +0100
> > Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> > 
> >> On Mon, 26 Nov 2018 16:42:48 +0530
> >> Jagan Teki <jagan@openedev.com> wrote:
> >>
> >>> On 26/11/18 2:12 PM, Boris Brezillon wrote:
> >>>> Hi Jagan,
> >>>>
> >>>> On Thu, 22 Nov 2018 09:40:56 +0100
> >>>> Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> >>>>      >>>>>>> +       /*
> >>>>>>> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> >>>>>>> +        * the MTD layer can still call mtd hooks without risking a
> >>>>>>> +        * use-after-free bug. Still, things should be fixed to prevent the
> >>>>>>> +        * spi_flash object from being destroyed when del_mtd_device() fails.
> >>>>>>> +        */
> >>>>>>> +       sf_mtd_info.priv = NULL;
> >>>>>>> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> >>>>>>> +              sf_mtd_info.name);
> >>>>>>
> >>>>>> Why do we need this print?
> >>>>>
> >>>>> Yes we do, just to keep the user informed that something bad happened
> >>>>> and its spi-flash is no longer usable (at least through the MTD layer).
> >>>>>     >>>>>> can't we do the same thing in MTD core
> >>>>>> itself, so-that it can be generic for all flash objects.
> >>>>>
> >>>>> del_mtd_device() can fail, so it's the caller responsibility to decide
> >>>>> what to do when that happens. Some users will propagate the error to
> >>>>> the upper layer and maybe cancel the device removal (AFAICT,
> >>>>> driver->remove() can return an error, not sure what happens in this
> >>>>> case though). For others, like spi-flash, the device will go away, and
> >>>>> all subsequent accesses will fail.
> >>>>
> >>>> I'm about to send a new version fixing the problem I mentioned in patch
> >>>> 3, but before doing that, I'd like to know if my answer convinced you or
> >>>> if you'd still prefer this message to go away (or be placed in
> >>>> mtdcore/mtdpart.c).
> >>>
> >>>
> >>> I'm thinking of having the message still in MTD by showing which
> >>> interface it would belongs, along with the details.
> >>
> >> Then we'd need something less
> > 
> > Sorry, I inadvertently hit the send button :-/.
> > 
> > So, I was about to say that we need something less worrisome than the
> > message I added in the SF layer if we move it to the MTD layer because
> > some drivers might actually do the right thing when del_mtd_device()
> > returns an error. I keep thinking that putting an error message in
> > mtdcore.c is not the right thing to do, but if you insist, I'll add
> > one (maybe a debug()). In any case I'd like to keep the one we have
> > here, because in this specific case, there's simply nothing we can do
> > when MTD dev removal fails.
> > 
> 
> Look like other flashes were deleting only via mtd_partitions. how would they know and does it not need for them to print the same information?

Do you see that sf does not use MTD in a consistent manner? The whole
point of this commit is to handle sf's lightnesses in a "more robust"
manner. This warning belongs to sf and not to the mtdcore, because
as stated in the comment above, we should "prevent the spi_flash
object from being destroyed when del_mtd_device() fails". If you don't
want such warning in sf, I think the best thing to do is to fix the
root issue.


Thanks,
Miquèl
Boris Brezillon Nov. 27, 2018, 12:36 p.m. UTC | #10
On Mon, 26 Nov 2018 14:25:44 +0100
Miquel Raynal <miquel.raynal@bootlin.com> wrote:

> Hi Jagan,
> 
> Jagan Teki <jagan@openedev.com> wrote on Mon, 26 Nov 2018 18:35:01
> +0530:
> 
> > On 26/11/18 6:12 PM, Boris Brezillon wrote:  
> > > On Mon, 26 Nov 2018 13:37:46 +0100
> > > Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> > >   
> > >> On Mon, 26 Nov 2018 16:42:48 +0530
> > >> Jagan Teki <jagan@openedev.com> wrote:
> > >>  
> > >>> On 26/11/18 2:12 PM, Boris Brezillon wrote:  
> > >>>> Hi Jagan,
> > >>>>
> > >>>> On Thu, 22 Nov 2018 09:40:56 +0100
> > >>>> Boris Brezillon <boris.brezillon@bootlin.com> wrote:  
> > >>>>      >>>>>>> +       /*  
> > >>>>>>> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> > >>>>>>> +        * the MTD layer can still call mtd hooks without risking a
> > >>>>>>> +        * use-after-free bug. Still, things should be fixed to prevent the
> > >>>>>>> +        * spi_flash object from being destroyed when del_mtd_device() fails.
> > >>>>>>> +        */
> > >>>>>>> +       sf_mtd_info.priv = NULL;
> > >>>>>>> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> > >>>>>>> +              sf_mtd_info.name);  
> > >>>>>>
> > >>>>>> Why do we need this print?  
> > >>>>>
> > >>>>> Yes we do, just to keep the user informed that something bad happened
> > >>>>> and its spi-flash is no longer usable (at least through the MTD layer).  
> > >>>>>     >>>>>> can't we do the same thing in MTD core  
> > >>>>>> itself, so-that it can be generic for all flash objects.  
> > >>>>>
> > >>>>> del_mtd_device() can fail, so it's the caller responsibility to decide
> > >>>>> what to do when that happens. Some users will propagate the error to
> > >>>>> the upper layer and maybe cancel the device removal (AFAICT,
> > >>>>> driver->remove() can return an error, not sure what happens in this
> > >>>>> case though). For others, like spi-flash, the device will go away, and
> > >>>>> all subsequent accesses will fail.  
> > >>>>
> > >>>> I'm about to send a new version fixing the problem I mentioned in patch
> > >>>> 3, but before doing that, I'd like to know if my answer convinced you or
> > >>>> if you'd still prefer this message to go away (or be placed in
> > >>>> mtdcore/mtdpart.c).  
> > >>>
> > >>>
> > >>> I'm thinking of having the message still in MTD by showing which
> > >>> interface it would belongs, along with the details.  
> > >>
> > >> Then we'd need something less  
> > > 
> > > Sorry, I inadvertently hit the send button :-/.
> > > 
> > > So, I was about to say that we need something less worrisome than the
> > > message I added in the SF layer if we move it to the MTD layer because
> > > some drivers might actually do the right thing when del_mtd_device()
> > > returns an error. I keep thinking that putting an error message in
> > > mtdcore.c is not the right thing to do, but if you insist, I'll add
> > > one (maybe a debug()). In any case I'd like to keep the one we have
> > > here, because in this specific case, there's simply nothing we can do
> > > when MTD dev removal fails.
> > >   
> > 
> > Look like other flashes were deleting only via mtd_partitions. how would they know and does it not need for them to print the same information?  
> 
> Do you see that sf does not use MTD in a consistent manner? The whole
> point of this commit is to handle sf's lightnesses in a "more robust"
> manner. This warning belongs to sf and not to the mtdcore, because
> as stated in the comment above, we should "prevent the spi_flash
> object from being destroyed when del_mtd_device() fails". If you don't
> want such warning in sf, I think the best thing to do is to fix the
> root issue.

Jagan, are you okay with this suggestion (add a debug() message in the
del_mtd_device() path and keep the one we already have in sf_mtd.c)?
Jagan Teki Nov. 27, 2018, 3:44 p.m. UTC | #11
On Tue, Nov 27, 2018 at 6:06 PM Boris Brezillon
<boris.brezillon@bootlin.com> wrote:
>
> On Mon, 26 Nov 2018 14:25:44 +0100
> Miquel Raynal <miquel.raynal@bootlin.com> wrote:
>
> > Hi Jagan,
> >
> > Jagan Teki <jagan@openedev.com> wrote on Mon, 26 Nov 2018 18:35:01
> > +0530:
> >
> > > On 26/11/18 6:12 PM, Boris Brezillon wrote:
> > > > On Mon, 26 Nov 2018 13:37:46 +0100
> > > > Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> > > >
> > > >> On Mon, 26 Nov 2018 16:42:48 +0530
> > > >> Jagan Teki <jagan@openedev.com> wrote:
> > > >>
> > > >>> On 26/11/18 2:12 PM, Boris Brezillon wrote:
> > > >>>> Hi Jagan,
> > > >>>>
> > > >>>> On Thu, 22 Nov 2018 09:40:56 +0100
> > > >>>> Boris Brezillon <boris.brezillon@bootlin.com> wrote:
> > > >>>>      >>>>>>> +       /*
> > > >>>>>>> +        * Setting mtd->priv to NULL is the best we can do. Thanks to that,
> > > >>>>>>> +        * the MTD layer can still call mtd hooks without risking a
> > > >>>>>>> +        * use-after-free bug. Still, things should be fixed to prevent the
> > > >>>>>>> +        * spi_flash object from being destroyed when del_mtd_device() fails.
> > > >>>>>>> +        */
> > > >>>>>>> +       sf_mtd_info.priv = NULL;
> > > >>>>>>> +       printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
> > > >>>>>>> +              sf_mtd_info.name);
> > > >>>>>>
> > > >>>>>> Why do we need this print?
> > > >>>>>
> > > >>>>> Yes we do, just to keep the user informed that something bad happened
> > > >>>>> and its spi-flash is no longer usable (at least through the MTD layer).
> > > >>>>>     >>>>>> can't we do the same thing in MTD core
> > > >>>>>> itself, so-that it can be generic for all flash objects.
> > > >>>>>
> > > >>>>> del_mtd_device() can fail, so it's the caller responsibility to decide
> > > >>>>> what to do when that happens. Some users will propagate the error to
> > > >>>>> the upper layer and maybe cancel the device removal (AFAICT,
> > > >>>>> driver->remove() can return an error, not sure what happens in this
> > > >>>>> case though). For others, like spi-flash, the device will go away, and
> > > >>>>> all subsequent accesses will fail.
> > > >>>>
> > > >>>> I'm about to send a new version fixing the problem I mentioned in patch
> > > >>>> 3, but before doing that, I'd like to know if my answer convinced you or
> > > >>>> if you'd still prefer this message to go away (or be placed in
> > > >>>> mtdcore/mtdpart.c).
> > > >>>
> > > >>>
> > > >>> I'm thinking of having the message still in MTD by showing which
> > > >>> interface it would belongs, along with the details.
> > > >>
> > > >> Then we'd need something less
> > > >
> > > > Sorry, I inadvertently hit the send button :-/.
> > > >
> > > > So, I was about to say that we need something less worrisome than the
> > > > message I added in the SF layer if we move it to the MTD layer because
> > > > some drivers might actually do the right thing when del_mtd_device()
> > > > returns an error. I keep thinking that putting an error message in
> > > > mtdcore.c is not the right thing to do, but if you insist, I'll add
> > > > one (maybe a debug()). In any case I'd like to keep the one we have
> > > > here, because in this specific case, there's simply nothing we can do
> > > > when MTD dev removal fails.
> > > >
> > >
> > > Look like other flashes were deleting only via mtd_partitions. how would they know and does it not need for them to print the same information?
> >
> > Do you see that sf does not use MTD in a consistent manner? The whole
> > point of this commit is to handle sf's lightnesses in a "more robust"
> > manner. This warning belongs to sf and not to the mtdcore, because
> > as stated in the comment above, we should "prevent the spi_flash
> > object from being destroyed when del_mtd_device() fails". If you don't
> > want such warning in sf, I think the best thing to do is to fix the
> > root issue.
>
> Jagan, are you okay with this suggestion (add a debug() message in the
> del_mtd_device() path and keep the one we already have in sf_mtd.c)?

Please proceed.
diff mbox series

Patch

diff --git a/drivers/mtd/spi/sf_mtd.c b/drivers/mtd/spi/sf_mtd.c
index aabbc3589435..68c36002bee2 100644
--- a/drivers/mtd/spi/sf_mtd.c
+++ b/drivers/mtd/spi/sf_mtd.c
@@ -18,6 +18,9 @@  static int spi_flash_mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
 	struct spi_flash *flash = mtd->priv;
 	int err;
 
+	if (!flash)
+		return -ENODEV;
+
 	instr->state = MTD_ERASING;
 
 	err = spi_flash_erase(flash, instr->addr, instr->len);
@@ -39,6 +42,9 @@  static int spi_flash_mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct spi_flash *flash = mtd->priv;
 	int err;
 
+	if (!flash)
+		return -ENODEV;
+
 	err = spi_flash_read(flash, from, len, buf);
 	if (!err)
 		*retlen = len;
@@ -52,6 +58,9 @@  static int spi_flash_mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 	struct spi_flash *flash = mtd->priv;
 	int err;
 
+	if (!flash)
+		return -ENODEV;
+
 	err = spi_flash_write(flash, to, len, buf);
 	if (!err)
 		*retlen = len;
@@ -76,8 +85,13 @@  int spi_flash_mtd_register(struct spi_flash *flash)
 {
 	int ret;
 
-	if (sf_mtd_registered)
-		del_mtd_device(&sf_mtd_info);
+	if (sf_mtd_registered) {
+		ret = del_mtd_device(&sf_mtd_info);
+		if (ret)
+			return ret;
+
+		sf_mtd_registered = false;
+	}
 
 	sf_mtd_registered = false;
 	memset(&sf_mtd_info, 0, sizeof(sf_mtd_info));
@@ -110,5 +124,24 @@  int spi_flash_mtd_register(struct spi_flash *flash)
 
 void spi_flash_mtd_unregister(void)
 {
-	del_mtd_device(&sf_mtd_info);
+	int ret;
+
+	if (!sf_mtd_registered)
+		return;
+
+	ret = del_mtd_device(&sf_mtd_info);
+	if (!ret) {
+		sf_mtd_registered = false;
+		return;
+	}
+
+	/*
+	 * Setting mtd->priv to NULL is the best we can do. Thanks to that,
+	 * the MTD layer can still call mtd hooks without risking a
+	 * use-after-free bug. Still, things should be fixed to prevent the
+	 * spi_flash object from being destroyed when del_mtd_device() fails.
+	 */
+	sf_mtd_info.priv = NULL;
+	printf("Failed to unregister MTD %s and the spi_flash object is going away: you're in deep trouble!",
+	       sf_mtd_info.name);
 }