diff mbox

[OpenWrt-Devel] b53 switch driver memory leak and reset gpio pin initialization fix

Message ID 1434132971-5931-1-git-send-email-blmink@mink.su
State Changes Requested
Delegated to: Jonas Gorski
Headers show

Commit Message

Fedor Konstantinov June 12, 2015, 6:16 p.m. UTC
Memory and switch reset gpio pin must be allocated on switch/module init and freed on removal. At least they should not be allocated 2 or more times in a row.

Signed-off-by: Fedor Konstantinov <blmink@mink.su>
---
Comments:

Following cmd sequence calls b53_switch_init() twice, so causes leak of memory.
Last ifconfig will fail also on targets which support switch reset gpio pin, because devm_gpio_request_one() will be called twice in a row.
ifconfig eth0 up
ifconfig eth0 down
ifconfig eth0 up

mmap/spi/srab drivers were not tested yet because I don't have such hardware.
---
 .../generic/files/drivers/net/phy/b53/b53_common.c | 19 +++++++++++++++
 .../generic/files/drivers/net/phy/b53/b53_mdio.c   |  6 +++++
 .../generic/files/drivers/net/phy/b53/b53_mmap.c   | 28 +++++++++++++++++-----
 .../generic/files/drivers/net/phy/b53/b53_priv.h   |  7 +++---
 .../generic/files/drivers/net/phy/b53/b53_spi.c    | 20 ++++++++++++----
 .../generic/files/drivers/net/phy/b53/b53_srab.c   | 28 +++++++++++++++++-----
 6 files changed, 88 insertions(+), 20 deletions(-)

Comments

Jonas Gorski June 12, 2015, 10:57 p.m. UTC | #1
Hi,

On Fri, Jun 12, 2015 at 8:16 PM, Fedor Konstantinov <blmink@mink.su> wrote:
> Memory and switch reset gpio pin must be allocated on switch/module init and freed on removal. At least they should not be allocated 2 or more times in a row.
>
> Signed-off-by: Fedor Konstantinov <blmink@mink.su>
> ---
> Comments:
>
> Following cmd sequence calls b53_switch_init() twice, so causes leak of memory.
> Last ifconfig will fail also on targets which support switch reset gpio pin, because devm_gpio_request_one() will be called twice in a row.
> ifconfig eth0 up
> ifconfig eth0 down
> ifconfig eth0 up

On what platform? This also requires a better explanation why this is
the correct fix.

> mmap/spi/srab drivers were not tested yet because I don't have such hardware.
> ---
>  .../generic/files/drivers/net/phy/b53/b53_common.c | 19 +++++++++++++++
>  .../generic/files/drivers/net/phy/b53/b53_mdio.c   |  6 +++++
>  .../generic/files/drivers/net/phy/b53/b53_mmap.c   | 28 +++++++++++++++++-----
>  .../generic/files/drivers/net/phy/b53/b53_priv.h   |  7 +++---
>  .../generic/files/drivers/net/phy/b53/b53_spi.c    | 20 ++++++++++++----
>  .../generic/files/drivers/net/phy/b53/b53_srab.c   | 28 +++++++++++++++++-----
>  6 files changed, 88 insertions(+), 20 deletions(-)
>
> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
> index 47b5a8b..2e2f6aa 100644
> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
> @@ -1362,6 +1362,12 @@ struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
>  }
>  EXPORT_SYMBOL(b53_switch_alloc);
>
> +void b53_switch_free(struct device *dev, struct b53_device *priv)
> +{
> +       devm_kfree(dev, priv);
> +}
> +EXPORT_SYMBOL(b53_switch_free);
> +
>  int b53_switch_detect(struct b53_device *dev)
>  {
>         u32 id32;
> @@ -1452,6 +1458,19 @@ int b53_switch_register(struct b53_device *dev)
>  }
>  EXPORT_SYMBOL(b53_switch_register);
>
> +void b53_switch_remove(struct b53_device *dev)
> +{
> +       unregister_switch(&dev->sw_dev);
> +
> +       if (dev->reset_gpio >= 0)
> +               devm_gpio_free(dev->dev, dev->reset_gpio);
> +
> +       devm_kfree(dev->dev, dev->buf);
> +       devm_kfree(dev->dev, dev->vlans);
> +       devm_kfree(dev->dev, dev->ports);
> +}
> +EXPORT_SYMBOL(b53_switch_remove);

These look wrong, the whole point of using devm_* is that you do *not*
need to free the resources manually and it will be automatically done
on removal or probe failure.

> +
>  MODULE_AUTHOR("Jonas Gorski <jogo@openwrt.org>");
>  MODULE_DESCRIPTION("B53 switch library");
>  MODULE_LICENSE("Dual BSD/GPL");
> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
> index 3c25f0e..9a5f058 100644
> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
> @@ -285,6 +285,10 @@ static int b53_phy_config_init(struct phy_device *phydev)
>         struct b53_device *dev;
>         int ret;
>
> +       /* check if already initialized */
> +       if (phydev->priv)
> +               return 0;
> +

This is the only thing that looks somewhat valid, but needs a better
explanation why this might happen.

>         dev = b53_switch_alloc(&phydev->dev, &b53_mdio_ops, phydev->bus);
>         if (!dev)
>                 return -ENOMEM;
> @@ -314,6 +318,8 @@ static void b53_phy_remove(struct phy_device *phydev)
>
>         b53_switch_remove(priv);
>
> +       b53_switch_free(&phydev->dev, priv);
> +

See the above comment regarding devm_*free

>         phydev->priv = NULL;
>  }
>
> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
> index ab1895e..7c83758 100644
> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
> @@ -200,7 +200,12 @@ static struct b53_io_ops b53_mmap_ops = {
>  static int b53_mmap_probe(struct platform_device *pdev)
>  {
>         struct b53_platform_data *pdata = pdev->dev.platform_data;
> -       struct b53_device *dev;
> +       struct b53_device *dev = platform_get_drvdata(pdev);
> +       int ret;
> +
> +       /* check if already initialized */
> +       if (dev)
> +               return 0;

This shouldn't be possible. The probe shouldn't have been run twice,
and a remove should have unregistered the switch.

>
>         if (!pdata)
>                 return -EINVAL;
> @@ -209,20 +214,31 @@ static int b53_mmap_probe(struct platform_device *pdev)
>         if (!dev)
>                 return -ENOMEM;
>
> -       if (pdata)
> -               dev->pdata = pdata;
> +       dev->pdata = pdata;
> +
> +       ret = b53_switch_register(dev);
> +       if (ret) {
> +               dev_err(dev->dev, "failed to register switch: %i\n", ret);

If anything b53_switch_register should complain if it fails, not each
and every user of it.

> +               return ret;
> +       }
>
>         platform_set_drvdata(pdev, dev);
>
> -       return b53_switch_register(dev);
> +       return 0;

Undocumented, mostly unrelated code churn.

>  }
>
>  static int b53_mmap_remove(struct platform_device *pdev)
>  {
>         struct b53_device *dev = platform_get_drvdata(pdev);
>
> -       if (dev)
> -               b53_switch_remove(dev);
> +       if (!dev)
> +               return 0;
> +
> +       b53_switch_remove(dev);
> +
> +       b53_switch_free(&pdev->dev, dev);
> +
> +       platform_set_drvdata(pdev, NULL);

This is superfluous, the driver core already NULLs the drvdata on
remove / probe failure since 3.6.

>
>         return 0;
>  }
> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h b/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
> index 4336fdb..8ef68a1 100644
> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
> @@ -176,14 +176,13 @@ static inline struct b53_device *sw_to_b53(struct switch_dev *sw)
>  struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
>                                     void *priv);
>
> +void b53_switch_free(struct device *base, struct b53_device *priv);
> +
>  int b53_switch_detect(struct b53_device *dev);
>
>  int b53_switch_register(struct b53_device *dev);
>
> -static inline void b53_switch_remove(struct b53_device *dev)
> -{
> -       unregister_switch(&dev->sw_dev);
> -}
> +void b53_switch_remove(struct b53_device *dev);
>
>  static inline int b53_read8(struct b53_device *dev, u8 page, u8 reg, u8 *val)
>  {
> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c b/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
> index 469a8dd..7cfb120 100644
> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
> @@ -284,9 +284,13 @@ static struct b53_io_ops b53_spi_ops = {
>
>  static int b53_spi_probe(struct spi_device *spi)
>  {
> -       struct b53_device *dev;
> +       struct b53_device *dev = spi_get_drvdata(spi);
>         int ret;
>
> +       /* check if already initialized */
> +       if (dev)
> +               return 0;

This should be impossible.

> +
>         dev = b53_switch_alloc(&spi->dev, &b53_spi_ops, spi);
>         if (!dev)
>                 return -ENOMEM;
> @@ -295,8 +299,10 @@ static int b53_spi_probe(struct spi_device *spi)
>                 dev->pdata = spi->dev.platform_data;
>
>         ret = b53_switch_register(dev);
> -       if (ret)
> +       if (ret) {
> +               dev_err(dev->dev, "failed to register switch: %i\n", ret);
>                 return ret;
> +       }
>
>         spi_set_drvdata(spi, dev);
>
> @@ -307,8 +313,14 @@ static int b53_spi_remove(struct spi_device *spi)
>  {
>         struct b53_device *dev = spi_get_drvdata(spi);
>
> -       if (dev)
> -               b53_switch_remove(dev);
> +       if (!dev)
> +               return 0;
> +
> +       b53_switch_remove(dev);
> +
> +       b53_switch_free(&spi->dev, dev);
> +
> +       spi_set_drvdata(spi, NULL);

This is superfluous, the driver core already NULLs the drvdata on
remove / probe failure since 3.6.

>
>         return 0;
>  }
> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c b/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
> index 012daa3..00eb0fe 100644
> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
> @@ -337,7 +337,12 @@ static struct b53_io_ops b53_srab_ops = {
>  static int b53_srab_probe(struct platform_device *pdev)
>  {
>         struct b53_platform_data *pdata = pdev->dev.platform_data;
> -       struct b53_device *dev;
> +       struct b53_device *dev = platform_get_drvdata(pdev);
> +       int ret;
> +
> +       /* check if already initialized */
> +       if (dev)
> +               return 0;

Should be impossible etc etc.

>
>         if (!pdata)
>                 return -EINVAL;
> @@ -346,20 +351,31 @@ static int b53_srab_probe(struct platform_device *pdev)
>         if (!dev)
>                 return -ENOMEM;
>
> -       if (pdata)
> -               dev->pdata = pdata;
> +       dev->pdata = pdata;
> +
> +       ret = b53_switch_register(dev);
> +       if (ret) {
> +               dev_err(dev->dev, "failed to register switch: %i\n", ret);
> +               return ret;
> +       }
>
>         platform_set_drvdata(pdev, dev);
>
> -       return b53_switch_register(dev);
> +       return 0;

Undocumented, mostly unrelated code churn.

>  }
>
>  static int b53_srab_remove(struct platform_device *pdev)
>  {
>         struct b53_device *dev = platform_get_drvdata(pdev);
>
> -       if (dev)
> -               b53_switch_remove(dev);
> +       if (!dev)
> +               return 0;
> +
> +       b53_switch_remove(dev);
> +
> +       b53_switch_free(&pdev->dev, dev);
> +
> +       platform_set_drvdata(pdev, NULL);

This is superfluous, the driver core already NULLs the drvdata on
remove / probe failure since 3.6.

>
>         return 0;
>  }


Regards
Jonas
Fedor Konstantinov June 13, 2015, 12:31 p.m. UTC | #2
Hi, Jonas
I inserted trace points (pr_info) at functions in b53_common and figured 
out that driver behave wrong.
Please have a look at this:

~~~cut~~~
root@(none):/# uname -a
Linux (none) 4.0.4 #1 SMP Sat Jun 13 02:59:36 MSK 2015 mips64 GNU/Linux
root@(none):/#
root@(none):/# ifconfig eth0 up
[   15.561304] b53_common: b53_switch_alloc
[   15.565265] b53_common: b53_switch_register
[   15.569960] b53_common: b53_switch_init
[   15.650583] b53_common: found switch: BCM53115, rev 8
[   15.659538] eth0: 1000 Mbps Full duplex, port 0
[   15.664439] eth1: 1000 Mbps Full duplex, port  1, queue  1
root@(none):/#
root@(none):/# root@(none):/# ifconfig eth0 down
[   42.697656] eth0: Link down
[   42.700742] eth0: 1000 Mbps Full duplex, port  0, queue  0
root@(none):/#
root@(none):/# ifconfig eth0 down
root@(none):/# ifconfig eth0 up
[   49.145311] b53_common: b53_switch_alloc
[   49.149267] b53_common: b53_switch_register
[   49.153972] b53_common: b53_switch_init
[   49.160932] Broadcom B53 (1) 8001180000001800:1e: failed to register 
switch: -16
ifconfig: SIOCSIFFLAGS: No such device
root@(none):/# reboot
~~~cut~~~

We can see that "ifconfig up" calls b53_switch_alloc which calls 
devm_kzalloc in sequence.
b53_switch_alloc is called by following sequence:
"ifconfig up" --> b53_mdio driver probe/register --> b53_phy_config_init 
--> b53_switch_alloc
Driver removes never (in case of "ifconfig down" either) and it do not 
free memory.
So,  in our example we have b53_switch_alloc called twice in a row.

As for switch gpio reset.. "failed to register switch: -16" is from 
devm_gpio_request_one().
-16 is EBUSY (Device or resource busy).

My system is D-Link DSR-1000 (Cavium Octeon MIPS64), I have added 
support for it.
But things described above affect any platform.

Please also see comments below.


13.06.2015 01:57, Jonas Gorski пишет:
> Hi,
>
> On Fri, Jun 12, 2015 at 8:16 PM, Fedor Konstantinov <blmink@mink.su> wrote:
>> Memory and switch reset gpio pin must be allocated on switch/module init and freed on removal. At least they should not be allocated 2 or more times in a row.
>>
>> Signed-off-by: Fedor Konstantinov <blmink@mink.su>
>> ---
>> Comments:
>>
>> Following cmd sequence calls b53_switch_init() twice, so causes leak of memory.
>> Last ifconfig will fail also on targets which support switch reset gpio pin, because devm_gpio_request_one() will be called twice in a row.
>> ifconfig eth0 up
>> ifconfig eth0 down
>> ifconfig eth0 up
> On what platform? This also requires a better explanation why this is
> the correct fix.
These affect any platform.
>> mmap/spi/srab drivers were not tested yet because I don't have such hardware.
>> ---
>>   .../generic/files/drivers/net/phy/b53/b53_common.c | 19 +++++++++++++++
>>   .../generic/files/drivers/net/phy/b53/b53_mdio.c   |  6 +++++
>>   .../generic/files/drivers/net/phy/b53/b53_mmap.c   | 28 +++++++++++++++++-----
>>   .../generic/files/drivers/net/phy/b53/b53_priv.h   |  7 +++---
>>   .../generic/files/drivers/net/phy/b53/b53_spi.c    | 20 ++++++++++++----
>>   .../generic/files/drivers/net/phy/b53/b53_srab.c   | 28 +++++++++++++++++-----
>>   6 files changed, 88 insertions(+), 20 deletions(-)
>>
>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
>> index 47b5a8b..2e2f6aa 100644
>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
>> @@ -1362,6 +1362,12 @@ struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
>>   }
>>   EXPORT_SYMBOL(b53_switch_alloc);
>>
>> +void b53_switch_free(struct device *dev, struct b53_device *priv)
>> +{
>> +       devm_kfree(dev, priv);
>> +}
>> +EXPORT_SYMBOL(b53_switch_free);
>> +
>>   int b53_switch_detect(struct b53_device *dev)
>>   {
>>          u32 id32;
>> @@ -1452,6 +1458,19 @@ int b53_switch_register(struct b53_device *dev)
>>   }
>>   EXPORT_SYMBOL(b53_switch_register);
>>
>> +void b53_switch_remove(struct b53_device *dev)
>> +{
>> +       unregister_switch(&dev->sw_dev);
>> +
>> +       if (dev->reset_gpio >= 0)
>> +               devm_gpio_free(dev->dev, dev->reset_gpio);
>> +
>> +       devm_kfree(dev->dev, dev->buf);
>> +       devm_kfree(dev->dev, dev->vlans);
>> +       devm_kfree(dev->dev, dev->ports);
>> +}
>> +EXPORT_SYMBOL(b53_switch_remove);
> These look wrong, the whole point of using devm_* is that you do *not*
> need to free the resources manually and it will be automatically done
> on removal or probe failure.
Removal occurs never, unfortunately. But memory may be allocated several 
times. :(
In our case "ifconfig down" doesn't remove driver. But "ifconfig up" 
allocates memory.
>> +
>>   MODULE_AUTHOR("Jonas Gorski <jogo@openwrt.org>");
>>   MODULE_DESCRIPTION("B53 switch library");
>>   MODULE_LICENSE("Dual BSD/GPL");
>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
>> index 3c25f0e..9a5f058 100644
>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
>> @@ -285,6 +285,10 @@ static int b53_phy_config_init(struct phy_device *phydev)
>>          struct b53_device *dev;
>>          int ret;
>>
>> +       /* check if already initialized */
>> +       if (phydev->priv)
>> +               return 0;
>> +
> This is the only thing that looks somewhat valid, but needs a better
> explanation why this might happen.
This prevents registration of already registered switch and allocation 
of memory which already allocated.
>>          dev = b53_switch_alloc(&phydev->dev, &b53_mdio_ops, phydev->bus);
>>          if (!dev)
>>                  return -ENOMEM;
>> @@ -314,6 +318,8 @@ static void b53_phy_remove(struct phy_device *phydev)
>>
>>          b53_switch_remove(priv);
>>
>> +       b53_switch_free(&phydev->dev, priv);
>> +
> See the above comment regarding devm_*free
>
>>          phydev->priv = NULL;
>>   }
>>
>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
>> index ab1895e..7c83758 100644
>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
>> @@ -200,7 +200,12 @@ static struct b53_io_ops b53_mmap_ops = {
>>   static int b53_mmap_probe(struct platform_device *pdev)
>>   {
>>          struct b53_platform_data *pdata = pdev->dev.platform_data;
>> -       struct b53_device *dev;
>> +       struct b53_device *dev = platform_get_drvdata(pdev);
>> +       int ret;
>> +
>> +       /* check if already initialized */
>> +       if (dev)
>> +               return 0;
> This shouldn't be possible. The probe shouldn't have been run twice,
> and a remove should have unregistered the switch.
May be you are right. I didn't test mmap/spi/srab stuff.
But in case of mdio we have switch connected to MDIO bus as pseudo PHY. 
And it is initialized on every "ifconfig up".
And every initialization allocates memory.
In case of mmap/spi/srab it is important to know: when driver probe 
occurs and does it (or what) happens on "ifconfig up". Unfortunately, I 
don't have such hardware and can't test it.
>
>>          if (!pdata)
>>                  return -EINVAL;
>> @@ -209,20 +214,31 @@ static int b53_mmap_probe(struct platform_device *pdev)
>>          if (!dev)
>>                  return -ENOMEM;
>>
>> -       if (pdata)
>> -               dev->pdata = pdata;
>> +       dev->pdata = pdata;
>> +
>> +       ret = b53_switch_register(dev);
>> +       if (ret) {
>> +               dev_err(dev->dev, "failed to register switch: %i\n", ret);
> If anything b53_switch_register should complain if it fails, not each
> and every user of it.
>
>> +               return ret;
>> +       }
>>
>>          platform_set_drvdata(pdev, dev);
>>
>> -       return b53_switch_register(dev);
>> +       return 0;
> Undocumented, mostly unrelated code churn.
>
>>   }
>>
>>   static int b53_mmap_remove(struct platform_device *pdev)
>>   {
>>          struct b53_device *dev = platform_get_drvdata(pdev);
>>
>> -       if (dev)
>> -               b53_switch_remove(dev);
>> +       if (!dev)
>> +               return 0;
>> +
>> +       b53_switch_remove(dev);
>> +
>> +       b53_switch_free(&pdev->dev, dev);
>> +
>> +       platform_set_drvdata(pdev, NULL);
> This is superfluous, the driver core already NULLs the drvdata on
> remove / probe failure since 3.6.
>
>>          return 0;
>>   }
>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h b/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
>> index 4336fdb..8ef68a1 100644
>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
>> @@ -176,14 +176,13 @@ static inline struct b53_device *sw_to_b53(struct switch_dev *sw)
>>   struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
>>                                      void *priv);
>>
>> +void b53_switch_free(struct device *base, struct b53_device *priv);
>> +
>>   int b53_switch_detect(struct b53_device *dev);
>>
>>   int b53_switch_register(struct b53_device *dev);
>>
>> -static inline void b53_switch_remove(struct b53_device *dev)
>> -{
>> -       unregister_switch(&dev->sw_dev);
>> -}
>> +void b53_switch_remove(struct b53_device *dev);
>>
>>   static inline int b53_read8(struct b53_device *dev, u8 page, u8 reg, u8 *val)
>>   {
>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c b/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
>> index 469a8dd..7cfb120 100644
>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
>> @@ -284,9 +284,13 @@ static struct b53_io_ops b53_spi_ops = {
>>
>>   static int b53_spi_probe(struct spi_device *spi)
>>   {
>> -       struct b53_device *dev;
>> +       struct b53_device *dev = spi_get_drvdata(spi);
>>          int ret;
>>
>> +       /* check if already initialized */
>> +       if (dev)
>> +               return 0;
> This should be impossible.
>
>> +
>>          dev = b53_switch_alloc(&spi->dev, &b53_spi_ops, spi);
>>          if (!dev)
>>                  return -ENOMEM;
>> @@ -295,8 +299,10 @@ static int b53_spi_probe(struct spi_device *spi)
>>                  dev->pdata = spi->dev.platform_data;
>>
>>          ret = b53_switch_register(dev);
>> -       if (ret)
>> +       if (ret) {
>> +               dev_err(dev->dev, "failed to register switch: %i\n", ret);
>>                  return ret;
>> +       }
>>
>>          spi_set_drvdata(spi, dev);
>>
>> @@ -307,8 +313,14 @@ static int b53_spi_remove(struct spi_device *spi)
>>   {
>>          struct b53_device *dev = spi_get_drvdata(spi);
>>
>> -       if (dev)
>> -               b53_switch_remove(dev);
>> +       if (!dev)
>> +               return 0;
>> +
>> +       b53_switch_remove(dev);
>> +
>> +       b53_switch_free(&spi->dev, dev);
>> +
>> +       spi_set_drvdata(spi, NULL);
> This is superfluous, the driver core already NULLs the drvdata on
> remove / probe failure since 3.6.
>
>>          return 0;
>>   }
>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c b/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
>> index 012daa3..00eb0fe 100644
>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
>> @@ -337,7 +337,12 @@ static struct b53_io_ops b53_srab_ops = {
>>   static int b53_srab_probe(struct platform_device *pdev)
>>   {
>>          struct b53_platform_data *pdata = pdev->dev.platform_data;
>> -       struct b53_device *dev;
>> +       struct b53_device *dev = platform_get_drvdata(pdev);
>> +       int ret;
>> +
>> +       /* check if already initialized */
>> +       if (dev)
>> +               return 0;
> Should be impossible etc etc.
>
>>          if (!pdata)
>>                  return -EINVAL;
>> @@ -346,20 +351,31 @@ static int b53_srab_probe(struct platform_device *pdev)
>>          if (!dev)
>>                  return -ENOMEM;
>>
>> -       if (pdata)
>> -               dev->pdata = pdata;
>> +       dev->pdata = pdata;
>> +
>> +       ret = b53_switch_register(dev);
>> +       if (ret) {
>> +               dev_err(dev->dev, "failed to register switch: %i\n", ret);
>> +               return ret;
>> +       }
>>
>>          platform_set_drvdata(pdev, dev);
>>
>> -       return b53_switch_register(dev);
>> +       return 0;
> Undocumented, mostly unrelated code churn.
>
>>   }
>>
>>   static int b53_srab_remove(struct platform_device *pdev)
>>   {
>>          struct b53_device *dev = platform_get_drvdata(pdev);
>>
>> -       if (dev)
>> -               b53_switch_remove(dev);
>> +       if (!dev)
>> +               return 0;
>> +
>> +       b53_switch_remove(dev);
>> +
>> +       b53_switch_free(&pdev->dev, dev);
>> +
>> +       platform_set_drvdata(pdev, NULL);
> This is superfluous, the driver core already NULLs the drvdata on
> remove / probe failure since 3.6.
>
>>          return 0;
>>   }
>
> Regards
> Jonas
--
Best regards
Fedor.
Jonas Gorski June 18, 2015, 9:11 p.m. UTC | #3
Hi Fedor,

please don't top post.

On Sat, Jun 13, 2015 at 2:31 PM, blmink <blmink@mink.su> wrote:
> Hi, Jonas
> I inserted trace points (pr_info) at functions in b53_common and figured out
> that driver behave wrong.
> Please have a look at this:
>
> ~~~cut~~~
> root@(none):/# uname -a
> Linux (none) 4.0.4 #1 SMP Sat Jun 13 02:59:36 MSK 2015 mips64 GNU/Linux
> root@(none):/#
> root@(none):/# ifconfig eth0 up
> [   15.561304] b53_common: b53_switch_alloc
> [   15.565265] b53_common: b53_switch_register
> [   15.569960] b53_common: b53_switch_init
> [   15.650583] b53_common: found switch: BCM53115, rev 8
> [   15.659538] eth0: 1000 Mbps Full duplex, port 0
> [   15.664439] eth1: 1000 Mbps Full duplex, port  1, queue  1
> root@(none):/#
> root@(none):/# root@(none):/# ifconfig eth0 down
> [   42.697656] eth0: Link down
> [   42.700742] eth0: 1000 Mbps Full duplex, port  0, queue  0
> root@(none):/#
> root@(none):/# ifconfig eth0 down
> root@(none):/# ifconfig eth0 up
> [   49.145311] b53_common: b53_switch_alloc
> [   49.149267] b53_common: b53_switch_register
> [   49.153972] b53_common: b53_switch_init
> [   49.160932] Broadcom B53 (1) 8001180000001800:1e: failed to register
> switch: -16
> ifconfig: SIOCSIFFLAGS: No such device
> root@(none):/# reboot
> ~~~cut~~~
>
> We can see that "ifconfig up" calls b53_switch_alloc which calls
> devm_kzalloc in sequence.
> b53_switch_alloc is called by following sequence:
> "ifconfig up" --> b53_mdio driver probe/register --> b53_phy_config_init -->
> b53_switch_alloc

Your analysis isn't quite correct.

Probe is called from mdio_bus_register -> mdio_bus_scan -> device_add
-> b53_phy_probe (independend of any ethernet devices).

Config_init is called from phy_connect -> phy_hw_init -> b53_phy_config_init.

> Driver removes never (in case of "ifconfig down" either) and it do not free
> memory.
> So,  in our example we have b53_switch_alloc called twice in a row.
>
> As for switch gpio reset.. "failed to register switch: -16" is from
> devm_gpio_request_one().
> -16 is EBUSY (Device or resource busy).
>
> My system is D-Link DSR-1000 (Cavium Octeon MIPS64), I have added support
> for it.
> But things described above affect any platform.

No, it is dependent on the way the ethernet driver works / interacts
with the phy subsystem. To use a phy, there are two setup calls and
two remove calls; phy_connect/phy_disconnect and phy_start/phy_stop.

The issue you are seeing is only seen with ethernet drivers that do
the phy_connect in their start (ifup), but not seen with drivers that
do it in their probe callback. That's why I said that the "already
alloc'd/registered" check in _config_init is the only valid part of
this patch.

The other possibility would be moving the allocation/registration to
the driver probe, but that would mean that we would lose the "ethX"
alias for the switch, which might still be used by some
configurations.

> Please also see comments below.
>
>
> 13.06.2015 01:57, Jonas Gorski пишет:
>>
>> Hi,
>>
>> On Fri, Jun 12, 2015 at 8:16 PM, Fedor Konstantinov <blmink@mink.su>
>> wrote:
>>>
>>> Memory and switch reset gpio pin must be allocated on switch/module init
>>> and freed on removal. At least they should not be allocated 2 or more times
>>> in a row.
>>>
>>> Signed-off-by: Fedor Konstantinov <blmink@mink.su>
>>> ---
>>> Comments:
>>>
>>> Following cmd sequence calls b53_switch_init() twice, so causes leak of
>>> memory.
>>> Last ifconfig will fail also on targets which support switch reset gpio
>>> pin, because devm_gpio_request_one() will be called twice in a row.
>>> ifconfig eth0 up
>>> ifconfig eth0 down
>>> ifconfig eth0 up
>>
>> On what platform? This also requires a better explanation why this is
>> the correct fix.
>
> These affect any platform.

As I explained above, it does not affect any platform.

>>> mmap/spi/srab drivers were not tested yet because I don't have such
>>> hardware.
>>> ---
>>>   .../generic/files/drivers/net/phy/b53/b53_common.c | 19 +++++++++++++++
>>>   .../generic/files/drivers/net/phy/b53/b53_mdio.c   |  6 +++++
>>>   .../generic/files/drivers/net/phy/b53/b53_mmap.c   | 28
>>> +++++++++++++++++-----
>>>   .../generic/files/drivers/net/phy/b53/b53_priv.h   |  7 +++---
>>>   .../generic/files/drivers/net/phy/b53/b53_spi.c    | 20
>>> ++++++++++++----
>>>   .../generic/files/drivers/net/phy/b53/b53_srab.c   | 28
>>> +++++++++++++++++-----
>>>   6 files changed, 88 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
>>> b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
>>> index 47b5a8b..2e2f6aa 100644
>>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
>>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
>>> @@ -1362,6 +1362,12 @@ struct b53_device *b53_switch_alloc(struct device
>>> *base, struct b53_io_ops *ops,
>>>   }
>>>   EXPORT_SYMBOL(b53_switch_alloc);
>>>
>>> +void b53_switch_free(struct device *dev, struct b53_device *priv)
>>> +{
>>> +       devm_kfree(dev, priv);
>>> +}
>>> +EXPORT_SYMBOL(b53_switch_free);
>>> +
>>>   int b53_switch_detect(struct b53_device *dev)
>>>   {
>>>          u32 id32;
>>> @@ -1452,6 +1458,19 @@ int b53_switch_register(struct b53_device *dev)
>>>   }
>>>   EXPORT_SYMBOL(b53_switch_register);
>>>
>>> +void b53_switch_remove(struct b53_device *dev)
>>> +{
>>> +       unregister_switch(&dev->sw_dev);
>>> +
>>> +       if (dev->reset_gpio >= 0)
>>> +               devm_gpio_free(dev->dev, dev->reset_gpio);
>>> +
>>> +       devm_kfree(dev->dev, dev->buf);
>>> +       devm_kfree(dev->dev, dev->vlans);
>>> +       devm_kfree(dev->dev, dev->ports);
>>> +}
>>> +EXPORT_SYMBOL(b53_switch_remove);
>>
>> These look wrong, the whole point of using devm_* is that you do *not*
>> need to free the resources manually and it will be automatically done
>> on removal or probe failure.
>
> Removal occurs never, unfortunately. But memory may be allocated several
> times. :(
> In our case "ifconfig down" doesn't remove driver. But "ifconfig up"
> allocates memory.
>>>
>>> +
>>>   MODULE_AUTHOR("Jonas Gorski <jogo@openwrt.org>");
>>>   MODULE_DESCRIPTION("B53 switch library");
>>>   MODULE_LICENSE("Dual BSD/GPL");
>>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
>>> b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
>>> index 3c25f0e..9a5f058 100644
>>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
>>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
>>> @@ -285,6 +285,10 @@ static int b53_phy_config_init(struct phy_device
>>> *phydev)
>>>          struct b53_device *dev;
>>>          int ret;
>>>
>>> +       /* check if already initialized */
>>> +       if (phydev->priv)
>>> +               return 0;
>>> +
>>
>> This is the only thing that looks somewhat valid, but needs a better
>> explanation why this might happen.
>
> This prevents registration of already registered switch and allocation of
> memory which already allocated.

That only explains what it does, not why it is needed.

>>>          dev = b53_switch_alloc(&phydev->dev, &b53_mdio_ops,
>>> phydev->bus);
>>>          if (!dev)
>>>                  return -ENOMEM;
>>> @@ -314,6 +318,8 @@ static void b53_phy_remove(struct phy_device *phydev)
>>>
>>>          b53_switch_remove(priv);
>>>
>>> +       b53_switch_free(&phydev->dev, priv);
>>> +
>>
>> See the above comment regarding devm_*free
>>
>>>          phydev->priv = NULL;
>>>   }
>>>
>>> diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
>>> b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
>>> index ab1895e..7c83758 100644
>>> --- a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
>>> +++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
>>> @@ -200,7 +200,12 @@ static struct b53_io_ops b53_mmap_ops = {
>>>   static int b53_mmap_probe(struct platform_device *pdev)
>>>   {
>>>          struct b53_platform_data *pdata = pdev->dev.platform_data;
>>> -       struct b53_device *dev;
>>> +       struct b53_device *dev = platform_get_drvdata(pdev);
>>> +       int ret;
>>> +
>>> +       /* check if already initialized */
>>> +       if (dev)
>>> +               return 0;
>>
>> This shouldn't be possible. The probe shouldn't have been run twice,
>> and a remove should have unregistered the switch.
>
> May be you are right. I didn't test mmap/spi/srab stuff.
> But in case of mdio we have switch connected to MDIO bus as pseudo PHY. And
> it is initialized on every "ifconfig up".

And it's the _config_init that is called, not the _probe.

> And every initialization allocates memory.
> In case of mmap/spi/srab it is important to know: when driver probe occurs
> and does it (or what) happens on "ifconfig up". Unfortunately, I don't have
> such hardware and can't test it.

You need to understand the code better before you start modifying it.
You can ask questions, here or on IRC. Read e.g.
http://lxr.free-electrons.com/source/Documentation/driver-model/ for
an explanation of the lifetime of drivers and how they interact in the
linux kernel.


Regards
Jonas
diff mbox

Patch

diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
index 47b5a8b..2e2f6aa 100644
--- a/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
+++ b/target/linux/generic/files/drivers/net/phy/b53/b53_common.c
@@ -1362,6 +1362,12 @@  struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
 }
 EXPORT_SYMBOL(b53_switch_alloc);
 
+void b53_switch_free(struct device *dev, struct b53_device *priv)
+{
+	devm_kfree(dev, priv);
+}
+EXPORT_SYMBOL(b53_switch_free);
+
 int b53_switch_detect(struct b53_device *dev)
 {
 	u32 id32;
@@ -1452,6 +1458,19 @@  int b53_switch_register(struct b53_device *dev)
 }
 EXPORT_SYMBOL(b53_switch_register);
 
+void b53_switch_remove(struct b53_device *dev)
+{
+	unregister_switch(&dev->sw_dev);
+
+	if (dev->reset_gpio >= 0)
+		devm_gpio_free(dev->dev, dev->reset_gpio);
+
+	devm_kfree(dev->dev, dev->buf);
+	devm_kfree(dev->dev, dev->vlans);
+	devm_kfree(dev->dev, dev->ports);
+}
+EXPORT_SYMBOL(b53_switch_remove);
+
 MODULE_AUTHOR("Jonas Gorski <jogo@openwrt.org>");
 MODULE_DESCRIPTION("B53 switch library");
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
index 3c25f0e..9a5f058 100644
--- a/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
+++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mdio.c
@@ -285,6 +285,10 @@  static int b53_phy_config_init(struct phy_device *phydev)
 	struct b53_device *dev;
 	int ret;
 
+	/* check if already initialized */
+	if (phydev->priv)
+		return 0;
+
 	dev = b53_switch_alloc(&phydev->dev, &b53_mdio_ops, phydev->bus);
 	if (!dev)
 		return -ENOMEM;
@@ -314,6 +318,8 @@  static void b53_phy_remove(struct phy_device *phydev)
 
 	b53_switch_remove(priv);
 
+	b53_switch_free(&phydev->dev, priv);
+
 	phydev->priv = NULL;
 }
 
diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
index ab1895e..7c83758 100644
--- a/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
+++ b/target/linux/generic/files/drivers/net/phy/b53/b53_mmap.c
@@ -200,7 +200,12 @@  static struct b53_io_ops b53_mmap_ops = {
 static int b53_mmap_probe(struct platform_device *pdev)
 {
 	struct b53_platform_data *pdata = pdev->dev.platform_data;
-	struct b53_device *dev;
+	struct b53_device *dev = platform_get_drvdata(pdev);
+	int ret;
+
+	/* check if already initialized */
+	if (dev)
+		return 0;
 
 	if (!pdata)
 		return -EINVAL;
@@ -209,20 +214,31 @@  static int b53_mmap_probe(struct platform_device *pdev)
 	if (!dev)
 		return -ENOMEM;
 
-	if (pdata)
-		dev->pdata = pdata;
+	dev->pdata = pdata;
+
+	ret = b53_switch_register(dev);
+	if (ret) {
+		dev_err(dev->dev, "failed to register switch: %i\n", ret);
+		return ret;
+	}
 
 	platform_set_drvdata(pdev, dev);
 
-	return b53_switch_register(dev);
+	return 0;
 }
 
 static int b53_mmap_remove(struct platform_device *pdev)
 {
 	struct b53_device *dev = platform_get_drvdata(pdev);
 
-	if (dev)
-		b53_switch_remove(dev);
+	if (!dev)
+		return 0;
+
+	b53_switch_remove(dev);
+
+	b53_switch_free(&pdev->dev, dev);
+
+	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h b/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
index 4336fdb..8ef68a1 100644
--- a/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
+++ b/target/linux/generic/files/drivers/net/phy/b53/b53_priv.h
@@ -176,14 +176,13 @@  static inline struct b53_device *sw_to_b53(struct switch_dev *sw)
 struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
 				    void *priv);
 
+void b53_switch_free(struct device *base, struct b53_device *priv);
+
 int b53_switch_detect(struct b53_device *dev);
 
 int b53_switch_register(struct b53_device *dev);
 
-static inline void b53_switch_remove(struct b53_device *dev)
-{
-	unregister_switch(&dev->sw_dev);
-}
+void b53_switch_remove(struct b53_device *dev);
 
 static inline int b53_read8(struct b53_device *dev, u8 page, u8 reg, u8 *val)
 {
diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c b/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
index 469a8dd..7cfb120 100644
--- a/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
+++ b/target/linux/generic/files/drivers/net/phy/b53/b53_spi.c
@@ -284,9 +284,13 @@  static struct b53_io_ops b53_spi_ops = {
 
 static int b53_spi_probe(struct spi_device *spi)
 {
-	struct b53_device *dev;
+	struct b53_device *dev = spi_get_drvdata(spi);
 	int ret;
 
+	/* check if already initialized */
+	if (dev)
+		return 0;
+
 	dev = b53_switch_alloc(&spi->dev, &b53_spi_ops, spi);
 	if (!dev)
 		return -ENOMEM;
@@ -295,8 +299,10 @@  static int b53_spi_probe(struct spi_device *spi)
 		dev->pdata = spi->dev.platform_data;
 
 	ret = b53_switch_register(dev);
-	if (ret)
+	if (ret) {
+		dev_err(dev->dev, "failed to register switch: %i\n", ret);
 		return ret;
+	}
 
 	spi_set_drvdata(spi, dev);
 
@@ -307,8 +313,14 @@  static int b53_spi_remove(struct spi_device *spi)
 {
 	struct b53_device *dev = spi_get_drvdata(spi);
 
-	if (dev)
-		b53_switch_remove(dev);
+	if (!dev)
+		return 0;
+
+	b53_switch_remove(dev);
+
+	b53_switch_free(&spi->dev, dev);
+
+	spi_set_drvdata(spi, NULL);
 
 	return 0;
 }
diff --git a/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c b/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
index 012daa3..00eb0fe 100644
--- a/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
+++ b/target/linux/generic/files/drivers/net/phy/b53/b53_srab.c
@@ -337,7 +337,12 @@  static struct b53_io_ops b53_srab_ops = {
 static int b53_srab_probe(struct platform_device *pdev)
 {
 	struct b53_platform_data *pdata = pdev->dev.platform_data;
-	struct b53_device *dev;
+	struct b53_device *dev = platform_get_drvdata(pdev);
+	int ret;
+
+	/* check if already initialized */
+	if (dev)
+		return 0;
 
 	if (!pdata)
 		return -EINVAL;
@@ -346,20 +351,31 @@  static int b53_srab_probe(struct platform_device *pdev)
 	if (!dev)
 		return -ENOMEM;
 
-	if (pdata)
-		dev->pdata = pdata;
+	dev->pdata = pdata;
+
+	ret = b53_switch_register(dev);
+	if (ret) {
+		dev_err(dev->dev, "failed to register switch: %i\n", ret);
+		return ret;
+	}
 
 	platform_set_drvdata(pdev, dev);
 
-	return b53_switch_register(dev);
+	return 0;
 }
 
 static int b53_srab_remove(struct platform_device *pdev)
 {
 	struct b53_device *dev = platform_get_drvdata(pdev);
 
-	if (dev)
-		b53_switch_remove(dev);
+	if (!dev)
+		return 0;
+
+	b53_switch_remove(dev);
+
+	b53_switch_free(&pdev->dev, dev);
+
+	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }