diff mbox series

[v4,01/18] of: overlay: add tests to validate kfrees from overlay removal

Message ID 1539657458-24401-2-git-send-email-frowand.list@gmail.com
State Changes Requested, archived
Headers show
Series of: overlay: validation checks, subsequent fixes | expand

Commit Message

Frank Rowand Oct. 16, 2018, 2:37 a.m. UTC
From: Frank Rowand <frank.rowand@sony.com>

Add checks:
  - attempted kfree due to refcount reaching zero before overlay
    is removed
  - properties linked to an overlay node when the node is removed
  - node refcount > one during node removal in a changeset destroy,
    if the node was created by the changeset

After applying this patch, several validation warnings will be
reported from the devicetree unittest during boot due to
pre-existing devicetree bugs. The warnings will be similar to:

  OF: ERROR: of_node_release() overlay node /testcase-data/overlay-node/test-bus/test-unittest11/test-unittest111 contains unexpected properties
  OF: ERROR: memory leak - destroy cset entry: attach overlay node /testcase-data-2/substation@100/hvac-medium-2 expected refcount 1 instead of 2.  of_node_get() / of_node_put() are unbalanced for this node.

Signed-off-by: Frank Rowand <frank.rowand@sony.com>
---
Changes since v3:
  - Add expected value of refcount for destroy cset entry error.  Also
    explain the cause of the error.

 drivers/of/dynamic.c | 29 +++++++++++++++++++++++++++++
 drivers/of/overlay.c |  1 +
 include/linux/of.h   | 15 ++++++++++-----
 3 files changed, 40 insertions(+), 5 deletions(-)

Comments

Alan Tull Oct. 17, 2018, 9:30 p.m. UTC | #1
On Mon, Oct 15, 2018 at 9:39 PM <frowand.list@gmail.com> wrote:

Hi Frank,

>
> From: Frank Rowand <frank.rowand@sony.com>
>
> Add checks:
>   - attempted kfree due to refcount reaching zero before overlay
>     is removed
>   - properties linked to an overlay node when the node is removed
>   - node refcount > one during node removal in a changeset destroy,
>     if the node was created by the changeset
>
> After applying this patch, several validation warnings will be
> reported from the devicetree unittest during boot due to
> pre-existing devicetree bugs. The warnings will be similar to:
>
>   OF: ERROR: of_node_release() overlay node /testcase-data/overlay-node/test-bus/test-unittest11/test-unittest111 contains unexpected properties
>   OF: ERROR: memory leak - destroy cset entry: attach overlay node /testcase-data-2/substation@100/hvac-medium-2 expected refcount 1 instead of 2.  of_node_get() / of_node_put() are unbalanced for this node.
>
> Signed-off-by: Frank Rowand <frank.rowand@sony.com>
> ---
> Changes since v3:
>   - Add expected value of refcount for destroy cset entry error.  Also
>     explain the cause of the error.
>
>  drivers/of/dynamic.c | 29 +++++++++++++++++++++++++++++
>  drivers/of/overlay.c |  1 +
>  include/linux/of.h   | 15 ++++++++++-----
>  3 files changed, 40 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
> index f4f8ed9b5454..24c97b7a050f 100644
> --- a/drivers/of/dynamic.c
> +++ b/drivers/of/dynamic.c
> @@ -330,6 +330,25 @@ void of_node_release(struct kobject *kobj)
>         if (!of_node_check_flag(node, OF_DYNAMIC))
>                 return;
>
> +       if (of_node_check_flag(node, OF_OVERLAY)) {
> +
> +               if (!of_node_check_flag(node, OF_OVERLAY_FREE_CSET)) {
> +                       /* premature refcount of zero, do not free memory */
> +                       pr_err("ERROR: memory leak %s() overlay node %pOF before free overlay changeset\n",
> +                              __func__, node);
> +                       return;
> +               }
> +
> +               /*
> +                * If node->properties non-empty then properties were added
> +                * to this node either by different overlay that has not
> +                * yet been removed, or by a non-overlay mechanism.
> +                */
> +               if (node->properties)
> +                       pr_err("ERROR: %s() overlay node %pOF contains unexpected properties\n",
> +                              __func__, node);
> +       }
> +
>         property_list_free(node->properties);
>         property_list_free(node->deadprops);
>
> @@ -434,6 +453,16 @@ struct device_node *__of_node_dup(const struct device_node *np,
>
>  static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
>  {
> +       if (ce->action == OF_RECONFIG_ATTACH_NODE &&
> +           of_node_check_flag(ce->np, OF_OVERLAY)) {
> +               if (kref_read(&ce->np->kobj.kref) > 1) {
> +                       pr_err("ERROR: memory leak - destroy cset entry: attach overlay node %pOF expected refcount 1 instead of %d.  of_node_get() / of_node_put() are unbalanced for this node.\n",
> +                              ce->np, kref_read(&ce->np->kobj.kref));

Still testing as much as I have time to do.

I'm hitting this error message once when removing an overlay that adds
several child nodes.  The only node I get the message for was a node
that added a fixed-clock (the other nodes didn't trigger the error).
Then even if I edited all the rest of the overlay DTS and removed all
other child nodes and all references to the clock from other nodes, I
still got the error.

Removing dtbo: 1-socfpga_arria10_socdk_sdmmc_ghrd_ovl_ext_cfg.dtb
[   72.032270] OF: ERROR: memory leak - destroy cset entry: attach
overlay node /soc/base_fpga_region/clk_0 expected refcount 1 instead
of 2.  of_node_get() / of_node_put() are unbalanced for this node.

Here's the very stripped down overlay:

/dts-v1/;
/plugin/;
/ {
        fragment@0 {
                target-path = "/soc/base_fpga_region";
                #address-cells = <1>;
                #size-cells = <1>;

                __overlay__ {
                        external-fpga-config;

                        #address-cells = <1>;
                        #size-cells = <1>;

                        clk_0: clk_0 {
                                compatible = "fixed-clock";
                                #clock-cells = <0>;
                                clock-frequency = <100000000>;  /* 100.00 MHz */
                                clock-output-names = "clk_0-clk";
                        };
                };
        };
};

I'll look at it some more tomorrow and try to figure out what's
special about this node.

Alan

> +               } else {
> +                       of_node_set_flag(ce->np, OF_OVERLAY_FREE_CSET);
> +               }
> +       }
> +
>         of_node_put(ce->np);
>         list_del(&ce->node);
>         kfree(ce);
> diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
> index eda57ef12fd0..1176cb4b6e4e 100644
> --- a/drivers/of/overlay.c
> +++ b/drivers/of/overlay.c
> @@ -373,6 +373,7 @@ static int add_changeset_node(struct overlay_changeset *ovcs,
>                         return -ENOMEM;
>
>                 tchild->parent = target_node;
> +               of_node_set_flag(tchild, OF_OVERLAY);
>
>                 ret = of_changeset_attach_node(&ovcs->cset, tchild);
>                 if (ret)
> diff --git a/include/linux/of.h b/include/linux/of.h
> index 4d25e4f952d9..aa1dafaec6ae 100644
> --- a/include/linux/of.h
> +++ b/include/linux/of.h
> @@ -138,11 +138,16 @@ static inline void of_node_put(struct device_node *node) { }
>  extern struct device_node *of_stdout;
>  extern raw_spinlock_t devtree_lock;
>
> -/* flag descriptions (need to be visible even when !CONFIG_OF) */
> -#define OF_DYNAMIC     1 /* node and properties were allocated via kmalloc */
> -#define OF_DETACHED    2 /* node has been detached from the device tree */
> -#define OF_POPULATED   3 /* device already created for the node */
> -#define OF_POPULATED_BUS       4 /* of_platform_populate recursed to children of this node */
> +/*
> + * struct device_node flag descriptions
> + * (need to be visible even when !CONFIG_OF)
> + */
> +#define OF_DYNAMIC             1 /* (and properties) allocated via kmalloc */
> +#define OF_DETACHED            2 /* detached from the device tree */
> +#define OF_POPULATED           3 /* device already created */
> +#define OF_POPULATED_BUS       4 /* platform bus created for children */
> +#define OF_OVERLAY             5 /* allocated for an overlay */
> +#define OF_OVERLAY_FREE_CSET   6 /* in overlay cset being freed */
>
>  #define OF_BAD_ADDR    ((u64)-1)
>
> --
> Frank Rowand <frank.rowand@sony.com>
>
Rob Herring (Arm) Oct. 18, 2018, 5:03 p.m. UTC | #2
On Mon, Oct 15, 2018 at 07:37:21PM -0700, frowand.list@gmail.com wrote:
> From: Frank Rowand <frank.rowand@sony.com>
> 
> Add checks:
>   - attempted kfree due to refcount reaching zero before overlay
>     is removed
>   - properties linked to an overlay node when the node is removed
>   - node refcount > one during node removal in a changeset destroy,
>     if the node was created by the changeset
> 
> After applying this patch, several validation warnings will be
> reported from the devicetree unittest during boot due to
> pre-existing devicetree bugs. The warnings will be similar to:
> 
>   OF: ERROR: of_node_release() overlay node /testcase-data/overlay-node/test-bus/test-unittest11/test-unittest111 contains unexpected properties
>   OF: ERROR: memory leak - destroy cset entry: attach overlay node /testcase-data-2/substation@100/hvac-medium-2 expected refcount 1 instead of 2.  of_node_get() / of_node_put() are unbalanced for this node.

These messages could be formatted more consistently. Put the path either 
at the beginning (after any prefix) or end. Beginning is more like a 
compiler error. End puts what the problem is before it's off the edge of 
the screen. 

> Signed-off-by: Frank Rowand <frank.rowand@sony.com>
> ---
> Changes since v3:
>   - Add expected value of refcount for destroy cset entry error.  Also
>     explain the cause of the error.
> 
>  drivers/of/dynamic.c | 29 +++++++++++++++++++++++++++++
>  drivers/of/overlay.c |  1 +
>  include/linux/of.h   | 15 ++++++++++-----
>  3 files changed, 40 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
> index f4f8ed9b5454..24c97b7a050f 100644
> --- a/drivers/of/dynamic.c
> +++ b/drivers/of/dynamic.c
> @@ -330,6 +330,25 @@ void of_node_release(struct kobject *kobj)
>  	if (!of_node_check_flag(node, OF_DYNAMIC))
>  		return;
>  
> +	if (of_node_check_flag(node, OF_OVERLAY)) {
> +
> +		if (!of_node_check_flag(node, OF_OVERLAY_FREE_CSET)) {

I worry the flags are getting unwieldy.

> +			/* premature refcount of zero, do not free memory */
> +			pr_err("ERROR: memory leak %s() overlay node %pOF before free overlay changeset\n",
> +			       __func__, node);
> +			return;
> +		}
> +
> +		/*
> +		 * If node->properties non-empty then properties were added
> +		 * to this node either by different overlay that has not
> +		 * yet been removed, or by a non-overlay mechanism.
> +		 */
> +		if (node->properties)
> +			pr_err("ERROR: %s() overlay node %pOF contains unexpected properties\n",
> +			       __func__, node);
> +	}
> +
>  	property_list_free(node->properties);
>  	property_list_free(node->deadprops);
>  
> @@ -434,6 +453,16 @@ struct device_node *__of_node_dup(const struct device_node *np,
>  
>  static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
>  {
> +	if (ce->action == OF_RECONFIG_ATTACH_NODE &&
> +	    of_node_check_flag(ce->np, OF_OVERLAY)) {
> +		if (kref_read(&ce->np->kobj.kref) > 1) {
> +			pr_err("ERROR: memory leak - destroy cset entry: attach overlay node %pOF expected refcount 1 instead of %d.  of_node_get() / of_node_put() are unbalanced for this node.\n",
> +			       ce->np, kref_read(&ce->np->kobj.kref));
> +		} else {
> +			of_node_set_flag(ce->np, OF_OVERLAY_FREE_CSET);
> +		}
> +	}
> +
>  	of_node_put(ce->np);
>  	list_del(&ce->node);
>  	kfree(ce);
> diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
> index eda57ef12fd0..1176cb4b6e4e 100644
> --- a/drivers/of/overlay.c
> +++ b/drivers/of/overlay.c
> @@ -373,6 +373,7 @@ static int add_changeset_node(struct overlay_changeset *ovcs,
>  			return -ENOMEM;
>  
>  		tchild->parent = target_node;
> +		of_node_set_flag(tchild, OF_OVERLAY);
>  
>  		ret = of_changeset_attach_node(&ovcs->cset, tchild);
>  		if (ret)
> diff --git a/include/linux/of.h b/include/linux/of.h
> index 4d25e4f952d9..aa1dafaec6ae 100644
> --- a/include/linux/of.h
> +++ b/include/linux/of.h
> @@ -138,11 +138,16 @@ static inline void of_node_put(struct device_node *node) { }
>  extern struct device_node *of_stdout;
>  extern raw_spinlock_t devtree_lock;
>  
> -/* flag descriptions (need to be visible even when !CONFIG_OF) */
> -#define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
> -#define OF_DETACHED	2 /* node has been detached from the device tree */
> -#define OF_POPULATED	3 /* device already created for the node */
> -#define OF_POPULATED_BUS	4 /* of_platform_populate recursed to children of this node */
> +/*
> + * struct device_node flag descriptions
> + * (need to be visible even when !CONFIG_OF)
> + */
> +#define OF_DYNAMIC		1 /* (and properties) allocated via kmalloc */
> +#define OF_DETACHED		2 /* detached from the device tree */
> +#define OF_POPULATED		3 /* device already created */
> +#define OF_POPULATED_BUS	4 /* platform bus created for children */
> +#define OF_OVERLAY		5 /* allocated for an overlay */
> +#define OF_OVERLAY_FREE_CSET	6 /* in overlay cset being freed */
>  
>  #define OF_BAD_ADDR	((u64)-1)
>  
> -- 
> Frank Rowand <frank.rowand@sony.com>
>
Frank Rowand Oct. 18, 2018, 7:01 p.m. UTC | #3
On 10/18/18 10:03, Rob Herring wrote:
> On Mon, Oct 15, 2018 at 07:37:21PM -0700, frowand.list@gmail.com wrote:
>> From: Frank Rowand <frank.rowand@sony.com>
>>
>> Add checks:
>>   - attempted kfree due to refcount reaching zero before overlay
>>     is removed
>>   - properties linked to an overlay node when the node is removed
>>   - node refcount > one during node removal in a changeset destroy,
>>     if the node was created by the changeset
>>
>> After applying this patch, several validation warnings will be
>> reported from the devicetree unittest during boot due to
>> pre-existing devicetree bugs. The warnings will be similar to:
>>
>>   OF: ERROR: of_node_release() overlay node /testcase-data/overlay-node/test-bus/test-unittest11/test-unittest111 contains unexpected properties
>>   OF: ERROR: memory leak - destroy cset entry: attach overlay node /testcase-data-2/substation@100/hvac-medium-2 expected refcount 1 instead of 2.  of_node_get() / of_node_put() are unbalanced for this node.
> 
> These messages could be formatted more consistently. Put the path either 
> at the beginning (after any prefix) or end. Beginning is more like a 
> compiler error. End puts what the problem is before it's off the edge of 
> the screen. 

The inconsistency makes the word flow more natural, but I agree that
consistency is more important.  I think I can make all the messages
say the problem first, then provide the path at the end.


>> Signed-off-by: Frank Rowand <frank.rowand@sony.com>
>> ---
>> Changes since v3:
>>   - Add expected value of refcount for destroy cset entry error.  Also
>>     explain the cause of the error.
>>
>>  drivers/of/dynamic.c | 29 +++++++++++++++++++++++++++++
>>  drivers/of/overlay.c |  1 +
>>  include/linux/of.h   | 15 ++++++++++-----
>>  3 files changed, 40 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
>> index f4f8ed9b5454..24c97b7a050f 100644
>> --- a/drivers/of/dynamic.c
>> +++ b/drivers/of/dynamic.c
>> @@ -330,6 +330,25 @@ void of_node_release(struct kobject *kobj)
>>  	if (!of_node_check_flag(node, OF_DYNAMIC))
>>  		return;
>>  
>> +	if (of_node_check_flag(node, OF_OVERLAY)) {
>> +
>> +		if (!of_node_check_flag(node, OF_OVERLAY_FREE_CSET)) {
> 
> I worry the flags are getting unwieldy.

I considered that.  I think we are still ok, and I don't have a better
solution than adding flag values.  (I did have some Rube Goldberg
variations.)


> 
>> +			/* premature refcount of zero, do not free memory */
>> +			pr_err("ERROR: memory leak %s() overlay node %pOF before free overlay changeset\n",
>> +			       __func__, node);
>> +			return;
>> +		}
>> +
>> +		/*
>> +		 * If node->properties non-empty then properties were added
>> +		 * to this node either by different overlay that has not
>> +		 * yet been removed, or by a non-overlay mechanism.
>> +		 */
>> +		if (node->properties)
>> +			pr_err("ERROR: %s() overlay node %pOF contains unexpected properties\n",
>> +			       __func__, node);
>> +	}
>> +
>>  	property_list_free(node->properties);
>>  	property_list_free(node->deadprops);
>>  
>> @@ -434,6 +453,16 @@ struct device_node *__of_node_dup(const struct device_node *np,
>>  
>>  static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
>>  {
>> +	if (ce->action == OF_RECONFIG_ATTACH_NODE &&
>> +	    of_node_check_flag(ce->np, OF_OVERLAY)) {
>> +		if (kref_read(&ce->np->kobj.kref) > 1) {
>> +			pr_err("ERROR: memory leak - destroy cset entry: attach overlay node %pOF expected refcount 1 instead of %d.  of_node_get() / of_node_put() are unbalanced for this node.\n",
>> +			       ce->np, kref_read(&ce->np->kobj.kref));
>> +		} else {
>> +			of_node_set_flag(ce->np, OF_OVERLAY_FREE_CSET);
>> +		}
>> +	}
>> +
>>  	of_node_put(ce->np);
>>  	list_del(&ce->node);
>>  	kfree(ce);
>> diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
>> index eda57ef12fd0..1176cb4b6e4e 100644
>> --- a/drivers/of/overlay.c
>> +++ b/drivers/of/overlay.c
>> @@ -373,6 +373,7 @@ static int add_changeset_node(struct overlay_changeset *ovcs,
>>  			return -ENOMEM;
>>  
>>  		tchild->parent = target_node;
>> +		of_node_set_flag(tchild, OF_OVERLAY);
>>  
>>  		ret = of_changeset_attach_node(&ovcs->cset, tchild);
>>  		if (ret)
>> diff --git a/include/linux/of.h b/include/linux/of.h
>> index 4d25e4f952d9..aa1dafaec6ae 100644
>> --- a/include/linux/of.h
>> +++ b/include/linux/of.h
>> @@ -138,11 +138,16 @@ static inline void of_node_put(struct device_node *node) { }
>>  extern struct device_node *of_stdout;
>>  extern raw_spinlock_t devtree_lock;
>>  
>> -/* flag descriptions (need to be visible even when !CONFIG_OF) */
>> -#define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
>> -#define OF_DETACHED	2 /* node has been detached from the device tree */
>> -#define OF_POPULATED	3 /* device already created for the node */
>> -#define OF_POPULATED_BUS	4 /* of_platform_populate recursed to children of this node */
>> +/*
>> + * struct device_node flag descriptions
>> + * (need to be visible even when !CONFIG_OF)
>> + */
>> +#define OF_DYNAMIC		1 /* (and properties) allocated via kmalloc */
>> +#define OF_DETACHED		2 /* detached from the device tree */
>> +#define OF_POPULATED		3 /* device already created */
>> +#define OF_POPULATED_BUS	4 /* platform bus created for children */
>> +#define OF_OVERLAY		5 /* allocated for an overlay */
>> +#define OF_OVERLAY_FREE_CSET	6 /* in overlay cset being freed */
>>  
>>  #define OF_BAD_ADDR	((u64)-1)
>>  
>> -- 
>> Frank Rowand <frank.rowand@sony.com>
>>
>
Alan Tull Oct. 18, 2018, 8:24 p.m. UTC | #4
On Wed, Oct 17, 2018 at 4:30 PM Alan Tull <atull@kernel.org> wrote:
>
> On Mon, Oct 15, 2018 at 9:39 PM <frowand.list@gmail.com> wrote:
>
> Hi Frank,
>
> >
> > From: Frank Rowand <frank.rowand@sony.com>
> >
> > Add checks:
> >   - attempted kfree due to refcount reaching zero before overlay
> >     is removed
> >   - properties linked to an overlay node when the node is removed
> >   - node refcount > one during node removal in a changeset destroy,
> >     if the node was created by the changeset
> >
> > After applying this patch, several validation warnings will be
> > reported from the devicetree unittest during boot due to
> > pre-existing devicetree bugs. The warnings will be similar to:
> >
> >   OF: ERROR: of_node_release() overlay node /testcase-data/overlay-node/test-bus/test-unittest11/test-unittest111 contains unexpected properties
> >   OF: ERROR: memory leak - destroy cset entry: attach overlay node /testcase-data-2/substation@100/hvac-medium-2 expected refcount 1 instead of 2.  of_node_get() / of_node_put() are unbalanced for this node.
> >
> > Signed-off-by: Frank Rowand <frank.rowand@sony.com>
> > ---
> > Changes since v3:
> >   - Add expected value of refcount for destroy cset entry error.  Also
> >     explain the cause of the error.
> >
> >  drivers/of/dynamic.c | 29 +++++++++++++++++++++++++++++
> >  drivers/of/overlay.c |  1 +
> >  include/linux/of.h   | 15 ++++++++++-----
> >  3 files changed, 40 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
> > index f4f8ed9b5454..24c97b7a050f 100644
> > --- a/drivers/of/dynamic.c
> > +++ b/drivers/of/dynamic.c
> > @@ -330,6 +330,25 @@ void of_node_release(struct kobject *kobj)
> >         if (!of_node_check_flag(node, OF_DYNAMIC))
> >                 return;
> >
> > +       if (of_node_check_flag(node, OF_OVERLAY)) {
> > +
> > +               if (!of_node_check_flag(node, OF_OVERLAY_FREE_CSET)) {
> > +                       /* premature refcount of zero, do not free memory */
> > +                       pr_err("ERROR: memory leak %s() overlay node %pOF before free overlay changeset\n",
> > +                              __func__, node);
> > +                       return;
> > +               }
> > +
> > +               /*
> > +                * If node->properties non-empty then properties were added
> > +                * to this node either by different overlay that has not
> > +                * yet been removed, or by a non-overlay mechanism.
> > +                */
> > +               if (node->properties)
> > +                       pr_err("ERROR: %s() overlay node %pOF contains unexpected properties\n",
> > +                              __func__, node);
> > +       }
> > +
> >         property_list_free(node->properties);
> >         property_list_free(node->deadprops);
> >
> > @@ -434,6 +453,16 @@ struct device_node *__of_node_dup(const struct device_node *np,
> >
> >  static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
> >  {
> > +       if (ce->action == OF_RECONFIG_ATTACH_NODE &&
> > +           of_node_check_flag(ce->np, OF_OVERLAY)) {
> > +               if (kref_read(&ce->np->kobj.kref) > 1) {
> > +                       pr_err("ERROR: memory leak - destroy cset entry: attach overlay node %pOF expected refcount 1 instead of %d.  of_node_get() / of_node_put() are unbalanced for this node.\n",
> > +                              ce->np, kref_read(&ce->np->kobj.kref));
>
> Still testing as much as I have time to do.
>
> I'm hitting this error message once when removing an overlay that adds
> several child nodes.  The only node I get the message for was a node
> that added a fixed-clock (the other nodes didn't trigger the error).
> Then even if I edited all the rest of the overlay DTS and removed all
> other child nodes and all references to the clock from other nodes, I
> still got the error.
>
> Removing dtbo: 1-socfpga_arria10_socdk_sdmmc_ghrd_ovl_ext_cfg.dtb
> [   72.032270] OF: ERROR: memory leak - destroy cset entry: attach
> overlay node /soc/base_fpga_region/clk_0 expected refcount 1 instead
> of 2.  of_node_get() / of_node_put() are unbalanced for this node.

Update: with some helpful offline debug patches from Frank, I was able
to find the source of the of_node_get/put imbalance.  The fixed-rate
clock driver calls of_clk_add_provider() when probed but never calls
of_clk_del_provider().

This patchset will quite likely uncover other of_node_get/put
imbalances around the kernel.

Alan

>
> Here's the very stripped down overlay:
>
> /dts-v1/;
> /plugin/;
> / {
>         fragment@0 {
>                 target-path = "/soc/base_fpga_region";
>                 #address-cells = <1>;
>                 #size-cells = <1>;
>
>                 __overlay__ {
>                         external-fpga-config;
>
>                         #address-cells = <1>;
>                         #size-cells = <1>;
>
>                         clk_0: clk_0 {
>                                 compatible = "fixed-clock";
>                                 #clock-cells = <0>;
>                                 clock-frequency = <100000000>;  /* 100.00 MHz */
>                                 clock-output-names = "clk_0-clk";
>                         };
>                 };
>         };
> };
diff mbox series

Patch

diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index f4f8ed9b5454..24c97b7a050f 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -330,6 +330,25 @@  void of_node_release(struct kobject *kobj)
 	if (!of_node_check_flag(node, OF_DYNAMIC))
 		return;
 
+	if (of_node_check_flag(node, OF_OVERLAY)) {
+
+		if (!of_node_check_flag(node, OF_OVERLAY_FREE_CSET)) {
+			/* premature refcount of zero, do not free memory */
+			pr_err("ERROR: memory leak %s() overlay node %pOF before free overlay changeset\n",
+			       __func__, node);
+			return;
+		}
+
+		/*
+		 * If node->properties non-empty then properties were added
+		 * to this node either by different overlay that has not
+		 * yet been removed, or by a non-overlay mechanism.
+		 */
+		if (node->properties)
+			pr_err("ERROR: %s() overlay node %pOF contains unexpected properties\n",
+			       __func__, node);
+	}
+
 	property_list_free(node->properties);
 	property_list_free(node->deadprops);
 
@@ -434,6 +453,16 @@  struct device_node *__of_node_dup(const struct device_node *np,
 
 static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
 {
+	if (ce->action == OF_RECONFIG_ATTACH_NODE &&
+	    of_node_check_flag(ce->np, OF_OVERLAY)) {
+		if (kref_read(&ce->np->kobj.kref) > 1) {
+			pr_err("ERROR: memory leak - destroy cset entry: attach overlay node %pOF expected refcount 1 instead of %d.  of_node_get() / of_node_put() are unbalanced for this node.\n",
+			       ce->np, kref_read(&ce->np->kobj.kref));
+		} else {
+			of_node_set_flag(ce->np, OF_OVERLAY_FREE_CSET);
+		}
+	}
+
 	of_node_put(ce->np);
 	list_del(&ce->node);
 	kfree(ce);
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index eda57ef12fd0..1176cb4b6e4e 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -373,6 +373,7 @@  static int add_changeset_node(struct overlay_changeset *ovcs,
 			return -ENOMEM;
 
 		tchild->parent = target_node;
+		of_node_set_flag(tchild, OF_OVERLAY);
 
 		ret = of_changeset_attach_node(&ovcs->cset, tchild);
 		if (ret)
diff --git a/include/linux/of.h b/include/linux/of.h
index 4d25e4f952d9..aa1dafaec6ae 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -138,11 +138,16 @@  static inline void of_node_put(struct device_node *node) { }
 extern struct device_node *of_stdout;
 extern raw_spinlock_t devtree_lock;
 
-/* flag descriptions (need to be visible even when !CONFIG_OF) */
-#define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
-#define OF_DETACHED	2 /* node has been detached from the device tree */
-#define OF_POPULATED	3 /* device already created for the node */
-#define OF_POPULATED_BUS	4 /* of_platform_populate recursed to children of this node */
+/*
+ * struct device_node flag descriptions
+ * (need to be visible even when !CONFIG_OF)
+ */
+#define OF_DYNAMIC		1 /* (and properties) allocated via kmalloc */
+#define OF_DETACHED		2 /* detached from the device tree */
+#define OF_POPULATED		3 /* device already created */
+#define OF_POPULATED_BUS	4 /* platform bus created for children */
+#define OF_OVERLAY		5 /* allocated for an overlay */
+#define OF_OVERLAY_FREE_CSET	6 /* in overlay cset being freed */
 
 #define OF_BAD_ADDR	((u64)-1)