diff mbox series

net: fec: Fix reference count leak in fec series ops

Message ID 20201108095310.2892555-1-zhangqilong3@huawei.com
State Changes Requested
Delegated to: David Miller
Headers show
Series net: fec: Fix reference count leak in fec series ops | expand

Checks

Context Check Description
jkicinski/cover_letter success Link
jkicinski/fixes_present success Link
jkicinski/patch_count success Link
jkicinski/tree_selection success Guessed tree name to be net-next
jkicinski/subject_prefix warning Target tree name not specified in the subject
jkicinski/source_inline success Was 0 now: 0
jkicinski/verify_signedoff success Link
jkicinski/module_param success Was 0 now: 0
jkicinski/build_32bit success Errors and warnings before: 0 this patch: 0
jkicinski/kdoc success Errors and warnings before: 0 this patch: 0
jkicinski/verify_fixes success Link
jkicinski/checkpatch success total: 0 errors, 0 warnings, 0 checks, 58 lines checked
jkicinski/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
jkicinski/header_inline success Link
jkicinski/stable success Stable not CCed

Commit Message

Zhang Qilong Nov. 8, 2020, 9:53 a.m. UTC
pm_runtime_get_sync() will increment pm usage at first and it
will resume the device later. If runtime of the device has
error or device is in inaccessible state(or other error state),
resume operation will fail. If we do not call put operation to
decrease the reference, it will result in reference count leak.
Moreover, this device cannot enter the idle state and always
stay busy or other non-idle state later. So we fixed it through
adding pm_runtime_put_noidle.

Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while using mdio bus")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
---
 drivers/net/ethernet/freescale/fec_main.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

Comments

Andy Duan Nov. 9, 2020, 1:49 a.m. UTC | #1
From: Zhang Qilong <zhangqilong3@huawei.com> Sent: Sunday, November 8, 2020 5:53 PM
> pm_runtime_get_sync() will increment pm usage at first and it will resume the
> device later. If runtime of the device has error or device is in inaccessible
> state(or other error state), resume operation will fail. If we do not call put
> operation to decrease the reference, it will result in reference count leak.
> Moreover, this device cannot enter the idle state and always stay busy or other
> non-idle state later. So we fixed it through adding pm_runtime_put_noidle.
> 
> Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while using mdio bus")
> Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>

From early discussion for the topic, Wolfram Sang wonder if such de-reference can
be better handled by pm runtime core code.

https://lkml.org/lkml/2020/6/14/76

Regards,
Andy
> ---
>  drivers/net/ethernet/freescale/fec_main.c | 22 ++++++++++++++++------
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> b/drivers/net/ethernet/freescale/fec_main.c
> index d7919555250d..6c02f885c67e 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -1809,8 +1809,10 @@ static int fec_enet_mdio_read(struct mii_bus *bus,
> int mii_id, int regnum)
>  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> 
>  	ret = pm_runtime_get_sync(dev);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		pm_runtime_put_noidle(dev);
>  		return ret;
> +	}
> 
>  	if (is_c45) {
>  		frame_start = FEC_MMFR_ST_C45;
> @@ -1868,10 +1870,12 @@ static int fec_enet_mdio_write(struct mii_bus
> *bus, int mii_id, int regnum,
>  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> 
>  	ret = pm_runtime_get_sync(dev);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		pm_runtime_put_noidle(dev);
>  		return ret;
> -	else
> +	} else {
>  		ret = 0;
> +	}
> 
>  	if (is_c45) {
>  		frame_start = FEC_MMFR_ST_C45;
> @@ -2276,8 +2280,10 @@ static void fec_enet_get_regs(struct net_device
> *ndev,
>  	int ret;
> 
>  	ret = pm_runtime_get_sync(dev);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		pm_runtime_put_noidle(dev);
>  		return;
> +	}
> 
>  	regs->version = fec_enet_register_version;
> 
> @@ -2977,8 +2983,10 @@ fec_enet_open(struct net_device *ndev)
>  	bool reset_again;
> 
>  	ret = pm_runtime_get_sync(&fep->pdev->dev);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		pm_runtime_put_noidle(&fep->pdev->dev);
>  		return ret;
> +	}
> 
>  	pinctrl_pm_select_default_state(&fep->pdev->dev);
>  	ret = fec_enet_clk_enable(ndev, true); @@ -3771,8 +3779,10 @@
> fec_drv_remove(struct platform_device *pdev)
>  	int ret;
> 
>  	ret = pm_runtime_get_sync(&pdev->dev);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		pm_runtime_put_noidle(&pdev->dev);
>  		return ret;
> +	}
> 
>  	cancel_work_sync(&fep->tx_timeout_work);
>  	fec_ptp_stop(pdev);
> --
> 2.25.4
Zhang Qilong Nov. 9, 2020, 2:51 a.m. UTC | #2
> From: Zhang Qilong <zhangqilong3@huawei.com> Sent: Sunday, November 8,
> 2020 5:53 PM
> > pm_runtime_get_sync() will increment pm usage at first and it will
> > resume the device later. If runtime of the device has error or device
> > is in inaccessible state(or other error state), resume operation will
> > fail. If we do not call put operation to decrease the reference, it will result in
> reference count leak.
> > Moreover, this device cannot enter the idle state and always stay busy
> > or other non-idle state later. So we fixed it through adding
> pm_runtime_put_noidle.
> >
> > Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while using
> > mdio bus")
> > Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
> 
> From early discussion for the topic, Wolfram Sang wonder if such de-reference
> can be better handled by pm runtime core code.
>
I have read the discussion just now, They didn't give a definite result. I agreed
with introducing a new or help function to replace the pm_runtime_get_sync
gradually. How do you think so ?

Regards,
Zhang

> https://lkml.org/lkml/2020/6/14/76
> 
> Regards,
> Andy
> > ---
> >  drivers/net/ethernet/freescale/fec_main.c | 22 ++++++++++++++++------
> >  1 file changed, 16 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/freescale/fec_main.c
> > b/drivers/net/ethernet/freescale/fec_main.c
> > index d7919555250d..6c02f885c67e 100644
> > --- a/drivers/net/ethernet/freescale/fec_main.c
> > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > @@ -1809,8 +1809,10 @@ static int fec_enet_mdio_read(struct mii_bus
> > *bus, int mii_id, int regnum)
> >  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> >
> >  	ret = pm_runtime_get_sync(dev);
> > -	if (ret < 0)
> > +	if (ret < 0) {
> > +		pm_runtime_put_noidle(dev);
> >  		return ret;
> > +	}
> >
> >  	if (is_c45) {
> >  		frame_start = FEC_MMFR_ST_C45;
> > @@ -1868,10 +1870,12 @@ static int fec_enet_mdio_write(struct mii_bus
> > *bus, int mii_id, int regnum,
> >  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> >
> >  	ret = pm_runtime_get_sync(dev);
> > -	if (ret < 0)
> > +	if (ret < 0) {
> > +		pm_runtime_put_noidle(dev);
> >  		return ret;
> > -	else
> > +	} else {
> >  		ret = 0;
> > +	}
> >
> >  	if (is_c45) {
> >  		frame_start = FEC_MMFR_ST_C45;
> > @@ -2276,8 +2280,10 @@ static void fec_enet_get_regs(struct net_device
> > *ndev,
> >  	int ret;
> >
> >  	ret = pm_runtime_get_sync(dev);
> > -	if (ret < 0)
> > +	if (ret < 0) {
> > +		pm_runtime_put_noidle(dev);
> >  		return;
> > +	}
> >
> >  	regs->version = fec_enet_register_version;
> >
> > @@ -2977,8 +2983,10 @@ fec_enet_open(struct net_device *ndev)
> >  	bool reset_again;
> >
> >  	ret = pm_runtime_get_sync(&fep->pdev->dev);
> > -	if (ret < 0)
> > +	if (ret < 0) {
> > +		pm_runtime_put_noidle(&fep->pdev->dev);
> >  		return ret;
> > +	}
> >
> >  	pinctrl_pm_select_default_state(&fep->pdev->dev);
> >  	ret = fec_enet_clk_enable(ndev, true); @@ -3771,8 +3779,10 @@
> > fec_drv_remove(struct platform_device *pdev)
> >  	int ret;
> >
> >  	ret = pm_runtime_get_sync(&pdev->dev);
> > -	if (ret < 0)
> > +	if (ret < 0) {
> > +		pm_runtime_put_noidle(&pdev->dev);
> >  		return ret;
> > +	}
> >
> >  	cancel_work_sync(&fep->tx_timeout_work);
> >  	fec_ptp_stop(pdev);
> > --
> > 2.25.4
Andy Duan Nov. 9, 2020, 3:01 a.m. UTC | #3
From: zhangqilong <zhangqilong3@huawei.com>ent: Monday, November 9, 2020 10:52 AM
> > From: Zhang Qilong <zhangqilong3@huawei.com> Sent: Sunday, November 8,
> > 2020 5:53 PM
> > > pm_runtime_get_sync() will increment pm usage at first and it will
> > > resume the device later. If runtime of the device has error or
> > > device is in inaccessible state(or other error state), resume
> > > operation will fail. If we do not call put operation to decrease the
> > > reference, it will result in
> > reference count leak.
> > > Moreover, this device cannot enter the idle state and always stay
> > > busy or other non-idle state later. So we fixed it through adding
> > pm_runtime_put_noidle.
> > >
> > > Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while
> > > using mdio bus")
> > > Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
> >
> > From early discussion for the topic, Wolfram Sang wonder if such
> > de-reference can be better handled by pm runtime core code.
> >
> I have read the discussion just now, They didn't give a definite result. I agreed
> with introducing a new or help function to replace the pm_runtime_get_sync
> gradually. How do you think so ?
> 
> Regards,
> Zhang

I also think so to avoid so much duplication code in function driver.
> 
> > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml
> > .org%2Flkml%2F2020%2F6%2F14%2F76&amp;data=04%7C01%7Cfugang.dua
> n%40nxp.
> >
> com%7Ceaef5020d1c143dfda5208d8845a6391%7C686ea1d3bc2b4c6fa92cd99
> c5c301
> >
> 635%7C0%7C0%7C637404871050135426%7CUnknown%7CTWFpbGZsb3d8eyJ
> WIjoiMC4wL
> >
> jAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;s
> data
> > =GLi34HRLzzTnTjT54dpNXkMdvuPuyVhcSuAGqc1rdvw%3D&amp;reserved=0
> >
> > Regards,
> > Andy
> > > ---
> > >  drivers/net/ethernet/freescale/fec_main.c | 22
> > > ++++++++++++++++------
> > >  1 file changed, 16 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/freescale/fec_main.c
> > > b/drivers/net/ethernet/freescale/fec_main.c
> > > index d7919555250d..6c02f885c67e 100644
> > > --- a/drivers/net/ethernet/freescale/fec_main.c
> > > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > > @@ -1809,8 +1809,10 @@ static int fec_enet_mdio_read(struct mii_bus
> > > *bus, int mii_id, int regnum)
> > >  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> > >
> > >  	ret = pm_runtime_get_sync(dev);
> > > -	if (ret < 0)
> > > +	if (ret < 0) {
> > > +		pm_runtime_put_noidle(dev);
> > >  		return ret;
> > > +	}
> > >
> > >  	if (is_c45) {
> > >  		frame_start = FEC_MMFR_ST_C45;
> > > @@ -1868,10 +1870,12 @@ static int fec_enet_mdio_write(struct
> > > mii_bus *bus, int mii_id, int regnum,
> > >  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> > >
> > >  	ret = pm_runtime_get_sync(dev);
> > > -	if (ret < 0)
> > > +	if (ret < 0) {
> > > +		pm_runtime_put_noidle(dev);
> > >  		return ret;
> > > -	else
> > > +	} else {
> > >  		ret = 0;
> > > +	}
> > >
> > >  	if (is_c45) {
> > >  		frame_start = FEC_MMFR_ST_C45;
> > > @@ -2276,8 +2280,10 @@ static void fec_enet_get_regs(struct
> > > net_device *ndev,
> > >  	int ret;
> > >
> > >  	ret = pm_runtime_get_sync(dev);
> > > -	if (ret < 0)
> > > +	if (ret < 0) {
> > > +		pm_runtime_put_noidle(dev);
> > >  		return;
> > > +	}
> > >
> > >  	regs->version = fec_enet_register_version;
> > >
> > > @@ -2977,8 +2983,10 @@ fec_enet_open(struct net_device *ndev)
> > >  	bool reset_again;
> > >
> > >  	ret = pm_runtime_get_sync(&fep->pdev->dev);
> > > -	if (ret < 0)
> > > +	if (ret < 0) {
> > > +		pm_runtime_put_noidle(&fep->pdev->dev);
> > >  		return ret;
> > > +	}
> > >
> > >  	pinctrl_pm_select_default_state(&fep->pdev->dev);
> > >  	ret = fec_enet_clk_enable(ndev, true); @@ -3771,8 +3779,10 @@
> > > fec_drv_remove(struct platform_device *pdev)
> > >  	int ret;
> > >
> > >  	ret = pm_runtime_get_sync(&pdev->dev);
> > > -	if (ret < 0)
> > > +	if (ret < 0) {
> > > +		pm_runtime_put_noidle(&pdev->dev);
> > >  		return ret;
> > > +	}
> > >
> > >  	cancel_work_sync(&fep->tx_timeout_work);
> > >  	fec_ptp_stop(pdev);
> > > --
> > > 2.25.4
Zhang Qilong Nov. 9, 2020, 8:17 a.m. UTC | #4
> From: zhangqilong <zhangqilong3@huawei.com>ent: Monday, November 9,
> 2020 10:52 AM
> > > From: Zhang Qilong <zhangqilong3@huawei.com> Sent: Sunday, November
> > > 8,
> > > 2020 5:53 PM
> > > > pm_runtime_get_sync() will increment pm usage at first and it will
> > > > resume the device later. If runtime of the device has error or
> > > > device is in inaccessible state(or other error state), resume
> > > > operation will fail. If we do not call put operation to decrease
> > > > the reference, it will result in
> > > reference count leak.
> > > > Moreover, this device cannot enter the idle state and always stay
> > > > busy or other non-idle state later. So we fixed it through adding
> > > pm_runtime_put_noidle.
> > > >
> > > > Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while
> > > > using mdio bus")
> > > > Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
> > >
> > > From early discussion for the topic, Wolfram Sang wonder if such
> > > de-reference can be better handled by pm runtime core code.
> > >
> > I have read the discussion just now, They didn't give a definite
> > result. I agreed with introducing a new or help function to replace
> > the pm_runtime_get_sync gradually. How do you think so ?
> >
> > Regards,
> > Zhang
> 
> I also think so to avoid so much duplication code in function driver.

Hi,
 
I have added a more general operation in flowing patch
https://lore.kernel.org/netdev/20201109080938.4174745-1-zhangqilong3@huawei.com/T/#t
to solve this problem. Comments are welcome if you are free.

Regards,
Zhang

> >
> > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flk
> > >
> ml .org%2Flkml%2F2020%2F6%2F14%2F76&amp;data=04%7C01%7Cfugang.du
> a
> > n%40nxp.
> > >
> >
> com%7Ceaef5020d1c143dfda5208d8845a6391%7C686ea1d3bc2b4c6fa92cd99
> > c5c301
> > >
> >
> 635%7C0%7C0%7C637404871050135426%7CUnknown%7CTWFpbGZsb3d8eyJ
> > WIjoiMC4wL
> > >
> >
> jAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;
> s
> > data
> > >
> =GLi34HRLzzTnTjT54dpNXkMdvuPuyVhcSuAGqc1rdvw%3D&amp;reserved=0
> > >
> > > Regards,
> > > Andy
> > > > ---
> > > >  drivers/net/ethernet/freescale/fec_main.c | 22
> > > > ++++++++++++++++------
> > > >  1 file changed, 16 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/freescale/fec_main.c
> > > > b/drivers/net/ethernet/freescale/fec_main.c
> > > > index d7919555250d..6c02f885c67e 100644
> > > > --- a/drivers/net/ethernet/freescale/fec_main.c
> > > > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > > > @@ -1809,8 +1809,10 @@ static int fec_enet_mdio_read(struct
> > > > mii_bus *bus, int mii_id, int regnum)
> > > >  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> > > >
> > > >  	ret = pm_runtime_get_sync(dev);
> > > > -	if (ret < 0)
> > > > +	if (ret < 0) {
> > > > +		pm_runtime_put_noidle(dev);
> > > >  		return ret;
> > > > +	}
> > > >
> > > >  	if (is_c45) {
> > > >  		frame_start = FEC_MMFR_ST_C45;
> > > > @@ -1868,10 +1870,12 @@ static int fec_enet_mdio_write(struct
> > > > mii_bus *bus, int mii_id, int regnum,
> > > >  	bool is_c45 = !!(regnum & MII_ADDR_C45);
> > > >
> > > >  	ret = pm_runtime_get_sync(dev);
> > > > -	if (ret < 0)
> > > > +	if (ret < 0) {
> > > > +		pm_runtime_put_noidle(dev);
> > > >  		return ret;
> > > > -	else
> > > > +	} else {
> > > >  		ret = 0;
> > > > +	}
> > > >
> > > >  	if (is_c45) {
> > > >  		frame_start = FEC_MMFR_ST_C45;
> > > > @@ -2276,8 +2280,10 @@ static void fec_enet_get_regs(struct
> > > > net_device *ndev,
> > > >  	int ret;
> > > >
> > > >  	ret = pm_runtime_get_sync(dev);
> > > > -	if (ret < 0)
> > > > +	if (ret < 0) {
> > > > +		pm_runtime_put_noidle(dev);
> > > >  		return;
> > > > +	}
> > > >
> > > >  	regs->version = fec_enet_register_version;
> > > >
> > > > @@ -2977,8 +2983,10 @@ fec_enet_open(struct net_device *ndev)
> > > >  	bool reset_again;
> > > >
> > > >  	ret = pm_runtime_get_sync(&fep->pdev->dev);
> > > > -	if (ret < 0)
> > > > +	if (ret < 0) {
> > > > +		pm_runtime_put_noidle(&fep->pdev->dev);
> > > >  		return ret;
> > > > +	}
> > > >
> > > >  	pinctrl_pm_select_default_state(&fep->pdev->dev);
> > > >  	ret = fec_enet_clk_enable(ndev, true); @@ -3771,8 +3779,10 @@
> > > > fec_drv_remove(struct platform_device *pdev)
> > > >  	int ret;
> > > >
> > > >  	ret = pm_runtime_get_sync(&pdev->dev);
> > > > -	if (ret < 0)
> > > > +	if (ret < 0) {
> > > > +		pm_runtime_put_noidle(&pdev->dev);
> > > >  		return ret;
> > > > +	}
> > > >
> > > >  	cancel_work_sync(&fep->tx_timeout_work);
> > > >  	fec_ptp_stop(pdev);
> > > > --
> > > > 2.25.4
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d7919555250d..6c02f885c67e 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1809,8 +1809,10 @@  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
 	ret = pm_runtime_get_sync(dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
 		return ret;
+	}
 
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
@@ -1868,10 +1870,12 @@  static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
 	ret = pm_runtime_get_sync(dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
 		return ret;
-	else
+	} else {
 		ret = 0;
+	}
 
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
@@ -2276,8 +2280,10 @@  static void fec_enet_get_regs(struct net_device *ndev,
 	int ret;
 
 	ret = pm_runtime_get_sync(dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
 		return;
+	}
 
 	regs->version = fec_enet_register_version;
 
@@ -2977,8 +2983,10 @@  fec_enet_open(struct net_device *ndev)
 	bool reset_again;
 
 	ret = pm_runtime_get_sync(&fep->pdev->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_noidle(&fep->pdev->dev);
 		return ret;
+	}
 
 	pinctrl_pm_select_default_state(&fep->pdev->dev);
 	ret = fec_enet_clk_enable(ndev, true);
@@ -3771,8 +3779,10 @@  fec_drv_remove(struct platform_device *pdev)
 	int ret;
 
 	ret = pm_runtime_get_sync(&pdev->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_noidle(&pdev->dev);
 		return ret;
+	}
 
 	cancel_work_sync(&fep->tx_timeout_work);
 	fec_ptp_stop(pdev);