diff mbox

net: ethernet : stmicro: fixed power suspend and resume failure in stmmac driver

Message ID 1412056534-7995-1-git-send-email-hliang1025@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

hliang1025@gmail.com Sept. 30, 2014, 5:55 a.m. UTC
From: Hao Liang <hliang1025@gmail.com>

This is the fix for a power management issue caused by suspend and resume function in stmmac_main.c.
After enable CONFIG_DEBUG_ATOMIC_SLEEP which enable sleep-inside atomic section checking, power
managemet can not work normally. Board couldn't wakeup successfully after suspend. Command
"echo mem > /sys/power/state" suspend the board.

In suspend and resume function of stmmac driver, there are some sleep-inside function in atomic section
created by spin lock. These functions will causes system warnings and wakeup issue when enable
CONFIG_DEBUG_ATOMIC_SLEEP.

This bug was fixed by:
* replace some sleep function with non-sleep function
  clk_disable_unprepare -> clk_disable ...
* decrease the atomic area created by spin lock function. The original atomic area in resume function is
too large.

Signed-off-by: Hao Liang <hliang1025@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

Comments

Giuseppe CAVALLARO Sept. 30, 2014, 1 p.m. UTC | #1
Hello Hao Liang

On 9/30/2014 7:55 AM, hliang1025@gmail.com wrote:
> From: Hao Liang <hliang1025@gmail.com>
>
> This is the fix for a power management issue caused by suspend and resume function in stmmac_main.c.
> After enable CONFIG_DEBUG_ATOMIC_SLEEP which enable sleep-inside atomic section checking, power
> managemet can not work normally. Board couldn't wakeup successfully after suspend. Command
> "echo mem > /sys/power/state" suspend the board.
>
> In suspend and resume function of stmmac driver, there are some sleep-inside function in atomic section
> created by spin lock. These functions will causes system warnings and wakeup issue when enable
> CONFIG_DEBUG_ATOMIC_SLEEP.
>
> This bug was fixed by:
> * replace some sleep function with non-sleep function
>    clk_disable_unprepare -> clk_disable ...
> * decrease the atomic area created by spin lock function. The original atomic area in resume function is
> too large.

I had done something in this patch:
    [PATCH (net.git)] stmmac: fix and review whole driver locking

Indeed I was not able to review it after some advice but I will do it!

So we can review your patch and then I will re-base my work on top of
yours.

I just wonder if the spinlock can be safely removed instead of
surrounding just the code to clear the descriptors that should
not be claimed by others. Maybe the only part shared is the
stmmac_hw_setup that can be invoked by open method.

peppe

> Signed-off-by: Hao Liang <hliang1025@gmail.com>
> ---
>   drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   13 +++++++------
>   1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 6e6ee22..2effbfe 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -2924,9 +2924,8 @@ int stmmac_suspend(struct net_device *ndev)
>   	if (priv->phydev)
>   		phy_stop(priv->phydev);
>
> -	spin_lock_irqsave(&priv->lock, flags);
> -
>   	netif_device_detach(ndev);
> +
>   	netif_stop_queue(ndev);
>
>   	napi_disable(&priv->napi);
> @@ -2935,6 +2934,8 @@ int stmmac_suspend(struct net_device *ndev)
>   	priv->hw->dma->stop_tx(priv->ioaddr);
>   	priv->hw->dma->stop_rx(priv->ioaddr);
>
> +	spin_lock_irqsave(&priv->lock, flags);
> +
>   	stmmac_clear_descriptors(priv);
>
>   	/* Enable Power down mode by programming the PMT regs */
> @@ -2945,7 +2946,7 @@ int stmmac_suspend(struct net_device *ndev)
>   		stmmac_set_mac(priv->ioaddr, false);
>   		pinctrl_pm_select_sleep_state(priv->device);
>   		/* Disable clock in case of PWM is off */
> -		clk_disable_unprepare(priv->stmmac_clk);
> +		clk_disable(priv->stmmac_clk);
>   	}
>   	spin_unlock_irqrestore(&priv->lock, flags);
>
> @@ -2977,12 +2978,14 @@ int stmmac_resume(struct net_device *ndev)
>   	} else {
>   		pinctrl_pm_select_default_state(priv->device);
>   		/* enable the clk prevously disabled */
> -		clk_prepare_enable(priv->stmmac_clk);
> +		clk_enable(priv->stmmac_clk);
>   		/* reset the phy so that it's ready */
>   		if (priv->mii)
>   			stmmac_mdio_reset(priv->mii);
>   	}
>
> +	spin_unlock_irqrestore(&priv->lock, flags);
> +
>   	netif_device_attach(ndev);
>
>   	stmmac_hw_setup(ndev);
> @@ -2991,8 +2994,6 @@ int stmmac_resume(struct net_device *ndev)
>
>   	netif_start_queue(ndev);
>
> -	spin_unlock_irqrestore(&priv->lock, flags);
> -
>   	if (priv->phydev)
>   		phy_start(priv->phydev);
>
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Sept. 30, 2014, 7:10 p.m. UTC | #2
From: <hliang1025@gmail.com>
Date: Tue, 30 Sep 2014 13:55:34 +0800

> From: Hao Liang <hliang1025@gmail.com>
> 
> This is the fix for a power management issue caused by suspend and resume function in stmmac_main.c.
> After enable CONFIG_DEBUG_ATOMIC_SLEEP which enable sleep-inside atomic section checking, power
> managemet can not work normally. Board couldn't wakeup successfully after suspend. Command
> "echo mem > /sys/power/state" suspend the board.
> 
> In suspend and resume function of stmmac driver, there are some sleep-inside function in atomic section
> created by spin lock. These functions will causes system warnings and wakeup issue when enable
> CONFIG_DEBUG_ATOMIC_SLEEP.
> 
> This bug was fixed by:
> * replace some sleep function with non-sleep function
>   clk_disable_unprepare -> clk_disable ...
> * decrease the atomic area created by spin lock function. The original atomic area in resume function is
> too large.
> 
> Signed-off-by: Hao Liang <hliang1025@gmail.com>

I really think the ->mac->xxx calls need to be under the lock, and therefore this
spinlock region shortening is not valid.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Oct. 1, 2014, 5:45 p.m. UTC | #3
From: Hao Liang <hliang1025@gmail.com>
Date: Wed, 1 Oct 2014 14:08:28 +0800

> I double-check my patch and the ->mac->xxx calls are still under the lock.
> I think that lock is trying to protect priv struct and related data, so i
> just remove some functions have no bearing on priv struct.

It's preventing parallel invocations of the ->mac->xxx calls.

The other instances are in device open/close, where RTNL semaphore is
held, and no other code paths in the driver can be active.

You need the lock.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Giuseppe CAVALLARO Oct. 16, 2014, 8:58 a.m. UTC | #4
Hello Hao Liang

On 10/1/2014 7:45 PM, David Miller wrote:
> From: Hao Liang <hliang1025@gmail.com>
> Date: Wed, 1 Oct 2014 14:08:28 +0800
>
>> I double-check my patch and the ->mac->xxx calls are still under the lock.
>> I think that lock is trying to protect priv struct and related data, so i
>> just remove some functions have no bearing on priv struct.
>
> It's preventing parallel invocations of the ->mac->xxx calls.
>
> The other instances are in device open/close, where RTNL semaphore is
> held, and no other code paths in the driver can be active.
>
> You need the lock.

Do you have a new patch for this problem after David's advice?

I am reviewing the patches sent some weeks ago for driver looking
and I can also try to fix this in case of you have no news.

Let me know,

Regards,
peppe

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Oct. 20, 2014, 3:36 p.m. UTC | #5
From: Hao Liang <hliang1025@gmail.com>
Date: Mon, 20 Oct 2014 16:55:08 +0800

> I can't quite seize David's meaing whether the ->mac->xxx should
> inside the lock or outside the lock.  In my opinion, the ->mac->xxx
> calls did not operate any shared data or struct. The calls just
> control the hardware by write data to registers.  So ->mac->xxx
> calls can be removed from lock.

If you make a decision to program the hardware in a certain way, you
must make sure that hardware programming operation occurs atomically.

This could be because it takes multiple register writes to perform
the operation and they are dependent upon eachother, or you are
makeing multiple modifications which must occur in a certain
sequence.

If you allow these ->mac->xxx calls to run in parallel with eachother
I guarantee it will cause problems.

Stop brushing it off as a non-issue, unless you want your driver
to mysteriously not work when the race is triggered.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6e6ee22..2effbfe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2924,9 +2924,8 @@  int stmmac_suspend(struct net_device *ndev)
 	if (priv->phydev)
 		phy_stop(priv->phydev);
 
-	spin_lock_irqsave(&priv->lock, flags);
-
 	netif_device_detach(ndev);
+
 	netif_stop_queue(ndev);
 
 	napi_disable(&priv->napi);
@@ -2935,6 +2934,8 @@  int stmmac_suspend(struct net_device *ndev)
 	priv->hw->dma->stop_tx(priv->ioaddr);
 	priv->hw->dma->stop_rx(priv->ioaddr);
 
+	spin_lock_irqsave(&priv->lock, flags);
+
 	stmmac_clear_descriptors(priv);
 
 	/* Enable Power down mode by programming the PMT regs */
@@ -2945,7 +2946,7 @@  int stmmac_suspend(struct net_device *ndev)
 		stmmac_set_mac(priv->ioaddr, false);
 		pinctrl_pm_select_sleep_state(priv->device);
 		/* Disable clock in case of PWM is off */
-		clk_disable_unprepare(priv->stmmac_clk);
+		clk_disable(priv->stmmac_clk);
 	}
 	spin_unlock_irqrestore(&priv->lock, flags);
 
@@ -2977,12 +2978,14 @@  int stmmac_resume(struct net_device *ndev)
 	} else {
 		pinctrl_pm_select_default_state(priv->device);
 		/* enable the clk prevously disabled */
-		clk_prepare_enable(priv->stmmac_clk);
+		clk_enable(priv->stmmac_clk);
 		/* reset the phy so that it's ready */
 		if (priv->mii)
 			stmmac_mdio_reset(priv->mii);
 	}
 
+	spin_unlock_irqrestore(&priv->lock, flags);
+
 	netif_device_attach(ndev);
 
 	stmmac_hw_setup(ndev);
@@ -2991,8 +2994,6 @@  int stmmac_resume(struct net_device *ndev)
 
 	netif_start_queue(ndev);
 
-	spin_unlock_irqrestore(&priv->lock, flags);
-
 	if (priv->phydev)
 		phy_start(priv->phydev);