diff mbox series

can: flexcan: fix deadlock when using self wakeup

Message ID 20190517023652.19285-1-qiangqing.zhang@nxp.com
State Awaiting Upstream
Delegated to: David Miller
Headers show
Series can: flexcan: fix deadlock when using self wakeup | expand

Commit Message

Joakim Zhang May 17, 2019, 2:39 a.m. UTC
As reproted by Sean Nyekjaer bellow:
When suspending, when there is still can traffic on the
interfaces the flexcan immediately wakes the platform again.
As it should :-)
But it throws this error msg:
[ 3169.378661] PM: noirq suspend of devices failed

On the way down to suspend the interface that throws the error
message does call flexcan_suspend but fails to call
flexcan_noirq_suspend.
That means the flexcan_enter_stop_mode is called, but on the way
out of suspend the driver only calls flexcan_resume and skips
flexcan_noirq_resume, thus it doesn't call flexcan_exit_stop_mode.
This leaves the flexcan in stop mode, and with the current driver
it can't recover from this even with a soft reboot, it requires a
hard reboot.

Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")

This patch intends to fix the issue, and also add comment to explain the
wakeup flow.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/net/can/flexcan.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Comments

Joakim Zhang May 17, 2019, 2:47 a.m. UTC | #1
Hi Marc,

Sean Nyekjaer reported the issue, but the fix is incorrect.
https://www.spinics.net/lists/linux-can/msg01447.html

Could you help to add Sean Nyekjaer to this thread as I can't get the email address.
You can add the "Reported-by" tag if you pick up the patch.

Thanks a lot!

Best Regards,
Joakim Zhang

> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年5月17日 10:39
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: dl-linux-imx <linux-imx@nxp.com>; wg@grandegger.com;
> netdev@vger.kernel.org; Joakim Zhang <qiangqing.zhang@nxp.com>
> Subject: [PATCH] can: flexcan: fix deadlock when using self wakeup
> 
> As reproted by Sean Nyekjaer bellow:
> When suspending, when there is still can traffic on the interfaces the flexcan
> immediately wakes the platform again.
> As it should :-)
> But it throws this error msg:
> [ 3169.378661] PM: noirq suspend of devices failed
> 
> On the way down to suspend the interface that throws the error message does
> call flexcan_suspend but fails to call flexcan_noirq_suspend.
> That means the flexcan_enter_stop_mode is called, but on the way out of
> suspend the driver only calls flexcan_resume and skips flexcan_noirq_resume,
> thus it doesn't call flexcan_exit_stop_mode.
> This leaves the flexcan in stop mode, and with the current driver it can't
> recover from this even with a soft reboot, it requires a hard reboot.
> 
> Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")
> 
> This patch intends to fix the issue, and also add comment to explain the
> wakeup flow.
> 
> Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> ---
>  drivers/net/can/flexcan.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index
> e35083ff31ee..6fbce473a8c7 100644
> --- a/drivers/net/can/flexcan.c
> +++ b/drivers/net/can/flexcan.c
> @@ -286,6 +286,7 @@ struct flexcan_priv {
>  	const struct flexcan_devtype_data *devtype_data;
>  	struct regulator *reg_xceiver;
>  	struct flexcan_stop_mode stm;
> +	bool in_stop_mode;
> 
>  	/* Read and Write APIs */
>  	u32 (*read)(void __iomem *addr);
> @@ -1653,6 +1654,7 @@ static int __maybe_unused flexcan_suspend(struct
> device *device)
>  		if (device_may_wakeup(device)) {
>  			enable_irq_wake(dev->irq);
>  			flexcan_enter_stop_mode(priv);
> +			priv->in_stop_mode = true;
>  		} else {
>  			err = flexcan_chip_disable(priv);
>  			if (err)
> @@ -1679,6 +1681,11 @@ static int __maybe_unused flexcan_resume(struct
> device *device)
>  		netif_device_attach(dev);
>  		netif_start_queue(dev);
>  		if (device_may_wakeup(device)) {
> +			if (priv->in_stop_mode) {
> +				flexcan_enable_wakeup_irq(priv, false);
> +				flexcan_exit_stop_mode(priv);
> +				priv->in_stop_mode = false;
> +			}
>  			disable_irq_wake(dev->irq);
>  		} else {
>  			err = pm_runtime_force_resume(device); @@ -1715,6
> +1722,11 @@ static int __maybe_unused flexcan_noirq_suspend(struct device
> *device)
>  	struct net_device *dev = dev_get_drvdata(device);
>  	struct flexcan_priv *priv = netdev_priv(dev);
> 
> +	/* Need enable wakeup interrupt in noirq suspend stage. Otherwise,
> +	 * it will trigger continuously wakeup interrupt if the wakeup event
> +	 * comes before noirq suspend stage, and simultaneously in has enter
> +	 * the stop mode.
> +	 */
>  	if (netif_running(dev) && device_may_wakeup(device))
>  		flexcan_enable_wakeup_irq(priv, true);
> 
> @@ -1726,9 +1738,14 @@ static int __maybe_unused
> flexcan_noirq_resume(struct device *device)
>  	struct net_device *dev = dev_get_drvdata(device);
>  	struct flexcan_priv *priv = netdev_priv(dev);
> 
> +	/* Need exit stop mode in noirq resume stage. Otherwise, it will
> +	 * trigger continuously wakeup interrupt if the wakeup event comes,
> +	 * and simultaneously it has still in stop mode.
> +	 */
>  	if (netif_running(dev) && device_may_wakeup(device)) {
>  		flexcan_enable_wakeup_irq(priv, false);
>  		flexcan_exit_stop_mode(priv);
> +		priv->in_stop_mode = false;
>  	}
> 
>  	return 0;
> --
> 2.17.1
Marc Kleine-Budde May 17, 2019, 5:18 p.m. UTC | #2
On 5/17/19 4:39 AM, Joakim Zhang wrote:
> As reproted by Sean Nyekjaer bellow:
> When suspending, when there is still can traffic on the
> interfaces the flexcan immediately wakes the platform again.
> As it should :-)
> But it throws this error msg:
> [ 3169.378661] PM: noirq suspend of devices failed
> 
> On the way down to suspend the interface that throws the error
> message does call flexcan_suspend but fails to call
> flexcan_noirq_suspend.
> That means the flexcan_enter_stop_mode is called, but on the way
> out of suspend the driver only calls flexcan_resume and skips
> flexcan_noirq_resume, thus it doesn't call flexcan_exit_stop_mode.
> This leaves the flexcan in stop mode, and with the current driver
> it can't recover from this even with a soft reboot, it requires a
> hard reboot.
> 
> Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")
> 
> This patch intends to fix the issue, and also add comment to explain the
> wakeup flow.
> 
> Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>

The existing self wakeup support:

| de3578c198c6 ("can: flexcan: add self wakeup support")

looks broken to me.

According to the data sheet:

> To enter stop mode, the CPU should manually assert a global Stop Mode
> request (see the CAN1_STOP_REQ and CAN2_STOP_REQ bit in the register
> IOMUXC_GPR4) and check the acknowledgement asserted by the FlexCAN
> (see the CAN1_STOP_ACK and CAN2_STOP_ACK in the register
> IOMUXC_GPR4). The CPU must only consider the FlexCAN in Stop Mode
> when both request and acknowledgement conditions are satisfied.
you have to poll for the acknowledgement, which is not done in the
driver. Please fix that first.

As far as I understand the documentation the suspend() and resume
functions should be symmetric. If they are, you shouldn't need the
in_stop_mode hack.

regards,
Marc
Sean Nyekjaer June 8, 2019, 5:41 p.m. UTC | #3
On 17/05/2019 04.39, Joakim Zhang wrote:
> As reproted by Sean Nyekjaer bellow:
> When suspending, when there is still can traffic on the
> interfaces the flexcan immediately wakes the platform again.
> As it should :-)
> But it throws this error msg:
> [ 3169.378661] PM: noirq suspend of devices failed
> 
> On the way down to suspend the interface that throws the error
> message does call flexcan_suspend but fails to call
> flexcan_noirq_suspend.
> That means the flexcan_enter_stop_mode is called, but on the way
> out of suspend the driver only calls flexcan_resume and skips
> flexcan_noirq_resume, thus it doesn't call flexcan_exit_stop_mode.
> This leaves the flexcan in stop mode, and with the current driver
> it can't recover from this even with a soft reboot, it requires a
> hard reboot.
> 
> Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")
> 
> This patch intends to fix the issue, and also add comment to explain the
> wakeup flow.
> Reported-by: Sean Nyekjaer <sean@geanix.com>
> Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>

How is it going with the updated patch?

/Sean
Joakim Zhang June 11, 2019, 6:55 a.m. UTC | #4
> -----Original Message-----
> From: Marc Kleine-Budde <mkl@pengutronix.de>
> Sent: 2019年5月18日 1:19
> To: Joakim Zhang <qiangqing.zhang@nxp.com>; linux-can@vger.kernel.org
> Cc: dl-linux-imx <linux-imx@nxp.com>; wg@grandegger.com;
> netdev@vger.kernel.org; Aisheng Dong <aisheng.dong@nxp.com>
> Subject: Re: [PATCH] can: flexcan: fix deadlock when using self wakeup
> 
> On 5/17/19 4:39 AM, Joakim Zhang wrote:
> > As reproted by Sean Nyekjaer bellow:
> > When suspending, when there is still can traffic on the interfaces the
> > flexcan immediately wakes the platform again.
> > As it should :-)
> > But it throws this error msg:
> > [ 3169.378661] PM: noirq suspend of devices failed
> >
> > On the way down to suspend the interface that throws the error message
> > does call flexcan_suspend but fails to call flexcan_noirq_suspend.
> > That means the flexcan_enter_stop_mode is called, but on the way out
> > of suspend the driver only calls flexcan_resume and skips
> > flexcan_noirq_resume, thus it doesn't call flexcan_exit_stop_mode.
> > This leaves the flexcan in stop mode, and with the current driver it
> > can't recover from this even with a soft reboot, it requires a hard
> > reboot.
> >
> > Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")
> >
> > This patch intends to fix the issue, and also add comment to explain
> > the wakeup flow.
> >
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> 
> The existing self wakeup support:
> 
> | de3578c198c6 ("can: flexcan: add self wakeup support")
> 
> looks broken to me.
> 
> According to the data sheet:
> 
> > To enter stop mode, the CPU should manually assert a global Stop Mode
> > request (see the CAN1_STOP_REQ and CAN2_STOP_REQ bit in the register
> > IOMUXC_GPR4) and check the acknowledgement asserted by the FlexCAN
> > (see the CAN1_STOP_ACK and CAN2_STOP_ACK in the register
> IOMUXC_GPR4).
> > The CPU must only consider the FlexCAN in Stop Mode when both request
> > and acknowledgement conditions are satisfied.
> you have to poll for the acknowledgement, which is not done in the driver.
> Please fix that first.

Hi Marc,

  Patch has sent out to fix it. Thank you!

> As far as I understand the documentation the suspend() and resume functions
> should be symmetric. If they are, you shouldn't need the in_stop_mode hack.

  Yes, we don't need in_stop_mode hack if we can let FlexCAN stop mode mechanism be symmetric
in suspend() and resume(). However, it can't be realized as I explained in the patch.
  Ang suggestion can provide?
 
Best Regards,
Joakim Zhang

> regards,
> Marc
> 
> --
> Pengutronix e.K.                  | Marc Kleine-Budde           |
> Industrial Linux Solutions        | Phone: +49-231-2826-924     |
> Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
> Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |
Joakim Zhang June 11, 2019, 6:58 a.m. UTC | #5
> -----Original Message-----
> From: Sean Nyekjaer <sean@geanix.com>
> Sent: 2019年6月9日 1:42
> To: Joakim Zhang <qiangqing.zhang@nxp.com>; mkl@pengutronix.de;
> linux-can@vger.kernel.org
> Cc: dl-linux-imx <linux-imx@nxp.com>; wg@grandegger.com;
> netdev@vger.kernel.org
> Subject: Re: [PATCH] can: flexcan: fix deadlock when using self wakeup
> 
> 
> 
> On 17/05/2019 04.39, Joakim Zhang wrote:
> > As reproted by Sean Nyekjaer bellow:
> > When suspending, when there is still can traffic on the interfaces the
> > flexcan immediately wakes the platform again.
> > As it should :-)
> > But it throws this error msg:
> > [ 3169.378661] PM: noirq suspend of devices failed
> >
> > On the way down to suspend the interface that throws the error message
> > does call flexcan_suspend but fails to call flexcan_noirq_suspend.
> > That means the flexcan_enter_stop_mode is called, but on the way out
> > of suspend the driver only calls flexcan_resume and skips
> > flexcan_noirq_resume, thus it doesn't call flexcan_exit_stop_mode.
> > This leaves the flexcan in stop mode, and with the current driver it
> > can't recover from this even with a soft reboot, it requires a hard
> > reboot.
> >
> > Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")
> >
> > This patch intends to fix the issue, and also add comment to explain
> > the wakeup flow.
> > Reported-by: Sean Nyekjaer <sean@geanix.com>
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> 
> How is it going with the updated patch?

Hi Sean,

	I still need discuss with Marc about the solution.

Joakim Zhang
> /Sean
Sean Nyekjaer June 11, 2019, 7:08 a.m. UTC | #6
On 11/06/2019 08.58, Joakim Zhang wrote:
>>
>> How is it going with the updated patch?
> 
> Hi Sean,
> 
> 	I still need discuss with Marc about the solution.
> 
> Joakim Zhang

Hi, Joakim

Please include me in the loop :-)

/Sean
Joakim Zhang June 11, 2019, 7:21 a.m. UTC | #7
> -----Original Message-----
> From: Sean Nyekjaer <sean@geanix.com>
> Sent: 2019年6月11日 15:08
> To: Joakim Zhang <qiangqing.zhang@nxp.com>
> Cc: mkl@pengutronix.de; linux-can@vger.kernel.org; dl-linux-imx
> <linux-imx@nxp.com>; wg@grandegger.com; netdev@vger.kernel.org
> Subject: Re: [PATCH] can: flexcan: fix deadlock when using self wakeup
> 
> 
> 
> On 11/06/2019 08.58, Joakim Zhang wrote:
> >>
> >> How is it going with the updated patch?
> >
> > Hi Sean,
> >
> > 	I still need discuss with Marc about the solution.
> >
> > Joakim Zhang
> 
> Hi, Joakim
> 
> Please include me in the loop :-)

Okay, will add you if anything update.

Joakim Zhang
> 
> /Sean
diff mbox series

Patch

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index e35083ff31ee..6fbce473a8c7 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -286,6 +286,7 @@  struct flexcan_priv {
 	const struct flexcan_devtype_data *devtype_data;
 	struct regulator *reg_xceiver;
 	struct flexcan_stop_mode stm;
+	bool in_stop_mode;
 
 	/* Read and Write APIs */
 	u32 (*read)(void __iomem *addr);
@@ -1653,6 +1654,7 @@  static int __maybe_unused flexcan_suspend(struct device *device)
 		if (device_may_wakeup(device)) {
 			enable_irq_wake(dev->irq);
 			flexcan_enter_stop_mode(priv);
+			priv->in_stop_mode = true;
 		} else {
 			err = flexcan_chip_disable(priv);
 			if (err)
@@ -1679,6 +1681,11 @@  static int __maybe_unused flexcan_resume(struct device *device)
 		netif_device_attach(dev);
 		netif_start_queue(dev);
 		if (device_may_wakeup(device)) {
+			if (priv->in_stop_mode) {
+				flexcan_enable_wakeup_irq(priv, false);
+				flexcan_exit_stop_mode(priv);
+				priv->in_stop_mode = false;
+			}
 			disable_irq_wake(dev->irq);
 		} else {
 			err = pm_runtime_force_resume(device);
@@ -1715,6 +1722,11 @@  static int __maybe_unused flexcan_noirq_suspend(struct device *device)
 	struct net_device *dev = dev_get_drvdata(device);
 	struct flexcan_priv *priv = netdev_priv(dev);
 
+	/* Need enable wakeup interrupt in noirq suspend stage. Otherwise,
+	 * it will trigger continuously wakeup interrupt if the wakeup event
+	 * comes before noirq suspend stage, and simultaneously in has enter
+	 * the stop mode.
+	 */
 	if (netif_running(dev) && device_may_wakeup(device))
 		flexcan_enable_wakeup_irq(priv, true);
 
@@ -1726,9 +1738,14 @@  static int __maybe_unused flexcan_noirq_resume(struct device *device)
 	struct net_device *dev = dev_get_drvdata(device);
 	struct flexcan_priv *priv = netdev_priv(dev);
 
+	/* Need exit stop mode in noirq resume stage. Otherwise, it will
+	 * trigger continuously wakeup interrupt if the wakeup event comes,
+	 * and simultaneously it has still in stop mode.
+	 */
 	if (netif_running(dev) && device_may_wakeup(device)) {
 		flexcan_enable_wakeup_irq(priv, false);
 		flexcan_exit_stop_mode(priv);
+		priv->in_stop_mode = false;
 	}
 
 	return 0;