diff mbox series

mtd: nand: raw: brcmnand: When oops in progress use pio and interrupt polling

Message ID 1556733121-20133-1-git-send-email-kdasu.kdev@gmail.com
State Superseded
Delegated to: Miquel Raynal
Headers show
Series mtd: nand: raw: brcmnand: When oops in progress use pio and interrupt polling | expand

Commit Message

Kamal Dasu May 1, 2019, 5:46 p.m. UTC
If mtd_oops is in progress switch to polling for nand command completion
interrupts and use PIO mode wihtout DMA so that the mtd_oops buffer can
be completely written in the assinged nand partition. This is needed in
cases where the panic does not happen on cpu0 and there is only one online
CPU and the panic is not on cpu0.

Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
---
 drivers/mtd/nand/raw/brcmnand/brcmnand.c | 55 ++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 3 deletions(-)

Comments

Miquel Raynal May 2, 2019, 8:25 a.m. UTC | #1
Hi Kamal,

Kamal Dasu <kdasu.kdev@gmail.com> wrote on Wed,  1 May 2019 13:46:15
-0400:

> If mtd_oops is in progress switch to polling for nand command completion

s/nand/NAND/

> interrupts and use PIO mode wihtout DMA so that the mtd_oops buffer can
> be completely written in the assinged nand partition.

What about:

"If mtd_oops is in progress, switch to polling during NAND command
completion instead of relying on DMA/interrupts so that the mtd_oops
buffer can be completely written in the assigned NAND partition."

> This is needed in
> cases where the panic does not happen on cpu0 and there is only one online
> CPU and the panic is not on cpu0.

"This is needed in case the panic does not happen on CPU0 and there is
only one online CPU."

I am not sure to understand the problem or how this can happen (and
be a problem). Have you met such issue already or is this purely
speculative?

> 
> Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
> ---
>  drivers/mtd/nand/raw/brcmnand/brcmnand.c | 55 ++++++++++++++++++++++++++++++--
>  1 file changed, 52 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> index 482c6f0..cfbe51a 100644
> --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> @@ -823,6 +823,12 @@ static inline bool has_flash_dma(struct brcmnand_controller *ctrl)
>  	return ctrl->flash_dma_base;
>  }
>  
> +static inline void disable_flash_dma_xfer(struct brcmnand_controller *ctrl)
> +{
> +	if (has_flash_dma(ctrl))
> +		ctrl->flash_dma_base = 0;
> +}
> +
>  static inline bool flash_dma_buf_ok(const void *buf)
>  {
>  	return buf && !is_vmalloc_addr(buf) &&
> @@ -1237,15 +1243,58 @@ static void brcmnand_cmd_ctrl(struct nand_chip *chip, int dat,
>  	/* intentionally left blank */
>  }
>  
> +static bool is_mtd_oops_in_progress(void)
> +{
> +	int i = 0;
> +
> +#ifdef CONFIG_MTD_OOPS
> +	if (oops_in_progress && smp_processor_id()) {
> +		int cpu = 0;
> +
> +		for_each_online_cpu(cpu)
> +			++i;
> +	}
> +#endif
> +	return i == 1 ? true : false;

I suppose return (i == 1); is enough

> +}
> +
> +static bool brcmstb_nand_wait_for_completion(struct nand_chip *chip)
> +{
> +	struct brcmnand_host *host = nand_get_controller_data(chip);
> +	struct brcmnand_controller *ctrl = host->ctrl;
> +	bool err = false;
> +	int sts;
> +
> +	if (is_mtd_oops_in_progress()) {
> +		/* Switch to interrupt polling and PIO mode */
> +		disable_flash_dma_xfer(ctrl);
> +		sts = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY |
> +					       NAND_STATUS_READY,
> +					       NAND_CTRL_RDY |
> +					       NAND_STATUS_READY, 0);
> +		err = (sts < 0) ? true : false;
> +	} else {
> +		unsigned long timeo = msecs_to_jiffies(
> +						NAND_POLL_STATUS_TIMEOUT_MS);
> +		/* wait for completion interrupt */
> +		sts = wait_for_completion_timeout(&ctrl->done, timeo);
> +		err = (sts <= 0) ? true : false;
> +	}
> +
> +	return err;
> +}
> +
>  static int brcmnand_waitfunc(struct nand_chip *chip)
>  {
>  	struct brcmnand_host *host = nand_get_controller_data(chip);
>  	struct brcmnand_controller *ctrl = host->ctrl;
> -	unsigned long timeo = msecs_to_jiffies(100);
> +	bool err = false;
>  
>  	dev_dbg(ctrl->dev, "wait on native cmd %d\n", ctrl->cmd_pending);
> -	if (ctrl->cmd_pending &&
> -			wait_for_completion_timeout(&ctrl->done, timeo) <= 0) {

What about the wait_for_completion_timeout() call in brcmnand_write()?

> +	if (ctrl->cmd_pending)
> +		err = brcmstb_nand_wait_for_completion(chip);
> +
> +	if (err) {
>  		u32 cmd = brcmnand_read_reg(ctrl, BRCMNAND_CMD_START)
>  					>> brcmnand_cmd_shift(ctrl);  
>  


Thanks,
Miquèl
Kamal Dasu May 2, 2019, 2:09 p.m. UTC | #2
On Thu, May 2, 2019 at 4:25 AM Miquel Raynal <miquel.raynal@bootlin.com> wrote:
>
> Hi Kamal,
>
> Kamal Dasu <kdasu.kdev@gmail.com> wrote on Wed,  1 May 2019 13:46:15
> -0400:
>
> > If mtd_oops is in progress switch to polling for nand command completion
>
> s/nand/NAND/

Will change this.

>
> > interrupts and use PIO mode wihtout DMA so that the mtd_oops buffer can
> > be completely written in the assinged nand partition.
>
> What about:
>
> "If mtd_oops is in progress, switch to polling during NAND command
> completion instead of relying on DMA/interrupts so that the mtd_oops
> buffer can be completely written in the assigned NAND partition."
>

Will make this change as well

> > This is needed in
> > cases where the panic does not happen on cpu0 and there is only one online
> > CPU and the panic is not on cpu0.
>
> "This is needed in case the panic does not happen on CPU0 and there is
> only one online CPU."
>
> I am not sure to understand the problem or how this can happen (and
> be a problem). Have you met such issue already or is this purely
> speculative?

We have seen this issue and tested it on multi core SoCs.

>
> >
> > Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
> > ---
> >  drivers/mtd/nand/raw/brcmnand/brcmnand.c | 55 ++++++++++++++++++++++++++++++--
> >  1 file changed, 52 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> > index 482c6f0..cfbe51a 100644
> > --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> > +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
> > @@ -823,6 +823,12 @@ static inline bool has_flash_dma(struct brcmnand_controller *ctrl)
> >       return ctrl->flash_dma_base;
> >  }
> >
> > +static inline void disable_flash_dma_xfer(struct brcmnand_controller *ctrl)
> > +{
> > +     if (has_flash_dma(ctrl))
> > +             ctrl->flash_dma_base = 0;
> > +}
> > +
> >  static inline bool flash_dma_buf_ok(const void *buf)
> >  {
> >       return buf && !is_vmalloc_addr(buf) &&
> > @@ -1237,15 +1243,58 @@ static void brcmnand_cmd_ctrl(struct nand_chip *chip, int dat,
> >       /* intentionally left blank */
> >  }
> >
> > +static bool is_mtd_oops_in_progress(void)
> > +{
> > +     int i = 0;
> > +
> > +#ifdef CONFIG_MTD_OOPS
> > +     if (oops_in_progress && smp_processor_id()) {
> > +             int cpu = 0;
> > +
> > +             for_each_online_cpu(cpu)
> > +                     ++i;
> > +     }
> > +#endif
> > +     return i == 1 ? true : false;
>
> I suppose return (i == 1); is enough
>

Ok will make the change.

> > +}
> > +
> > +static bool brcmstb_nand_wait_for_completion(struct nand_chip *chip)
> > +{
> > +     struct brcmnand_host *host = nand_get_controller_data(chip);
> > +     struct brcmnand_controller *ctrl = host->ctrl;
> > +     bool err = false;
> > +     int sts;
> > +
> > +     if (is_mtd_oops_in_progress()) {
> > +             /* Switch to interrupt polling and PIO mode */
> > +             disable_flash_dma_xfer(ctrl);
> > +             sts = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY |
> > +                                            NAND_STATUS_READY,
> > +                                            NAND_CTRL_RDY |
> > +                                            NAND_STATUS_READY, 0);
> > +             err = (sts < 0) ? true : false;
> > +     } else {
> > +             unsigned long timeo = msecs_to_jiffies(
> > +                                             NAND_POLL_STATUS_TIMEOUT_MS);
> > +             /* wait for completion interrupt */
> > +             sts = wait_for_completion_timeout(&ctrl->done, timeo);
> > +             err = (sts <= 0) ? true : false;
> > +     }
> > +
> > +     return err;
> > +}
> > +
> >  static int brcmnand_waitfunc(struct nand_chip *chip)
> >  {
> >       struct brcmnand_host *host = nand_get_controller_data(chip);
> >       struct brcmnand_controller *ctrl = host->ctrl;
> > -     unsigned long timeo = msecs_to_jiffies(100);
> > +     bool err = false;
> >
> >       dev_dbg(ctrl->dev, "wait on native cmd %d\n", ctrl->cmd_pending);
> > -     if (ctrl->cmd_pending &&
> > -                     wait_for_completion_timeout(&ctrl->done, timeo) <= 0) {
>
> What about the wait_for_completion_timeout() call in brcmnand_write()?
>

brcmnand_write() too calls brcmnand_waitfunc(), will poll if it needs
to for completion.

> > +     if (ctrl->cmd_pending)
> > +             err = brcmstb_nand_wait_for_completion(chip);
> > +
> > +     if (err) {
> >               u32 cmd = brcmnand_read_reg(ctrl, BRCMNAND_CMD_START)
> >                                       >> brcmnand_cmd_shift(ctrl);
> >
>
>
> Thanks,
> Miquèl

Thanks
Kamal
Richard Weinberger May 6, 2019, 4:01 p.m. UTC | #3
On Wed, May 1, 2019 at 7:52 PM Kamal Dasu <kdasu.kdev@gmail.com> wrote:
>
> If mtd_oops is in progress switch to polling for nand command completion
> interrupts and use PIO mode wihtout DMA so that the mtd_oops buffer can
> be completely written in the assinged nand partition. This is needed in
> cases where the panic does not happen on cpu0 and there is only one online
> CPU and the panic is not on cpu0.

This optimization is highly specific to your hardware and AFAIK cannot
be applied
in general to brcmnand.

So the problem you see is that depending on the oops you can no longer use dma
or interrupts in the driver?

How about adding a new flag to panic_nand_write() which tells the nand
driver that
this is a panic write?
That way you can fall back to pio and polling mode without checking cpu numbers
and oops_in_progress.
Kamal Dasu May 16, 2019, 2:47 p.m. UTC | #4
On Mon, May 6, 2019 at 12:01 PM Richard Weinberger
<richard.weinberger@gmail.com> wrote:
>
> On Wed, May 1, 2019 at 7:52 PM Kamal Dasu <kdasu.kdev@gmail.com> wrote:
> >
> > If mtd_oops is in progress switch to polling for nand command completion
> > interrupts and use PIO mode wihtout DMA so that the mtd_oops buffer can
> > be completely written in the assinged nand partition. This is needed in
> > cases where the panic does not happen on cpu0 and there is only one online
> > CPU and the panic is not on cpu0.
>
> This optimization is highly specific to your hardware and AFAIK cannot
> be applied
> in general to brcmnand.
>
> So the problem you see is that depending on the oops you can no longer use dma
> or interrupts in the driver?
>
> How about adding a new flag to panic_nand_write() which tells the nand
> driver that
> this is a panic write?
> That way you can fall back to pio and polling mode without checking cpu numbers
> and oops_in_progress.
>

Thanks for your review  Richard. Will add flag to let low level
controller drivers know that that its a panic_write and make brcmnand
code more generic and simply fallback to pio and polling in such a
case. Will send a V2 patch with these recommended changes.

Thanks
Kamal

> --
> Thanks,
> //richard
diff mbox series

Patch

diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 482c6f0..cfbe51a 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -823,6 +823,12 @@  static inline bool has_flash_dma(struct brcmnand_controller *ctrl)
 	return ctrl->flash_dma_base;
 }
 
+static inline void disable_flash_dma_xfer(struct brcmnand_controller *ctrl)
+{
+	if (has_flash_dma(ctrl))
+		ctrl->flash_dma_base = 0;
+}
+
 static inline bool flash_dma_buf_ok(const void *buf)
 {
 	return buf && !is_vmalloc_addr(buf) &&
@@ -1237,15 +1243,58 @@  static void brcmnand_cmd_ctrl(struct nand_chip *chip, int dat,
 	/* intentionally left blank */
 }
 
+static bool is_mtd_oops_in_progress(void)
+{
+	int i = 0;
+
+#ifdef CONFIG_MTD_OOPS
+	if (oops_in_progress && smp_processor_id()) {
+		int cpu = 0;
+
+		for_each_online_cpu(cpu)
+			++i;
+	}
+#endif
+	return i == 1 ? true : false;
+}
+
+static bool brcmstb_nand_wait_for_completion(struct nand_chip *chip)
+{
+	struct brcmnand_host *host = nand_get_controller_data(chip);
+	struct brcmnand_controller *ctrl = host->ctrl;
+	bool err = false;
+	int sts;
+
+	if (is_mtd_oops_in_progress()) {
+		/* Switch to interrupt polling and PIO mode */
+		disable_flash_dma_xfer(ctrl);
+		sts = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY |
+					       NAND_STATUS_READY,
+					       NAND_CTRL_RDY |
+					       NAND_STATUS_READY, 0);
+		err = (sts < 0) ? true : false;
+	} else {
+		unsigned long timeo = msecs_to_jiffies(
+						NAND_POLL_STATUS_TIMEOUT_MS);
+		/* wait for completion interrupt */
+		sts = wait_for_completion_timeout(&ctrl->done, timeo);
+		err = (sts <= 0) ? true : false;
+	}
+
+	return err;
+}
+
 static int brcmnand_waitfunc(struct nand_chip *chip)
 {
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	struct brcmnand_controller *ctrl = host->ctrl;
-	unsigned long timeo = msecs_to_jiffies(100);
+	bool err = false;
 
 	dev_dbg(ctrl->dev, "wait on native cmd %d\n", ctrl->cmd_pending);
-	if (ctrl->cmd_pending &&
-			wait_for_completion_timeout(&ctrl->done, timeo) <= 0) {
+	if (ctrl->cmd_pending)
+		err = brcmstb_nand_wait_for_completion(chip);
+
+	if (err) {
 		u32 cmd = brcmnand_read_reg(ctrl, BRCMNAND_CMD_START)
 					>> brcmnand_cmd_shift(ctrl);