Patchwork [6/8] mxc_nand: fix correct_data function

login
register
mail settings
Submitter Sascha Hauer
Date Aug. 6, 2010, 1:53 p.m.
Message ID <1281102791-8089-7-git-send-email-s.hauer@pengutronix.de>
Download mbox | patch
Permalink /patch/61118/
State Accepted
Commit 94f77e50d658be1d3ff23fb65e4d075a6f4ebee3
Headers show

Comments

Sascha Hauer - Aug. 6, 2010, 1:53 p.m.
The v2 controller has a totally different mechanism to check
whether the data we read had ecc errors or not. Implement this.
The mechanism in the v2 controller happens to be identical to
the v3 controller.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 drivers/mtd/nand/mxc_nand.c |   42 ++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 40 insertions(+), 2 deletions(-)
Baruch Siach - Aug. 8, 2010, 6:19 a.m.
Hi Sascha,

On Fri, Aug 06, 2010 at 03:53:09PM +0200, Sascha Hauer wrote:
> The v2 controller has a totally different mechanism to check
> whether the data we read had ecc errors or not. Implement this.
> The mechanism in the v2 controller happens to be identical to
> the v3 controller.
> 
> Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> ---
>  drivers/mtd/nand/mxc_nand.c |   42 ++++++++++++++++++++++++++++++++++++++++--
>  1 files changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
> index 48b6dca..a6dc5a5 100644
> --- a/drivers/mtd/nand/mxc_nand.c
> +++ b/drivers/mtd/nand/mxc_nand.c
> @@ -94,6 +94,7 @@ struct mxc_nand_host {
>  	struct clk		*clk;
>  	int			clk_act;
>  	int			irq;
> +	int			eccsize;

This new mxc_nand_host field doesn't get initialized in this patch, although 
it is used in the .correct implementation. The next patch in this series 
includes the eccsize initialization code. This might break bisect. Won't it be 
safer to switch the order of these patches?

baruch

>  	wait_queue_head_t	irq_waitq;
>  
> @@ -342,7 +343,7 @@ static void mxc_nand_enable_hwecc(struct mtd_info *mtd, int mode)
>  	 */
>  }
>  
> -static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
> +static int mxc_nand_correct_data_v1(struct mtd_info *mtd, u_char *dat,
>  				 u_char *read_ecc, u_char *calc_ecc)
>  {
>  	struct nand_chip *nand_chip = mtd->priv;
> @@ -364,6 +365,40 @@ static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
>  	return 0;
>  }
>  
> +static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
> +				 u_char *read_ecc, u_char *calc_ecc)
> +{
> +	struct nand_chip *nand_chip = mtd->priv;
> +	struct mxc_nand_host *host = nand_chip->priv;
> +	u32 ecc_stat, err;
> +	int no_subpages = 1;
> +	int ret = 0;
> +	u8 ecc_bit_mask, err_limit;
> +
> +	ecc_bit_mask = (host->eccsize == 4) ? 0x7 : 0xf;
> +	err_limit = (host->eccsize == 4) ? 0x4 : 0x8;
> +
> +	no_subpages = mtd->writesize >> 9;
> +
> +	ecc_stat = readl(NFC_V1_V2_ECC_STATUS_RESULT);
> +
> +	do {
> +		err = ecc_stat & ecc_bit_mask;
> +		if (err > err_limit) {
> +			printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
> +			return -1;
> +		} else {
> +			ret += err;
> +		}
> +		ecc_stat >>= 4;
> +	} while (--no_subpages);
> +
> +	mtd->ecc_stats.corrected += ret;
> +	pr_debug("%d Symbol Correctable RS-ECC Error\n", ret);
> +
> +	return ret;
> +}
> +
>  static int mxc_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
>  				  u_char *ecc_code)
>  {
> @@ -790,7 +825,10 @@ static int __init mxcnd_probe(struct platform_device *pdev)
>  	if (pdata->hw_ecc) {
>  		this->ecc.calculate = mxc_nand_calculate_ecc;
>  		this->ecc.hwctl = mxc_nand_enable_hwecc;
> -		this->ecc.correct = mxc_nand_correct_data;
> +		if (nfc_is_v1())
> +			this->ecc.correct = mxc_nand_correct_data_v1;
> +		else
> +			this->ecc.correct = mxc_nand_correct_data_v2_v3;
>  		this->ecc.mode = NAND_ECC_HW;
>  	} else {
>  		this->ecc.mode = NAND_ECC_SOFT;
Russell King - ARM Linux - Aug. 8, 2010, 8:19 a.m.
On Fri, Aug 06, 2010 at 03:53:09PM +0200, Sascha Hauer wrote:
> +		err = ecc_stat & ecc_bit_mask;
> +		if (err > err_limit) {
> +			printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
> +			return -1;

Someone's being lazy.
Baruch Siach - Aug. 8, 2010, 8:32 a.m.
Hi Russell,

On Sun, Aug 08, 2010 at 09:19:56AM +0100, Russell King - ARM Linux wrote:
> On Fri, Aug 06, 2010 at 03:53:09PM +0200, Sascha Hauer wrote:
> > +		err = ecc_stat & ecc_bit_mask;
> > +		if (err > err_limit) {
> > +			printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
> > +			return -1;
> 
> Someone's being lazy.

The code at nand_read_subpage() (drivers/mtd/nand/nand_base.c) expects the 
.correct callback to return -1 on an uncorrectable error:

                stat = chip->ecc.correct(mtd, p, &chip->buffers->ecccode[i], &chip->buffers->ecccalc[i]);
                if (stat == -1)
                        mtd->ecc_stats.failed++;
                else
                        mtd->ecc_stats.corrected += stat;

baruch
Sascha Hauer - Aug. 8, 2010, 8:43 p.m.
On Sun, Aug 08, 2010 at 11:32:44AM +0300, Baruch Siach wrote:
> Hi Russell,
> 
> On Sun, Aug 08, 2010 at 09:19:56AM +0100, Russell King - ARM Linux wrote:
> > On Fri, Aug 06, 2010 at 03:53:09PM +0200, Sascha Hauer wrote:
> > > +		err = ecc_stat & ecc_bit_mask;
> > > +		if (err > err_limit) {
> > > +			printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
> > > +			return -1;
> > 
> > Someone's being lazy.
> 
> The code at nand_read_subpage() (drivers/mtd/nand/nand_base.c) expects the 
> .correct callback to return -1 on an uncorrectable error:
> 
>                 stat = chip->ecc.correct(mtd, p, &chip->buffers->ecccode[i], &chip->buffers->ecccalc[i]);
>                 if (stat == -1)
>                         mtd->ecc_stats.failed++;
>                 else
>                         mtd->ecc_stats.corrected += stat;

Then this should be changed to check for stat < 0. I found some drivers
in the tree returning an errno value in their .correct function instead
of -1.

Sascha
Sascha Hauer - Aug. 8, 2010, 8:44 p.m.
On Sun, Aug 08, 2010 at 09:19:50AM +0300, Baruch Siach wrote:
> Hi Sascha,
> 
> On Fri, Aug 06, 2010 at 03:53:09PM +0200, Sascha Hauer wrote:
> > The v2 controller has a totally different mechanism to check
> > whether the data we read had ecc errors or not. Implement this.
> > The mechanism in the v2 controller happens to be identical to
> > the v3 controller.
> > 
> > Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> > ---
> >  drivers/mtd/nand/mxc_nand.c |   42 ++++++++++++++++++++++++++++++++++++++++--
> >  1 files changed, 40 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
> > index 48b6dca..a6dc5a5 100644
> > --- a/drivers/mtd/nand/mxc_nand.c
> > +++ b/drivers/mtd/nand/mxc_nand.c
> > @@ -94,6 +94,7 @@ struct mxc_nand_host {
> >  	struct clk		*clk;
> >  	int			clk_act;
> >  	int			irq;
> > +	int			eccsize;
> 
> This new mxc_nand_host field doesn't get initialized in this patch, although 
> it is used in the .correct implementation. The next patch in this series 
> includes the eccsize initialization code. This might break bisect. Won't it be 
> safer to switch the order of these patches?

Ok, will fix.

Sascha
Russell King - ARM Linux - Aug. 8, 2010, 10:10 p.m.
On Sun, Aug 08, 2010 at 10:43:41PM +0200, Sascha Hauer wrote:
> On Sun, Aug 08, 2010 at 11:32:44AM +0300, Baruch Siach wrote:
> > Hi Russell,
> > 
> > On Sun, Aug 08, 2010 at 09:19:56AM +0100, Russell King - ARM Linux wrote:
> > > On Fri, Aug 06, 2010 at 03:53:09PM +0200, Sascha Hauer wrote:
> > > > +		err = ecc_stat & ecc_bit_mask;
> > > > +		if (err > err_limit) {
> > > > +			printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
> > > > +			return -1;
> > > 
> > > Someone's being lazy.
> > 
> > The code at nand_read_subpage() (drivers/mtd/nand/nand_base.c) expects the 
> > .correct callback to return -1 on an uncorrectable error:
> > 
> >                 stat = chip->ecc.correct(mtd, p, &chip->buffers->ecccode[i], &chip->buffers->ecccalc[i]);
> >                 if (stat == -1)
> >                         mtd->ecc_stats.failed++;
> >                 else
> >                         mtd->ecc_stats.corrected += stat;
> 
> Then this should be changed to check for stat < 0. I found some drivers
> in the tree returning an errno value in their .correct function instead
> of -1.

That's the whole danger of the whole 'return -1' evilness in the kernel.
The long established convention in the kernel is that negative numbers
returned from functions are negative errno codes.

As soon as you decide that you want a function to return -1 to indicate
an error rather than a real errno code, you lose clarity on which
functions need '-1' and which are proper negative errno codes, and then
you end up with people returning negative errno codes for functions
which should be -1, and people returning -1 for functions which should
be negative errno codes.

If you want a function to return -1 for error, then it probably makes
sense to create a ECC_CORRECT_FAILED definition which happens to be -1.
Or some other number.  But don't use plain '-1' - it looks far too much
like "I was lazy, I couldn't be bothered to look up a proper errno."

We know full well that _lots_ of people submit stuff with 'return -1'
statements where they should be proper negative errno codes, so it's
something people are having a great deal of trouble with already.  Let's
not further confuse everyone by having functions expecting called methods
to do a plain "return -1;" on error.

Patch

diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 48b6dca..a6dc5a5 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -94,6 +94,7 @@  struct mxc_nand_host {
 	struct clk		*clk;
 	int			clk_act;
 	int			irq;
+	int			eccsize;
 
 	wait_queue_head_t	irq_waitq;
 
@@ -342,7 +343,7 @@  static void mxc_nand_enable_hwecc(struct mtd_info *mtd, int mode)
 	 */
 }
 
-static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
+static int mxc_nand_correct_data_v1(struct mtd_info *mtd, u_char *dat,
 				 u_char *read_ecc, u_char *calc_ecc)
 {
 	struct nand_chip *nand_chip = mtd->priv;
@@ -364,6 +365,40 @@  static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
 	return 0;
 }
 
+static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
+				 u_char *read_ecc, u_char *calc_ecc)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
+	u32 ecc_stat, err;
+	int no_subpages = 1;
+	int ret = 0;
+	u8 ecc_bit_mask, err_limit;
+
+	ecc_bit_mask = (host->eccsize == 4) ? 0x7 : 0xf;
+	err_limit = (host->eccsize == 4) ? 0x4 : 0x8;
+
+	no_subpages = mtd->writesize >> 9;
+
+	ecc_stat = readl(NFC_V1_V2_ECC_STATUS_RESULT);
+
+	do {
+		err = ecc_stat & ecc_bit_mask;
+		if (err > err_limit) {
+			printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
+			return -1;
+		} else {
+			ret += err;
+		}
+		ecc_stat >>= 4;
+	} while (--no_subpages);
+
+	mtd->ecc_stats.corrected += ret;
+	pr_debug("%d Symbol Correctable RS-ECC Error\n", ret);
+
+	return ret;
+}
+
 static int mxc_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
 				  u_char *ecc_code)
 {
@@ -790,7 +825,10 @@  static int __init mxcnd_probe(struct platform_device *pdev)
 	if (pdata->hw_ecc) {
 		this->ecc.calculate = mxc_nand_calculate_ecc;
 		this->ecc.hwctl = mxc_nand_enable_hwecc;
-		this->ecc.correct = mxc_nand_correct_data;
+		if (nfc_is_v1())
+			this->ecc.correct = mxc_nand_correct_data_v1;
+		else
+			this->ecc.correct = mxc_nand_correct_data_v2_v3;
 		this->ecc.mode = NAND_ECC_HW;
 	} else {
 		this->ecc.mode = NAND_ECC_SOFT;