Patchwork cfi: AMD/Fujitsu compatibles: add panic write support

login
register
mail settings
Submitter Ira Snyder
Date Jan. 6, 2012, 7:29 p.m.
Message ID <1325878159-32306-1-git-send-email-iws@ovro.caltech.edu>
Download mbox | patch
Permalink /patch/134703/
State Accepted
Commit 30ec5a2cb17d78482de0cf9e38721410d48c086d
Headers show

Comments

Ira Snyder - Jan. 6, 2012, 7:29 p.m.
This allows the mtdoops driver to work on flash chips using the
AMD/Fujitsu compatible command set.

As the code comments note, the locks used throughout the normal code
paths in the driver are ignored, so that the chance of writing out the
kernel's last messages are maximized.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: linux-mtd@lists.infradead.org
---

This was tested with a Spansion S29GL512P flash chip. It is identified by
the kernel with the following output in the kernel log:
Found 1 x16 devices at 0x0 in 16-bit bank. Manufacturer ID 0x000001 Chip ID 0x002301

Rick, I have CC'd you on this email since I thought you might be
interested. While I was attempting to search for others who had written a
panic_write() for this chip, I came across your email to the linux-mtd
mailing list in April 2011.

 drivers/mtd/chips/cfi_cmdset_0002.c |  240 +++++++++++++++++++++++++++++++++++
 1 files changed, 240 insertions(+), 0 deletions(-)
Florian Fainelli - Jan. 10, 2012, 10:26 a.m.
Hello,

On 01/06/12 20:29, Ira W. Snyder wrote:
> This allows the mtdoops driver to work on flash chips using the
> AMD/Fujitsu compatible command set.
>
> As the code comments note, the locks used throughout the normal code
> paths in the driver are ignored, so that the chance of writing out the
> kernel's last messages are maximized.

This patch made me looking at the panic code, but should not this be 
made conditionnal to the enabling/disabling of the MTD oops driver?

>
> Signed-off-by: Ira W. Snyder<iws@ovro.caltech.edu>
> Cc: David Woodhouse<dwmw2@infradead.org>
> Cc: linux-mtd@lists.infradead.org
> ---
>
> This was tested with a Spansion S29GL512P flash chip. It is identified by
> the kernel with the following output in the kernel log:
> Found 1 x16 devices at 0x0 in 16-bit bank. Manufacturer ID 0x000001 Chip ID 0x002301
>
> Rick, I have CC'd you on this email since I thought you might be
> interested. While I was attempting to search for others who had written a
> panic_write() for this chip, I came across your email to the linux-mtd
> mailing list in April 2011.
>
>   drivers/mtd/chips/cfi_cmdset_0002.c |  240 +++++++++++++++++++++++++++++++++++
>   1 files changed, 240 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> index 8d70895..e2d94bb 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -59,6 +59,9 @@ static void cfi_amdstd_resume (struct mtd_info *);
>   static int cfi_amdstd_reboot(struct notifier_block *, unsigned long, void *);
>   static int cfi_amdstd_secsi_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
>
> +static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
> +				  size_t *retlen, const u_char *buf);
> +
>   static void cfi_amdstd_destroy(struct mtd_info *);
>
>   struct mtd_info *cfi_cmdset_0002(struct map_info *, int);
> @@ -443,6 +446,7 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
>   	pr_debug("MTD %s(): write buffer size %d\n", __func__,
>   			mtd->writebufsize);
>
> +	mtd->panic_write = cfi_amdstd_panic_write;
>   	mtd->reboot_notifier.notifier_call = cfi_amdstd_reboot;
>
>   	if (cfi->cfi_mode==CFI_MODE_CFI){
> @@ -1562,6 +1566,242 @@ static int cfi_amdstd_write_buffers(struct mtd_info *mtd, loff_t to, size_t len,
>   	return 0;
>   }
>
> +/*
> + * Wait for the flash chip to become ready to write data
> + *
> + * This is only called during the panic_write() path. When panic_write()
> + * is called, the kernel is in the process of a panic, and will soon be
> + * dead. Therefore we don't take any locks, and attempt to get access
> + * to the chip as soon as possible.
> + */
> +static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip,
> +				 unsigned long adr)
> +{
> +	struct cfi_private *cfi = map->fldrv_priv;
> +	int retries = 10;
> +	int i;
> +
> +	/*
> +	 * If the driver thinks the chip is idle, and no toggle bits
> +	 * are changing, then the chip is actually idle for sure.
> +	 */
> +	if (chip->state == FL_READY&&  chip_ready(map, adr))
> +		return 0;
> +
> +	/*
> +	 * Try several times to reset the chip and then wait for it
> +	 * to become idle. The upper limit of a few milliseconds of
> +	 * delay isn't a big problem: the kernel is dying anyway. It
> +	 * is more important to save the messages.
> +	 */
> +	while (retries>  0) {
> +		const unsigned long timeo = (HZ / 1000) + 1;
> +
> +		/* send the reset command */
> +		map_write(map, CMD(0xF0), chip->start);
> +
> +		/* wait for the chip to become ready */
> +		for (i = 0; i<  jiffies_to_usecs(timeo); i++) {
> +			if (chip_ready(map, adr))
> +				return 0;
> +
> +			udelay(1);
> +		}
> +	}
> +
> +	/* the chip never became ready */
> +	return -EBUSY;
> +}
> +
> +/*
> + * Write out one word of data to a single flash chip during a kernel panic
> + *
> + * This is only called during the panic_write() path. When panic_write()
> + * is called, the kernel is in the process of a panic, and will soon be
> + * dead. Therefore we don't take any locks, and attempt to get access
> + * to the chip as soon as possible.
> + *
> + * The implementation of this routine is intentionally similar to
> + * do_write_oneword(), in order to ease code maintenance.
> + */
> +static int do_panic_write_oneword(struct map_info *map, struct flchip *chip,
> +				  unsigned long adr, map_word datum)
> +{
> +	const unsigned long uWriteTimeout = (HZ / 1000) + 1;
> +	struct cfi_private *cfi = map->fldrv_priv;
> +	int retry_cnt = 0;
> +	map_word oldd;
> +	int ret = 0;
> +	int i;
> +
> +	adr += chip->start;
> +
> +	ret = cfi_amdstd_panic_wait(map, chip, adr);
> +	if (ret)
> +		return ret;
> +
> +	pr_debug("MTD %s(): PANIC WRITE 0x%.8lx(0x%.8lx)\n",
> +			__func__, adr, datum.x[0]);
> +
> +	/*
> +	 * Check for a NOP for the case when the datum to write is already
> +	 * present - it saves time and works around buggy chips that corrupt
> +	 * data at other locations when 0xff is written to a location that
> +	 * already contains 0xff.
> +	 */
> +	oldd = map_read(map, adr);
> +	if (map_word_equal(map, oldd, datum)) {
> +		pr_debug("MTD %s(): NOP\n", __func__);
> +		goto op_done;
> +	}
> +
> +	ENABLE_VPP(map);
> +
> +retry:
> +	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> +	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
> +	cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> +	map_write(map, datum, adr);
> +
> +	for (i = 0; i<  jiffies_to_usecs(uWriteTimeout); i++) {
> +		if (chip_ready(map, adr))
> +			break;
> +
> +		udelay(1);
> +	}
> +
> +	if (!chip_good(map, adr, datum)) {
> +		/* reset on all failures. */
> +		map_write(map, CMD(0xF0), chip->start);
> +		/* FIXME - should have reset delay before continuing */
> +
> +		if (++retry_cnt<= MAX_WORD_RETRIES)
> +			goto retry;
> +
> +		ret = -EIO;
> +	}
> +
> +op_done:
> +	DISABLE_VPP(map);
> +	return ret;
> +}
> +
> +/*
> + * Write out some data during a kernel panic
> + *
> + * This is used by the mtdoops driver to save the dying messages from a
> + * kernel which has panic'd.
> + *
> + * This routine ignores all of the locking used throughout the rest of the
> + * driver, in order to ensure that the data gets written out no matter what
> + * state this driver (and the flash chip itself) was in when the kernel crashed.
> + *
> + * The implementation of this routine is intentionally similar to
> + * cfi_amdstd_write_words(), in order to ease code maintenance.
> + */
> +static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
> +				  size_t *retlen, const u_char *buf)
> +{
> +	struct map_info *map = mtd->priv;
> +	struct cfi_private *cfi = map->fldrv_priv;
> +	unsigned long ofs, chipstart;
> +	int ret = 0;
> +	int chipnum;
> +
> +	*retlen = 0;
> +	if (!len)
> +		return 0;
> +
> +	chipnum = to>>  cfi->chipshift;
> +	ofs = to - (chipnum<<  cfi->chipshift);
> +	chipstart = cfi->chips[chipnum].start;
> +
> +	/* If it's not bus aligned, do the first byte write */
> +	if (ofs&  (map_bankwidth(map) - 1)) {
> +		unsigned long bus_ofs = ofs&  ~(map_bankwidth(map) - 1);
> +		int i = ofs - bus_ofs;
> +		int n = 0;
> +		map_word tmp_buf;
> +
> +		ret = cfi_amdstd_panic_wait(map,&cfi->chips[chipnum], bus_ofs);
> +		if (ret)
> +			return ret;
> +
> +		/* Load 'tmp_buf' with old contents of flash */
> +		tmp_buf = map_read(map, bus_ofs + chipstart);
> +
> +		/* Number of bytes to copy from buffer */
> +		n = min_t(int, len, map_bankwidth(map) - i);
> +
> +		tmp_buf = map_word_load_partial(map, tmp_buf, buf, i, n);
> +
> +		ret = do_panic_write_oneword(map,&cfi->chips[chipnum],
> +					     bus_ofs, tmp_buf);
> +		if (ret)
> +			return ret;
> +
> +		ofs += n;
> +		buf += n;
> +		(*retlen) += n;
> +		len -= n;
> +
> +		if (ofs>>  cfi->chipshift) {
> +			chipnum++;
> +			ofs = 0;
> +			if (chipnum == cfi->numchips)
> +				return 0;
> +		}
> +	}
> +
> +	/* We are now aligned, write as much as possible */
> +	while (len>= map_bankwidth(map)) {
> +		map_word datum;
> +
> +		datum = map_word_load(map, buf);
> +
> +		ret = do_panic_write_oneword(map,&cfi->chips[chipnum],
> +					     ofs, datum);
> +		if (ret)
> +			return ret;
> +
> +		ofs += map_bankwidth(map);
> +		buf += map_bankwidth(map);
> +		(*retlen) += map_bankwidth(map);
> +		len -= map_bankwidth(map);
> +
> +		if (ofs>>  cfi->chipshift) {
> +			chipnum++;
> +			ofs = 0;
> +			if (chipnum == cfi->numchips)
> +				return 0;
> +
> +			chipstart = cfi->chips[chipnum].start;
> +		}
> +	}
> +
> +	/* Write the trailing bytes if any */
> +	if (len&  (map_bankwidth(map) - 1)) {
> +		map_word tmp_buf;
> +
> +		ret = cfi_amdstd_panic_wait(map,&cfi->chips[chipnum], ofs);
> +		if (ret)
> +			return ret;
> +
> +		tmp_buf = map_read(map, ofs + chipstart);
> +
> +		tmp_buf = map_word_load_partial(map, tmp_buf, buf, 0, len);
> +
> +		ret = do_panic_write_oneword(map,&cfi->chips[chipnum],
> +					     ofs, tmp_buf);
> +		if (ret)
> +			return ret;
> +
> +		(*retlen) += len;
> +	}
> +
> +	return 0;
> +}
> +
>
>   /*
>    * Handle devices with one erase region, that only implement
Ira Snyder - Jan. 10, 2012, 4:38 p.m.
On Tue, Jan 10, 2012 at 11:26:23AM +0100, Florian Fainelli wrote:
> Hello,
> 
> On 01/06/12 20:29, Ira W. Snyder wrote:
> > This allows the mtdoops driver to work on flash chips using the
> > AMD/Fujitsu compatible command set.
> >
> > As the code comments note, the locks used throughout the normal code
> > paths in the driver are ignored, so that the chance of writing out the
> > kernel's last messages are maximized.
> 
> This patch made me looking at the panic code, but should not this be 
> made conditionnal to the enabling/disabling of the MTD oops driver?
> 

It is reasonable to make this code conditional based on CONFIG_MTD_OOPS.
The mtdoops driver is the only user of mtd->panic_write().

I think if we decide to make panic_write conditional for this driver, we
should make it conditional for all the others also. They are:

drivers/mtd/nand/nand_base.c
drivers/mtd/onenand/onenand_base.c

Thanks for the comments,
Ira

> >
> > Signed-off-by: Ira W. Snyder<iws@ovro.caltech.edu>
> > Cc: David Woodhouse<dwmw2@infradead.org>
> > Cc: linux-mtd@lists.infradead.org
> > ---
> >
> > This was tested with a Spansion S29GL512P flash chip. It is identified by
> > the kernel with the following output in the kernel log:
> > Found 1 x16 devices at 0x0 in 16-bit bank. Manufacturer ID 0x000001 Chip ID 0x002301
> >
> > Rick, I have CC'd you on this email since I thought you might be
> > interested. While I was attempting to search for others who had written a
> > panic_write() for this chip, I came across your email to the linux-mtd
> > mailing list in April 2011.
> >
> >   drivers/mtd/chips/cfi_cmdset_0002.c |  240 +++++++++++++++++++++++++++++++++++
> >   1 files changed, 240 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> > index 8d70895..e2d94bb 100644
> > --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> > +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> > @@ -59,6 +59,9 @@ static void cfi_amdstd_resume (struct mtd_info *);
> >   static int cfi_amdstd_reboot(struct notifier_block *, unsigned long, void *);
> >   static int cfi_amdstd_secsi_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
> >
> > +static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
> > +				  size_t *retlen, const u_char *buf);
> > +
> >   static void cfi_amdstd_destroy(struct mtd_info *);
> >
> >   struct mtd_info *cfi_cmdset_0002(struct map_info *, int);
> > @@ -443,6 +446,7 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
> >   	pr_debug("MTD %s(): write buffer size %d\n", __func__,
> >   			mtd->writebufsize);
> >
> > +	mtd->panic_write = cfi_amdstd_panic_write;
> >   	mtd->reboot_notifier.notifier_call = cfi_amdstd_reboot;
> >
> >   	if (cfi->cfi_mode==CFI_MODE_CFI){
> > @@ -1562,6 +1566,242 @@ static int cfi_amdstd_write_buffers(struct mtd_info *mtd, loff_t to, size_t len,
> >   	return 0;
> >   }
> >
> > +/*
> > + * Wait for the flash chip to become ready to write data
> > + *
> > + * This is only called during the panic_write() path. When panic_write()
> > + * is called, the kernel is in the process of a panic, and will soon be
> > + * dead. Therefore we don't take any locks, and attempt to get access
> > + * to the chip as soon as possible.
> > + */
> > +static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip,
> > +				 unsigned long adr)
> > +{
> > +	struct cfi_private *cfi = map->fldrv_priv;
> > +	int retries = 10;
> > +	int i;
> > +
> > +	/*
> > +	 * If the driver thinks the chip is idle, and no toggle bits
> > +	 * are changing, then the chip is actually idle for sure.
> > +	 */
> > +	if (chip->state == FL_READY&&  chip_ready(map, adr))
> > +		return 0;
> > +
> > +	/*
> > +	 * Try several times to reset the chip and then wait for it
> > +	 * to become idle. The upper limit of a few milliseconds of
> > +	 * delay isn't a big problem: the kernel is dying anyway. It
> > +	 * is more important to save the messages.
> > +	 */
> > +	while (retries>  0) {
> > +		const unsigned long timeo = (HZ / 1000) + 1;
> > +
> > +		/* send the reset command */
> > +		map_write(map, CMD(0xF0), chip->start);
> > +
> > +		/* wait for the chip to become ready */
> > +		for (i = 0; i<  jiffies_to_usecs(timeo); i++) {
> > +			if (chip_ready(map, adr))
> > +				return 0;
> > +
> > +			udelay(1);
> > +		}
> > +	}
> > +
> > +	/* the chip never became ready */
> > +	return -EBUSY;
> > +}
> > +
> > +/*
> > + * Write out one word of data to a single flash chip during a kernel panic
> > + *
> > + * This is only called during the panic_write() path. When panic_write()
> > + * is called, the kernel is in the process of a panic, and will soon be
> > + * dead. Therefore we don't take any locks, and attempt to get access
> > + * to the chip as soon as possible.
> > + *
> > + * The implementation of this routine is intentionally similar to
> > + * do_write_oneword(), in order to ease code maintenance.
> > + */
> > +static int do_panic_write_oneword(struct map_info *map, struct flchip *chip,
> > +				  unsigned long adr, map_word datum)
> > +{
> > +	const unsigned long uWriteTimeout = (HZ / 1000) + 1;
> > +	struct cfi_private *cfi = map->fldrv_priv;
> > +	int retry_cnt = 0;
> > +	map_word oldd;
> > +	int ret = 0;
> > +	int i;
> > +
> > +	adr += chip->start;
> > +
> > +	ret = cfi_amdstd_panic_wait(map, chip, adr);
> > +	if (ret)
> > +		return ret;
> > +
> > +	pr_debug("MTD %s(): PANIC WRITE 0x%.8lx(0x%.8lx)\n",
> > +			__func__, adr, datum.x[0]);
> > +
> > +	/*
> > +	 * Check for a NOP for the case when the datum to write is already
> > +	 * present - it saves time and works around buggy chips that corrupt
> > +	 * data at other locations when 0xff is written to a location that
> > +	 * already contains 0xff.
> > +	 */
> > +	oldd = map_read(map, adr);
> > +	if (map_word_equal(map, oldd, datum)) {
> > +		pr_debug("MTD %s(): NOP\n", __func__);
> > +		goto op_done;
> > +	}
> > +
> > +	ENABLE_VPP(map);
> > +
> > +retry:
> > +	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> > +	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
> > +	cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> > +	map_write(map, datum, adr);
> > +
> > +	for (i = 0; i<  jiffies_to_usecs(uWriteTimeout); i++) {
> > +		if (chip_ready(map, adr))
> > +			break;
> > +
> > +		udelay(1);
> > +	}
> > +
> > +	if (!chip_good(map, adr, datum)) {
> > +		/* reset on all failures. */
> > +		map_write(map, CMD(0xF0), chip->start);
> > +		/* FIXME - should have reset delay before continuing */
> > +
> > +		if (++retry_cnt<= MAX_WORD_RETRIES)
> > +			goto retry;
> > +
> > +		ret = -EIO;
> > +	}
> > +
> > +op_done:
> > +	DISABLE_VPP(map);
> > +	return ret;
> > +}
> > +
> > +/*
> > + * Write out some data during a kernel panic
> > + *
> > + * This is used by the mtdoops driver to save the dying messages from a
> > + * kernel which has panic'd.
> > + *
> > + * This routine ignores all of the locking used throughout the rest of the
> > + * driver, in order to ensure that the data gets written out no matter what
> > + * state this driver (and the flash chip itself) was in when the kernel crashed.
> > + *
> > + * The implementation of this routine is intentionally similar to
> > + * cfi_amdstd_write_words(), in order to ease code maintenance.
> > + */
> > +static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
> > +				  size_t *retlen, const u_char *buf)
> > +{
> > +	struct map_info *map = mtd->priv;
> > +	struct cfi_private *cfi = map->fldrv_priv;
> > +	unsigned long ofs, chipstart;
> > +	int ret = 0;
> > +	int chipnum;
> > +
> > +	*retlen = 0;
> > +	if (!len)
> > +		return 0;
> > +
> > +	chipnum = to>>  cfi->chipshift;
> > +	ofs = to - (chipnum<<  cfi->chipshift);
> > +	chipstart = cfi->chips[chipnum].start;
> > +
> > +	/* If it's not bus aligned, do the first byte write */
> > +	if (ofs&  (map_bankwidth(map) - 1)) {
> > +		unsigned long bus_ofs = ofs&  ~(map_bankwidth(map) - 1);
> > +		int i = ofs - bus_ofs;
> > +		int n = 0;
> > +		map_word tmp_buf;
> > +
> > +		ret = cfi_amdstd_panic_wait(map,&cfi->chips[chipnum], bus_ofs);
> > +		if (ret)
> > +			return ret;
> > +
> > +		/* Load 'tmp_buf' with old contents of flash */
> > +		tmp_buf = map_read(map, bus_ofs + chipstart);
> > +
> > +		/* Number of bytes to copy from buffer */
> > +		n = min_t(int, len, map_bankwidth(map) - i);
> > +
> > +		tmp_buf = map_word_load_partial(map, tmp_buf, buf, i, n);
> > +
> > +		ret = do_panic_write_oneword(map,&cfi->chips[chipnum],
> > +					     bus_ofs, tmp_buf);
> > +		if (ret)
> > +			return ret;
> > +
> > +		ofs += n;
> > +		buf += n;
> > +		(*retlen) += n;
> > +		len -= n;
> > +
> > +		if (ofs>>  cfi->chipshift) {
> > +			chipnum++;
> > +			ofs = 0;
> > +			if (chipnum == cfi->numchips)
> > +				return 0;
> > +		}
> > +	}
> > +
> > +	/* We are now aligned, write as much as possible */
> > +	while (len>= map_bankwidth(map)) {
> > +		map_word datum;
> > +
> > +		datum = map_word_load(map, buf);
> > +
> > +		ret = do_panic_write_oneword(map,&cfi->chips[chipnum],
> > +					     ofs, datum);
> > +		if (ret)
> > +			return ret;
> > +
> > +		ofs += map_bankwidth(map);
> > +		buf += map_bankwidth(map);
> > +		(*retlen) += map_bankwidth(map);
> > +		len -= map_bankwidth(map);
> > +
> > +		if (ofs>>  cfi->chipshift) {
> > +			chipnum++;
> > +			ofs = 0;
> > +			if (chipnum == cfi->numchips)
> > +				return 0;
> > +
> > +			chipstart = cfi->chips[chipnum].start;
> > +		}
> > +	}
> > +
> > +	/* Write the trailing bytes if any */
> > +	if (len&  (map_bankwidth(map) - 1)) {
> > +		map_word tmp_buf;
> > +
> > +		ret = cfi_amdstd_panic_wait(map,&cfi->chips[chipnum], ofs);
> > +		if (ret)
> > +			return ret;
> > +
> > +		tmp_buf = map_read(map, ofs + chipstart);
> > +
> > +		tmp_buf = map_word_load_partial(map, tmp_buf, buf, 0, len);
> > +
> > +		ret = do_panic_write_oneword(map,&cfi->chips[chipnum],
> > +					     ofs, tmp_buf);
> > +		if (ret)
> > +			return ret;
> > +
> > +		(*retlen) += len;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >
> >   /*
> >    * Handle devices with one erase region, that only implement
>
Artem Bityutskiy - Jan. 10, 2012, 10:04 p.m.
On Tue, 2012-01-10 at 11:26 +0100, Florian Fainelli wrote:
> Hello,
> 
> On 01/06/12 20:29, Ira W. Snyder wrote:
> > This allows the mtdoops driver to work on flash chips using the
> > AMD/Fujitsu compatible command set.
> >
> > As the code comments note, the locks used throughout the normal code
> > paths in the driver are ignored, so that the chance of writing out the
> > kernel's last messages are maximized.
> 
> This patch made me looking at the panic code, but should not this be 
> made conditionnal to the enabling/disabling of the MTD oops driver?

What do you mean? Conceptually, the driver is a low-level entity and it
provides I/O mechanisms, and it should have no idea who will use them -
be that is mtdoops or anything else.

Artem.
Artem Bityutskiy - Jan. 10, 2012, 10:06 p.m.
On Tue, 2012-01-10 at 08:38 -0800, Ira W. Snyder wrote:
> On Tue, Jan 10, 2012 at 11:26:23AM +0100, Florian Fainelli wrote:
> > Hello,
> > 
> > On 01/06/12 20:29, Ira W. Snyder wrote:
> > > This allows the mtdoops driver to work on flash chips using the
> > > AMD/Fujitsu compatible command set.
> > >
> > > As the code comments note, the locks used throughout the normal code
> > > paths in the driver are ignored, so that the chance of writing out the
> > > kernel's last messages are maximized.
> > 
> > This patch made me looking at the panic code, but should not this be 
> > made conditionnal to the enabling/disabling of the MTD oops driver?
> > 
> 
> It is reasonable to make this code conditional based on CONFIG_MTD_OOPS.
> The mtdoops driver is the only user of mtd->panic_write().

I think we should not break layering - the driver should not know about
entities like mtdoops which are at upper layers. It is like making a
disk driver conditional on file-systems.

Artem.
Artem Bityutskiy - Jan. 10, 2012, 10:13 p.m.
On Fri, 2012-01-06 at 11:29 -0800, Ira W. Snyder wrote:
> This allows the mtdoops driver to work on flash chips using the
> AMD/Fujitsu compatible command set.
> 
> As the code comments note, the locks used throughout the normal code
> paths in the driver are ignored, so that the chance of writing out the
> kernel's last messages are maximized.
> 
> Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>

Pushed to l2-mtd-2.6.git, thanks!
Florian Fainelli - Jan. 10, 2012, 10:15 p.m.
Hello,

Le 10/01/2012 23:04, Artem Bityutskiy a écrit :
> On Tue, 2012-01-10 at 11:26 +0100, Florian Fainelli wrote:
>> Hello,
>>
>> On 01/06/12 20:29, Ira W. Snyder wrote:
>>> This allows the mtdoops driver to work on flash chips using the
>>> AMD/Fujitsu compatible command set.
>>>
>>> As the code comments note, the locks used throughout the normal code
>>> paths in the driver are ignored, so that the chance of writing out the
>>> kernel's last messages are maximized.
>>
>> This patch made me looking at the panic code, but should not this be
>> made conditionnal to the enabling/disabling of the MTD oops driver?
>
> What do you mean? Conceptually, the driver is a low-level entity and it
> provides I/O mechanisms, and it should have no idea who will use them -
> be that is mtdoops or anything else.

I see you point. This adds a substantial amount of code, which is only 
used when the mtdoops driver is actually used. This is why I replied 
with a patch enabling/disabling the panic_write() code.

Now we can introduce a non visible Kconfig option which is selected by 
MTDOOPS and later other drivers.

My concern is about not adding more code which is barely used. I don't 
question the mtdoops driver, it definitively is useful.
--
Florian
Artem Bityutskiy - Jan. 10, 2012, 10:22 p.m.
On Tue, 2012-01-10 at 23:15 +0100, Florian Fainelli wrote:
> 
> My concern is about not adding more code which is barely used. I
> don't 
> question the mtdoops driver, it definitively is useful. 

I'd actually question "substential" - a tiny piece in generic parts
and few functions in drivers does not look substantial.

Artem.
Florian Fainelli - Jan. 11, 2012, 5:04 p.m.
On 01/10/12 23:22, Artem Bityutskiy wrote:
> On Tue, 2012-01-10 at 23:15 +0100, Florian Fainelli wrote:
>>
>> My concern is about not adding more code which is barely used. I
>> don't
>> question the mtdoops driver, it definitively is useful.
>
> I'd actually question "substential" - a tiny piece in generic parts
> and few functions in drivers does not look substantial.

Fair enough, though that is exactly how the kernel gets more and more 
bloated with features added and no way to get them disabled.
--
Florian
Artem Bityutskiy - Jan. 11, 2012, 5:52 p.m.
On Wed, 2012-01-11 at 18:04 +0100, Florian Fainelli wrote:
> 
> On 01/10/12 23:22, Artem Bityutskiy wrote:
> > On Tue, 2012-01-10 at 23:15 +0100, Florian Fainelli wrote:
> >>
> >> My concern is about not adding more code which is barely used. I
> >> don't
> >> question the mtdoops driver, it definitively is useful.
> >
> > I'd actually question "substential" - a tiny piece in generic parts
> > and few functions in drivers does not look substantial.
> 
> Fair enough, though that is exactly how the kernel gets more and more 
> bloated with features added and no way to get them disabled.

I think it is not that drammatic :-) In this particular case breaking
layering does not seem to overweight code size increase, in my opinion.
David Woodhouse - Jan. 11, 2012, 5:56 p.m.
On Wed, 2012-01-11 at 18:04 +0100, Florian Fainelli wrote:
> Fair enough, though that is exactly how the kernel gets more and more 
> bloated with features added and no way to get them disabled. 

On a related note, the unconditional bi-endianness support for CFI that
we just merged in commit 8e987465 gave me similar concerns.

We've traditionally been fairly good at keeping bloat out of the MTD
code — to the extent of compiling in support for specific geometries and
architecting the map code to optimise away all the conditionality in the
case where only one geometry is supported. And likewise the 'complex
mapping' bits.

Whether that level of bloat-paranoia still makes sense in Linux 3.x is
an open question. I suspect not, so I'm not *too* worried about starting
to be a little less anal about it.

Patch

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 8d70895..e2d94bb 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -59,6 +59,9 @@  static void cfi_amdstd_resume (struct mtd_info *);
 static int cfi_amdstd_reboot(struct notifier_block *, unsigned long, void *);
 static int cfi_amdstd_secsi_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
 
+static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
+				  size_t *retlen, const u_char *buf);
+
 static void cfi_amdstd_destroy(struct mtd_info *);
 
 struct mtd_info *cfi_cmdset_0002(struct map_info *, int);
@@ -443,6 +446,7 @@  struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 	pr_debug("MTD %s(): write buffer size %d\n", __func__,
 			mtd->writebufsize);
 
+	mtd->panic_write = cfi_amdstd_panic_write;
 	mtd->reboot_notifier.notifier_call = cfi_amdstd_reboot;
 
 	if (cfi->cfi_mode==CFI_MODE_CFI){
@@ -1562,6 +1566,242 @@  static int cfi_amdstd_write_buffers(struct mtd_info *mtd, loff_t to, size_t len,
 	return 0;
 }
 
+/*
+ * Wait for the flash chip to become ready to write data
+ *
+ * This is only called during the panic_write() path. When panic_write()
+ * is called, the kernel is in the process of a panic, and will soon be
+ * dead. Therefore we don't take any locks, and attempt to get access
+ * to the chip as soon as possible.
+ */
+static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip,
+				 unsigned long adr)
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	int retries = 10;
+	int i;
+
+	/*
+	 * If the driver thinks the chip is idle, and no toggle bits
+	 * are changing, then the chip is actually idle for sure.
+	 */
+	if (chip->state == FL_READY && chip_ready(map, adr))
+		return 0;
+
+	/*
+	 * Try several times to reset the chip and then wait for it
+	 * to become idle. The upper limit of a few milliseconds of
+	 * delay isn't a big problem: the kernel is dying anyway. It
+	 * is more important to save the messages.
+	 */
+	while (retries > 0) {
+		const unsigned long timeo = (HZ / 1000) + 1;
+
+		/* send the reset command */
+		map_write(map, CMD(0xF0), chip->start);
+
+		/* wait for the chip to become ready */
+		for (i = 0; i < jiffies_to_usecs(timeo); i++) {
+			if (chip_ready(map, adr))
+				return 0;
+
+			udelay(1);
+		}
+	}
+
+	/* the chip never became ready */
+	return -EBUSY;
+}
+
+/*
+ * Write out one word of data to a single flash chip during a kernel panic
+ *
+ * This is only called during the panic_write() path. When panic_write()
+ * is called, the kernel is in the process of a panic, and will soon be
+ * dead. Therefore we don't take any locks, and attempt to get access
+ * to the chip as soon as possible.
+ *
+ * The implementation of this routine is intentionally similar to
+ * do_write_oneword(), in order to ease code maintenance.
+ */
+static int do_panic_write_oneword(struct map_info *map, struct flchip *chip,
+				  unsigned long adr, map_word datum)
+{
+	const unsigned long uWriteTimeout = (HZ / 1000) + 1;
+	struct cfi_private *cfi = map->fldrv_priv;
+	int retry_cnt = 0;
+	map_word oldd;
+	int ret = 0;
+	int i;
+
+	adr += chip->start;
+
+	ret = cfi_amdstd_panic_wait(map, chip, adr);
+	if (ret)
+		return ret;
+
+	pr_debug("MTD %s(): PANIC WRITE 0x%.8lx(0x%.8lx)\n",
+			__func__, adr, datum.x[0]);
+
+	/*
+	 * Check for a NOP for the case when the datum to write is already
+	 * present - it saves time and works around buggy chips that corrupt
+	 * data at other locations when 0xff is written to a location that
+	 * already contains 0xff.
+	 */
+	oldd = map_read(map, adr);
+	if (map_word_equal(map, oldd, datum)) {
+		pr_debug("MTD %s(): NOP\n", __func__);
+		goto op_done;
+	}
+
+	ENABLE_VPP(map);
+
+retry:
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
+	map_write(map, datum, adr);
+
+	for (i = 0; i < jiffies_to_usecs(uWriteTimeout); i++) {
+		if (chip_ready(map, adr))
+			break;
+
+		udelay(1);
+	}
+
+	if (!chip_good(map, adr, datum)) {
+		/* reset on all failures. */
+		map_write(map, CMD(0xF0), chip->start);
+		/* FIXME - should have reset delay before continuing */
+
+		if (++retry_cnt <= MAX_WORD_RETRIES)
+			goto retry;
+
+		ret = -EIO;
+	}
+
+op_done:
+	DISABLE_VPP(map);
+	return ret;
+}
+
+/*
+ * Write out some data during a kernel panic
+ *
+ * This is used by the mtdoops driver to save the dying messages from a
+ * kernel which has panic'd.
+ *
+ * This routine ignores all of the locking used throughout the rest of the
+ * driver, in order to ensure that the data gets written out no matter what
+ * state this driver (and the flash chip itself) was in when the kernel crashed.
+ *
+ * The implementation of this routine is intentionally similar to
+ * cfi_amdstd_write_words(), in order to ease code maintenance.
+ */
+static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
+				  size_t *retlen, const u_char *buf)
+{
+	struct map_info *map = mtd->priv;
+	struct cfi_private *cfi = map->fldrv_priv;
+	unsigned long ofs, chipstart;
+	int ret = 0;
+	int chipnum;
+
+	*retlen = 0;
+	if (!len)
+		return 0;
+
+	chipnum = to >> cfi->chipshift;
+	ofs = to - (chipnum << cfi->chipshift);
+	chipstart = cfi->chips[chipnum].start;
+
+	/* If it's not bus aligned, do the first byte write */
+	if (ofs & (map_bankwidth(map) - 1)) {
+		unsigned long bus_ofs = ofs & ~(map_bankwidth(map) - 1);
+		int i = ofs - bus_ofs;
+		int n = 0;
+		map_word tmp_buf;
+
+		ret = cfi_amdstd_panic_wait(map, &cfi->chips[chipnum], bus_ofs);
+		if (ret)
+			return ret;
+
+		/* Load 'tmp_buf' with old contents of flash */
+		tmp_buf = map_read(map, bus_ofs + chipstart);
+
+		/* Number of bytes to copy from buffer */
+		n = min_t(int, len, map_bankwidth(map) - i);
+
+		tmp_buf = map_word_load_partial(map, tmp_buf, buf, i, n);
+
+		ret = do_panic_write_oneword(map, &cfi->chips[chipnum],
+					     bus_ofs, tmp_buf);
+		if (ret)
+			return ret;
+
+		ofs += n;
+		buf += n;
+		(*retlen) += n;
+		len -= n;
+
+		if (ofs >> cfi->chipshift) {
+			chipnum++;
+			ofs = 0;
+			if (chipnum == cfi->numchips)
+				return 0;
+		}
+	}
+
+	/* We are now aligned, write as much as possible */
+	while (len >= map_bankwidth(map)) {
+		map_word datum;
+
+		datum = map_word_load(map, buf);
+
+		ret = do_panic_write_oneword(map, &cfi->chips[chipnum],
+					     ofs, datum);
+		if (ret)
+			return ret;
+
+		ofs += map_bankwidth(map);
+		buf += map_bankwidth(map);
+		(*retlen) += map_bankwidth(map);
+		len -= map_bankwidth(map);
+
+		if (ofs >> cfi->chipshift) {
+			chipnum++;
+			ofs = 0;
+			if (chipnum == cfi->numchips)
+				return 0;
+
+			chipstart = cfi->chips[chipnum].start;
+		}
+	}
+
+	/* Write the trailing bytes if any */
+	if (len & (map_bankwidth(map) - 1)) {
+		map_word tmp_buf;
+
+		ret = cfi_amdstd_panic_wait(map, &cfi->chips[chipnum], ofs);
+		if (ret)
+			return ret;
+
+		tmp_buf = map_read(map, ofs + chipstart);
+
+		tmp_buf = map_word_load_partial(map, tmp_buf, buf, 0, len);
+
+		ret = do_panic_write_oneword(map, &cfi->chips[chipnum],
+					     ofs, tmp_buf);
+		if (ret)
+			return ret;
+
+		(*retlen) += len;
+	}
+
+	return 0;
+}
+
 
 /*
  * Handle devices with one erase region, that only implement