Patchwork [patch/rfc,2.6.29,1/2] MTD: driver model updates

login
register
mail settings
Submitter David Brownell
Date March 26, 2009, 7:42 a.m.
Message ID <200903260042.42091.david-b@pacbell.net>
Download mbox | patch
Permalink /patch/25129/
State New
Headers show

Comments

David Brownell - March 26, 2009, 7:42 a.m.
From: David Brownell <dbrownell@users.sourceforge.net>

Update driver model support in the MTD framework, so it fits
better into the current udev-based hotplug framework:

 - Each mtd_info now has a device node.  MTD drivers should set
   the dev.parent field to point to the physical device, before
   setting up partitions or otherwise declaring MTDs.

 - Those device nodes always map to /sys/class/mtdX device nodes,
   which no longer depend on MTD_CHARDEV.

 - Those mtdX sysfs nodes have a "starter set" of attributes;
   it's not yet sufficient to replace /proc/mtd.

 - Enabling MTD_CHARDEV provides /sys/class/mtdXro/ nodes and the
   /sys/class/mtd*/dev attributes (for udev, mdev, etc).

 - Include a MODULE_ALIAS_CHARDEV_MAJOR macro.  It'll work with
   udev creating the /dev/mtd* nodes, not just a static rootfs.

So the sysfs structure is pretty much what you'd expect, except
that readonly chardev nodes are a bit quirky.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
---
Sanity tested with NOR (physmap, omap_nor, m25p80), DataFlash,
NAND (davinci, omap), OneNand (omap); with and without mtdblock.
With converted MTD drivers, /sys/devices/virtual/mtd/* are gone.
Didn't test modular configs.

 drivers/mtd/mtd_blkdevs.c |    1 
 drivers/mtd/mtdchar.c     |   47 ++---------------
 drivers/mtd/mtdcore.c     |  115 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/mtd/mtdpart.c     |    9 +++
 include/linux/mtd/mtd.h   |    7 ++
 5 files changed, 137 insertions(+), 42 deletions(-)
David Brownell - March 31, 2009, 9:18 p.m.
On Thursday 26 March 2009, David Brownell wrote:
> From: David Brownell <dbrownell@users.sourceforge.net>
> 
> Update driver model support in the MTD framework, so it fits
> better into the current udev-based hotplug framework:

Hmm, no comments?  I had understood there was interest over on
the MTD side of things in exposing more information through
sysfs, to help avoid the need to add Even More Ioctls as part
of support for things like NAND chips with 4KB pages, or which
handle more than 4GBytes ...

> 
>  - Each mtd_info now has a device node.  MTD drivers should set
>    the dev.parent field to point to the physical device, before
>    setting up partitions or otherwise declaring MTDs.
> 
>  - Those device nodes always map to /sys/class/mtdX device nodes,
>    which no longer depend on MTD_CHARDEV.
> 
>  - Those mtdX sysfs nodes have a "starter set" of attributes;
>    it's not yet sufficient to replace /proc/mtd.
> 
>  - Enabling MTD_CHARDEV provides /sys/class/mtdXro/ nodes and the
>    /sys/class/mtd*/dev attributes (for udev, mdev, etc).
> 
>  - Include a MODULE_ALIAS_CHARDEV_MAJOR macro.  It'll work with
>    udev creating the /dev/mtd* nodes, not just a static rootfs.
> 
> So the sysfs structure is pretty much what you'd expect, except
> that readonly chardev nodes are a bit quirky.
> 
> Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
> ---
> Sanity tested with NOR (physmap, omap_nor, m25p80), DataFlash,
> NAND (davinci, omap), OneNand (omap); with and without mtdblock.
> With converted MTD drivers, /sys/devices/virtual/mtd/* are gone.
> Didn't test modular configs.
> 
>  drivers/mtd/mtd_blkdevs.c |    1 
>  drivers/mtd/mtdchar.c     |   47 ++---------------
>  drivers/mtd/mtdcore.c     |  115 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/mtd/mtdpart.c     |    9 +++
>  include/linux/mtd/mtd.h   |    7 ++
>  5 files changed, 137 insertions(+), 42 deletions(-)
> 
> --- a/drivers/mtd/mtd_blkdevs.c
> +++ b/drivers/mtd/mtd_blkdevs.c
> @@ -286,6 +286,7 @@ int add_mtd_blktrans_dev(struct mtd_blkt
>  	gd->private_data = new;
>  	new->blkcore_priv = gd;
>  	gd->queue = tr->blkcore_priv->rq;
> +	gd->driverfs_dev = new->mtd->dev.parent;
>  
>  	if (new->readonly)
>  		set_disk_ro(gd, 1);
> --- a/drivers/mtd/mtdchar.c
> +++ b/drivers/mtd/mtdchar.c
> @@ -19,33 +19,6 @@
>  
>  #include <asm/uaccess.h>
>  
> -static struct class *mtd_class;
> -
> -static void mtd_notify_add(struct mtd_info* mtd)
> -{
> -	if (!mtd)
> -		return;
> -
> -	device_create(mtd_class, NULL, MKDEV(MTD_CHAR_MAJOR, mtd->index*2),
> -		      NULL, "mtd%d", mtd->index);
> -
> -	device_create(mtd_class, NULL, MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1),
> -		      NULL, "mtd%dro", mtd->index);
> -}
> -
> -static void mtd_notify_remove(struct mtd_info* mtd)
> -{
> -	if (!mtd)
> -		return;
> -
> -	device_destroy(mtd_class, MKDEV(MTD_CHAR_MAJOR, mtd->index*2));
> -	device_destroy(mtd_class, MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1));
> -}
> -
> -static struct mtd_notifier notifier = {
> -	.add	= mtd_notify_add,
> -	.remove	= mtd_notify_remove,
> -};
>  
>  /*
>   * Data structure to hold the pointer to the mtd device as well
> @@ -793,34 +766,26 @@ static const struct file_operations mtd_
>  
>  static int __init init_mtdchar(void)
>  {
> -	if (register_chrdev(MTD_CHAR_MAJOR, "mtd", &mtd_fops)) {
> +	int status;
> +
> +	status = register_chrdev(MTD_CHAR_MAJOR, "mtd", &mtd_fops);
> +	if (status < 0) {
>  		printk(KERN_NOTICE "Can't allocate major number %d for Memory Technology Devices.\n",
>  		       MTD_CHAR_MAJOR);
> -		return -EAGAIN;
>  	}
>  
> -	mtd_class = class_create(THIS_MODULE, "mtd");
> -
> -	if (IS_ERR(mtd_class)) {
> -		printk(KERN_ERR "Error creating mtd class.\n");
> -		unregister_chrdev(MTD_CHAR_MAJOR, "mtd");
> -		return PTR_ERR(mtd_class);
> -	}
> -
> -	register_mtd_user(&notifier);
> -	return 0;
> +	return status;
>  }
>  
>  static void __exit cleanup_mtdchar(void)
>  {
> -	unregister_mtd_user(&notifier);
> -	class_destroy(mtd_class);
>  	unregister_chrdev(MTD_CHAR_MAJOR, "mtd");
>  }
>  
>  module_init(init_mtdchar);
>  module_exit(cleanup_mtdchar);
>  
> +MODULE_ALIAS_CHARDEV_MAJOR(MTD_CHAR_MAJOR);
>  
>  MODULE_LICENSE("GPL");
>  MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
> --- a/drivers/mtd/mtdcore.c
> +++ b/drivers/mtd/mtdcore.c
> @@ -22,6 +22,9 @@
>  
>  #include "mtdcore.h"
>  
> +
> +static struct class *mtd_class;
> +
>  /* These are exported solely for the purpose of mtd_blkdevs.c. You
>     should not use them for _anything_ else */
>  DEFINE_MUTEX(mtd_table_mutex);
> @@ -32,6 +35,82 @@ EXPORT_SYMBOL_GPL(mtd_table);
>  
>  static LIST_HEAD(mtd_notifiers);
>  
> +
> +#if defined(CONFIG_MTD_CHAR) || defined(CONFIG_MTD_CHAR_MODULE)
> +#define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2)
> +#else
> +#define MTD_DEVT(index) 0
> +#endif
> +
> +/* REVISIT once MTD uses the driver model better, whoever allocates
> + * the mtd_info will probably want to use the release() hook...
> + */
> +static void mtd_release(struct device *dev)
> +{
> +	struct mtd_info *mtd = dev_to_mtd(dev);
> +
> +	/* remove /dev/mtdXro node if needed */
> +	if (MTD_DEVT(mtd->index))
> +		device_destroy(mtd_class, MTD_DEVT(mtd->index) + 1);
> +}
> +
> +static ssize_t mtd_type_show(struct device *dev,
> +		struct device_attribute *attr, char *buf)
> +{
> +	struct mtd_info *mtd = dev_to_mtd(dev);
> +	char *type;
> +
> +	switch (mtd->type) {
> +	case MTD_ABSENT:
> +		type = "absent";
> +		break;
> +	case MTD_RAM:
> +		type = "ram";
> +		break;
> +	case MTD_ROM:
> +		type = "rom";
> +		break;
> +	case MTD_NORFLASH:
> +		type = "nor";
> +		break;
> +	case MTD_NANDFLASH:
> +		type = "nand";
> +		break;
> +	case MTD_DATAFLASH:
> +		type = "dataflash";
> +		break;
> +	case MTD_UBIVOLUME:
> +		type = "ubi";
> +		break;
> +	default:
> +		type = "unknown";
> +	}
> +
> +	return snprintf(buf, PAGE_SIZE, "%s\n", type);
> +}
> +static DEVICE_ATTR(mtd_type, S_IRUGO, mtd_type_show, NULL);
> +
> +static struct attribute *mtd_attrs[] = {
> +	&dev_attr_mtd_type.attr,
> +	/* FIXME provide a /proc/mtd superset */
> +	NULL,
> +};
> +
> +struct attribute_group mtd_group = {
> +	.attrs		= mtd_attrs,
> +};
> +
> +struct attribute_group *mtd_groups[] = {
> +	&mtd_group,
> +	NULL,
> +};
> +
> +static struct device_type mtd_devtype = {
> +	.name		= "mtd",
> +	.groups		= mtd_groups,
> +	.release	= mtd_release,
> +};
> +
>  /**
>   *	add_mtd_device - register an MTD device
>   *	@mtd: pointer to new MTD device info structure
> @@ -40,6 +119,7 @@ static LIST_HEAD(mtd_notifiers);
>   *	notify each currently active MTD 'user' of its arrival. Returns
>   *	zero on success or 1 on failure, which currently will only happen
>   *	if the number of present devices exceeds MAX_MTD_DEVICES (i.e. 16)
> + *	or there's a sysfs error.
>   */
>  
>  int add_mtd_device(struct mtd_info *mtd)
> @@ -80,6 +160,23 @@ int add_mtd_device(struct mtd_info *mtd)
>  					       mtd->name);
>  			}
>  
> +			/* Caller should have set dev.parent to match the
> +			 * physical device.
> +			 */
> +			mtd->dev.type = &mtd_devtype;
> +			mtd->dev.class = mtd_class;
> +			mtd->dev.devt = MTD_DEVT(i);
> +			dev_set_name(&mtd->dev, "mtd%d", i);
> +			if (device_register(&mtd->dev) != 0) {
> +				mtd_table[i] = NULL;
> +				break;
> +			}
> +
> +			if (MTD_DEVT(i))
> +				device_create(mtd_class, mtd->dev.parent,
> +						MTD_DEVT(i) + 1,
> +						NULL, "mtd%dro", i);
> +
>  			DEBUG(0, "mtd: Giving out device %d to %s\n",i, mtd->name);
>  			/* No need to get a refcount on the module containing
>  			   the notifier, since we hold the mtd_table_mutex */
> @@ -343,6 +440,24 @@ EXPORT_SYMBOL_GPL(register_mtd_user);
>  EXPORT_SYMBOL_GPL(unregister_mtd_user);
>  EXPORT_SYMBOL_GPL(default_mtd_writev);
>  
> +static int __init mtd_setup(void)
> +{
> +	mtd_class = class_create(THIS_MODULE, "mtd");
> +
> +	if (IS_ERR(mtd_class)) {
> +		pr_err("Error creating mtd class.\n");
> +		return PTR_ERR(mtd_class);
> +	}
> +	return 0;
> +}
> +core_initcall(mtd_setup);
> +
> +static void __exit mtd_teardown(void)
> +{
> +	class_destroy(mtd_class);
> +}
> +__exitcall(mtd_teardown);
> +
>  #ifdef CONFIG_PROC_FS
>  
>  /*====================================================================*/
> --- a/drivers/mtd/mtdpart.c
> +++ b/drivers/mtd/mtdpart.c
> @@ -343,6 +343,11 @@ static struct mtd_part *add_one_partitio
>  	slave->mtd.name = part->name;
>  	slave->mtd.owner = master->owner;
>  
> +	/* NOTE:  we don't arrange MTDs as a tree; it'd be error-prone
> +	 * to have the same data be in two different partitions.
> +	 */
> +	slave->mtd.dev.parent = master->dev.parent;
> +
>  	slave->mtd.read = part_read;
>  	slave->mtd.write = part_write;
>  
> @@ -493,7 +498,9 @@ out_register:
>   * This function, given a master MTD object and a partition table, creates
>   * and registers slave MTD objects which are bound to the master according to
>   * the partition definitions.
> - * (Q: should we register the master MTD object as well?)
> + *
> + * We don't register the master, or expect the caller to have done so,
> + * for reasons of data integrity.
>   */
>  
>  int add_mtd_partitions(struct mtd_info *master,
> --- a/include/linux/mtd/mtd.h
> +++ b/include/linux/mtd/mtd.h
> @@ -11,6 +11,7 @@
>  #include <linux/module.h>
>  #include <linux/uio.h>
>  #include <linux/notifier.h>
> +#include <linux/device.h>
>  
>  #include <linux/mtd/compatmac.h>
>  #include <mtd/mtd-abi.h>
> @@ -223,6 +224,7 @@ struct mtd_info {
>  	void *priv;
>  
>  	struct module *owner;
> +	struct device dev;
>  	int usecount;
>  
>  	/* If the driver is something smart, like UBI, it may need to maintain
> @@ -233,6 +235,11 @@ struct mtd_info {
>  	void (*put_device) (struct mtd_info *mtd);
>  };
>  
> +static inline struct mtd_info *dev_to_mtd(struct device *dev)
> +{
> +	return dev ? container_of(dev, struct mtd_info, dev) : NULL;
> +}
> +
>  static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
>  {
>  	if (mtd->erasesize_shift)
>
Kay Sievers - March 31, 2009, 11:51 p.m.
On Tue, Mar 31, 2009 at 23:18, David Brownell <david-b@pacbell.net> wrote:
> On Thursday 26 March 2009, David Brownell wrote:
>> From: David Brownell <dbrownell@users.sourceforge.net>
>>
>> Update driver model support in the MTD framework, so it fits
>> better into the current udev-based hotplug framework:
>
> Hmm, no comments?  I had understood there was interest over on
> the MTD side of things in exposing more information through
> sysfs, to help avoid the need to add Even More Ioctls as part
> of support for things like NAND chips with 4KB pages, or which
> handle more than 4GBytes ...

Please have a look at this. We got asked repeatedly to provide better
hotplug/udev integration, and the patches, and having the parent
device properly assigned, would solve some of the problems people run
into currently.

Thanks,
Kay
Kevin Cernekee - April 1, 2009, 1:17 a.m.
On Tue, Mar 31, 2009 at 2:18 PM, David Brownell <david-b@pacbell.net> wrote:
> Hmm, no comments?  I had understood there was interest over on
> the MTD side of things in exposing more information through
> sysfs, to help avoid the need to add Even More Ioctls as part
> of support for things like NAND chips with 4KB pages, or which
> handle more than 4GBytes ...

Based on this post:
http://lists.infradead.org/pipermail/linux-mtd/2009-March/025060.html

I am inferring that the following attributes are wanted in the MTD
sysfs interface:

1) mtd_info_user fields - type, flags, size, erasesize, writesize,
oobsize.  So far we have "type," but it should be easy to add the
rest.

2) region_info_user fields?  Not really sure how this would work.
Maybe a separate subdirectory for each region?
David Brownell - April 1, 2009, 3:21 a.m.
On Tuesday 31 March 2009, Kevin Cernekee wrote:
> On Tue, Mar 31, 2009 at 2:18 PM, David Brownell <david-b@pacbell.net> wrote:
> > Hmm, no comments?  I had understood there was interest over on
> > the MTD side of things in exposing more information through
> > sysfs, to help avoid the need to add Even More Ioctls as part
> > of support for things like NAND chips with 4KB pages, or which
> > handle more than 4GBytes ...
> 
> Based on this post:
> http://lists.infradead.org/pipermail/linux-mtd/2009-March/025060.html
> 
> I am inferring that the following attributes are wanted in the MTD
> sysfs interface:
> 
> 1) mtd_info_user fields - type, flags, size, erasesize, writesize,
> oobsize.  So far we have "type," but it should be easy to add the
> rest.

Yes, easy to add those.  As noted in the $SUBJECT patch, the set
consisting of just "type" is a "starter set".  :)

For procfs elimination, a name/label would be needed too; that can
be an input to, or output from, cmdlinepart.

My question about the $SUBJECT patch is whether there's any reason
not to use that as the start of such driver model enhancements.
I'm thinking the answer to that seems to be "no", and that someone
more involved in *using* those sysfs attributes would be a better
choice for growing that set of attributes.


> 2) region_info_user fields?  Not really sure how this would work.
> Maybe a separate subdirectory for each region?

I'm not sure I've ever had reason to care about a "region" (whatever
that is!) with MTD hardware.

I suspect that a lot of interesting questions could come up in
the context of enhancing mtd-utils to work with sysfs and bigger
NAND chips.  Some might relate to "regions".

- Dave
Kevin Cernekee - April 1, 2009, 4:49 a.m.
On 3/31/09, David Brownell <david-b@pacbell.net> wrote:
>> 2) region_info_user fields?  Not really sure how this would work.
>> Maybe a separate subdirectory for each region?
>
> I'm not sure I've ever had reason to care about a "region" (whatever
> that is!) with MTD hardware.

Erase Block Regions.  From the CFI spec:

"x specifies the number of regions within the device containing one or
more contiguous Erase Blocks of the same size.  For example, a 128KB
device (1Mb) having blocking of 16KB, 8KB, four 2KB, two 16KB, and one
64KB is considered to have 5 Erase Block Regions."

This is fairly common on parallel NOR devices.  Probably less so on
huge NAND devices.

> I suspect that a lot of interesting questions could come up in
> the context of enhancing mtd-utils to work with sysfs and bigger
> NAND chips.  Some might relate to "regions".

Right, this sysfs requirement raises a number of issues that need to
be fully thought out in order to make sure the new interface is a
suitable replacement for the "INFO" ioctls.  For instance:

1) If each region is a subdirectory, are user programs supposed to use
readdir() to count them?  Is ioctl(MEMGETREGIONCOUNT) still the
preferred method?  Or do we make a new "regioncount" sysfs attribute?

(A somewhat related question is whether MEMGETREGIONCOUNT only exists
because it was impossible to expand the MEMGETINFO struct.  After all,
it's just copying another field from the mtd_info struct.)

2) How are user programs expected to access MTD sysfs?  Do we
introduce a new libsysfs dependency, or roll our own implementation?
Are there any past examples of ioctls being phased out in favor of
sysfs, particularly in subsystems that are popular on embedded
platforms?

3) What should the mtd-utils changes look like?  Do we define
backward-compatibility wrapper functions that try to work the same way
the ioctls used to?  New libraries and layers of abstraction?  Or
something in the middle?
David Brownell - April 1, 2009, 6:36 a.m.
On Tuesday 31 March 2009, Kevin Cernekee wrote:
> On 3/31/09, David Brownell <david-b@pacbell.net> wrote:
> >> 2) region_info_user fields?  Not really sure how this would work.
> >> Maybe a separate subdirectory for each region?
> >
> > I'm not sure I've ever had reason to care about a "region" (whatever
> > that is!) with MTD hardware.
> 
> Erase Block Regions.  From the CFI spec:
> 
> ...
> 
> This is fairly common on parallel NOR devices.  Probably less so on
> huge NAND devices.

Oh, that.  Right.  Few new boards I've seen in the past few
years have NOR; it's mostly NAND nowadays.  That gets mixed
up with bootblocks too ... as in, store u-boot parameters in
a 4K erase block (surrounded by u-boot code) instead of some
dedicated 128K block that's almost entirely unused.


> > I suspect that a lot of interesting questions could come up in
> > the context of enhancing mtd-utils to work with sysfs and bigger
> > NAND chips.  Some might relate to "regions".
> 
> Right, this sysfs requirement raises a number of issues that need to
> be fully thought out in order to make sure the new interface is a
> suitable replacement for the "INFO" ioctls.

Hmm, it's the same as the *current* sysfs model for chardevs, except
that (a) it's there even if chardevs aren't, (b) it supports proper
parent devices, and (c) it adds attributes.  So in that sense maybe
that's not the best question to ask.

Maybe you should ask a slightly different question:  what's the right
interface to build using sysfs?  Certainly let the answers be illuminated
by what current tools can do.

I suspect answering that revised question will lead to a desire for
more driver model update, exposing concerpts beyond just raw MTDs.


> For instance: 
> 
> 1) If each region is a subdirectory, are user programs supposed to use
> readdir() to count them?  Is ioctl(MEMGETREGIONCOUNT) still the
> preferred method?  Or do we make a new "regioncount" sysfs attribute?

Model-wise, it might make sense to export chips (potentially
concatentated) with their regions, as distinct from partitions.

That notion doesn't show up all that cleanly in the framework,
thoug it might be good to add it.


> (A somewhat related question is whether MEMGETREGIONCOUNT only exists
> because it was impossible to expand the MEMGETINFO struct.  After all,
> it's just copying another field from the mtd_info struct.)

There are folk who are rabidly in favor of the "one value
per attribute" model, but I've never seen that as compelling.
A "regions" attribute, with versioned header (field labels?)
and one line per region, would be a natural model for any
interface that didn't get waylaid by religious fervor.

But ... not my call.


> 2) How are user programs expected to access MTD sysfs?  Do we
> introduce a new libsysfs dependency, or roll our own implementation?
> Are there any past examples of ioctls being phased out in favor of
> sysfs, particularly in subsystems that are popular on embedded
> platforms?

I thought the idea was not to use libsysfs...
 

> 3) What should the mtd-utils changes look like?  Do we define
> backward-compatibility wrapper functions that try to work the same way
> the ioctls used to?  New libraries and layers of abstraction?  Or
> something in the middle?

Up to mtd-utils maintainers.  I'd expect some period of backward
compatibility would be required.  The "carrot" might be that new
support for 4+ GB chips/partitions might depend on sysfs, while
smaller chips can be supported without it (using existing tools).
Ricard Wanderlof - April 1, 2009, 7:29 a.m.
On Tue, 31 Mar 2009, David Brownell wrote:

> Hmm, no comments?  I had understood there was interest over on
> the MTD side of things in exposing more information through
> sysfs, to help avoid the need to add Even More Ioctls as part
> of support for things like NAND chips with 4KB pages, or which
> handle more than 4GBytes ...

I sense some ambiguity when it comes to sysfs. dwmw2 and others seem to 
feel this is the route to go, yet no one really seems interested and the 
only patches that people produce are for new ioctls. Admittedly, moving to 
sysfs requires some form of high level specification before implementation 
can be done, but still...

Could it be that the relevant interfaces would only be used, basically, 
for mtdtools, which are quite simple in nature and an ioctl interface 
works well. There isn't that much performance tuning to be done and not 
very much information which humans are interested in. Most people want to 
mount their device and go.

Indeed, from a user application perspective, sysfs seems a bit clumsy to 
me, you have to open a file and read and write text strings (although 
binary files are possible but, I suspect, frowned upon), rather than just 
fire off an ioctl after filling in a struct.

While there are up- and downwards compatibility issues, careful design of 
the ioctls can minimize the impact.

I'm not suggesting we go one way or the other, just an observation that 
few mtd _users_ seem to be eager to go with sysfs. I invite anyone to 
prove me wrong...

/Ricard
--
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30
Artem Bityutskiy - April 1, 2009, 7:51 a.m.
On Wed, 2009-04-01 at 09:29 +0200, Ricard Wanderlof wrote:
> On Tue, 31 Mar 2009, David Brownell wrote:
> 
> > Hmm, no comments?  I had understood there was interest over on
> > the MTD side of things in exposing more information through
> > sysfs, to help avoid the need to add Even More Ioctls as part
> > of support for things like NAND chips with 4KB pages, or which
> > handle more than 4GBytes ...
> 
> I sense some ambiguity when it comes to sysfs. dwmw2 and others seem to 
> feel this is the route to go, yet no one really seems interested and the 
> only patches that people produce are for new ioctls. Admittedly, moving to 
> sysfs requires some form of high level specification before implementation 
> can be done, but still...
> 
> Could it be that the relevant interfaces would only be used, basically, 
> for mtdtools, which are quite simple in nature and an ioctl interface 
> works well. There isn't that much performance tuning to be done and not 
> very much information which humans are interested in. Most people want to 
> mount their device and go.
> 
> Indeed, from a user application perspective, sysfs seems a bit clumsy to 
> me, you have to open a file and read and write text strings (although 
> binary files are possible but, I suspect, frowned upon), rather than just 
> fire off an ioctl after filling in a struct.

On the other hand, you can easily see everything from your shell.
And in programs, it is not a big deal to write few functions which
fetch info from the sysfs files. We do this for UBI in libubi.c

Anyway, I have practical problems.

1. UBI utilities need to know sub-page size in case of NAND. And I
   have zero possibility to extend the existing ioctls. If they were
   sysfs interfaces, I would just add one more sysfs file.

2. We want to provide various statistics like count of reads, writes,
   erases. Should we create yet another ioctl for this? And for
   statistics, sysfs is just better because I can just do
   "cat /sys/class/mtd0/stats/reads_count". And I can easilly do
   this in shell scripts.

My _practical_ problems may be nicely solved with sysfs. And they are
not nicely solved with ioctls. And I am a real, existing MTD user.
David Brownell - April 1, 2009, 8:05 a.m.
On Wednesday 01 April 2009, Ricard Wanderlof wrote:
> 
> On Tue, 31 Mar 2009, David Brownell wrote:
> 
> > Hmm, no comments?  I had understood there was interest over on
> > the MTD side of things in exposing more information through
> > sysfs, to help avoid the need to add Even More Ioctls as part
> > of support for things like NAND chips with 4KB pages, or which
> > handle more than 4GBytes ...
> 
> I sense some ambiguity when it comes to sysfs. dwmw2 and others seem to 
> feel this is the route to go, yet no one really seems interested and the 
> only patches that people produce are for new ioctls. Admittedly, moving to 
> sysfs requires some form of high level specification before implementation 
> can be done, but still...

Yeah, design inertia.  Folk understand ioctls (more or less),
and not so much with sysfs.  Having to re-think anything is
a kind of obstacle.  (Part of why I made it especially easy
to add more attributes!)

Plus, if you dive into it ... you'll start noticing glitches
in the MTD framework models.  Object lifetime and all that;
arguably there should be new MTD calls and an altered object
lifecycle.  Such issues come up a lot with legacy interfaces,
and MTD counts as one.


> Could it be that the relevant interfaces would only be used, basically, 
> for mtdtools, which are quite simple in nature and an ioctl interface 
> works well. There isn't that much performance tuning to be done and not 
> very much information which humans are interested in. Most people want to 
> mount their device and go.

True.  Unless I need JFFS2 I don't enable mtd block devices;
and I don't use mtd-utils most of the time either...


> Indeed, from a user application perspective, sysfs seems a bit clumsy to 
> me, you have to open a file and read and write text strings (although 
> binary files are possible but, I suspect, frowned upon), rather than just 
> fire off an ioctl after filling in a struct.
> 
> While there are up- and downwards compatibility issues, careful design of 
> the ioctls can minimize the impact.

As they say, "your mileage may vary" ... but ability to just
browse sysfs with "cat" and such is a big win.  No need to
operate any ioctls.

Erasing an MTD partition may require an ioctl forever.  :)


> I'm not suggesting we go one way or the other, just an observation that 
> few mtd _users_ seem to be eager to go with sysfs. I invite anyone to 
> prove me wrong...

Users just want to mount-and-go.
Sysadmins may use mtd-utils for repair/install.
Most system developers are just users with bloodier hardware.

MTD developers themselves probably have strong opinions and
needs, but that's not a large community... adaptable, though!

- Dave

 
> /Ricard
> --
> Ricard Wolf Wanderlöf                           ricardw(at)axis.com
> Axis Communications AB, Lund, Sweden            www.axis.com
> Phone +46 46 272 2016                           Fax +46 46 13 61 30
> 
>
Ricard Wanderlof - April 1, 2009, 8:25 a.m.
On Wed, 1 Apr 2009, David Brownell wrote:

> Erasing an MTD partition may require an ioctl forever.  :)

Well, I can imagine that one actually being accomplished with a simple 
write to a sysfs file ... but we might not want to make it that simple.

/Ricard
--
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30
Artem Bityutskiy - April 1, 2009, 8:28 a.m.
On Wed, 2009-04-01 at 10:25 +0200, Ricard Wanderlof wrote:
> On Wed, 1 Apr 2009, David Brownell wrote:
> 
> > Erasing an MTD partition may require an ioctl forever.  :)
> 
> Well, I can imagine that one actually being accomplished with a simple 
> write to a sysfs file ... but we might not want to make it that simple.

Of course we are talking only about _informational_ interfaces, when
MTD provides various information - size, name, type, etc.
Of course erasing, marking blocks bad, etc should be implemented
as ioctl.
Juergen Beisert - April 1, 2009, 1:43 p.m.
Hi David, Kay,

On Mittwoch, 1. April 2009, Kay Sievers wrote:
> On Tue, Mar 31, 2009 at 23:18, David Brownell <david-b@pacbell.net> wrote:
> > On Thursday 26 March 2009, David Brownell wrote:
> >> From: David Brownell <dbrownell@users.sourceforge.net>
> >>
> >> Update driver model support in the MTD framework, so it fits
> >> better into the current udev-based hotplug framework:
> >
> > Hmm, no comments?  I had understood there was interest over on
> > the MTD side of things in exposing more information through
> > sysfs, to help avoid the need to add Even More Ioctls as part
> > of support for things like NAND chips with 4KB pages, or which
> > handle more than 4GBytes ...
>
> Please have a look at this. We got asked repeatedly to provide better
> hotplug/udev integration, and the patches, and having the parent
> device properly assigned, would solve some of the problems people run
> into currently.

Without patch:
--------------

$ udevadm info -a -p /sys/block/mtdblock0
[...]
  looking at device '/devices/virtual/block/mtdblock0':
    KERNEL=="mtdblock0"
    SUBSYSTEM=="block"
    DRIVER==""
    ATTR{range}=="1"
    ATTR{removable}=="0"
    ATTR{size}=="256"
    ATTR{capability}=="10"
    ATTR{stat}=="       0        0        0        0        0        0        0        0        0        0        0"

And nearly the same data for the other flash device. No chance to detect if
this one is the NOR or the NAND type...

With the patch:
---------------

$ udevadm info -a -p /sys/block/mtdblock0
[...]
  looking at parent device '/devices/platform/physmap-flash.0':
    KERNELS=="physmap-flash.0"
    SUBSYSTEMS=="platform"
    DRIVERS=="physmap-flash"
    ATTRS{modalias}=="platform:physmap-flash"

The second flash device is of NAND type and 'udevadm' shows:

$ udevadm info -a -p /sys/block/mtdblock4
[...]
  looking at parent device '/devices/platform/mxc_nand.0':
    KERNELS=="mxc_nand.0"
    SUBSYSTEMS=="platform"
    DRIVERS=="mxc_nand"
    ATTRS{modalias}=="platform:mxc_nand"

\o/

I will try now to define some udev rules to match for my different flash
memories.

Thank you,
Juergen
David Woodhouse - April 3, 2009, 10:04 a.m.
On Thu, 2009-03-26 at 00:42 -0700, David Brownell wrote:
> 
> @@ -343,6 +343,11 @@ static struct mtd_part *add_one_partitio
>         slave->mtd.name = part->name;
>         slave->mtd.owner = master->owner;
>  
> +       /* NOTE:  we don't arrange MTDs as a tree; it'd be error-prone
> +        * to have the same data be in two different partitions.
> +        */
> +       slave->mtd.dev.parent = master->dev.parent;

Can you elaborate on that? I think we _do_ want to arrange partitions as
sub-devices of the master, don't we? And I'd rather not change the way
they appear at a later date; I'd prefer them to be that way from the
beginning.

>         slave->mtd.read = part_read;
>         slave->mtd.write = part_write;
>  
> @@ -493,7 +498,9 @@ out_register:
>   * This function, given a master MTD object and a partition table, creates
>   * and registers slave MTD objects which are bound to the master according to
>   * the partition definitions.
> - * (Q: should we register the master MTD object as well?)
> + *
> + * We don't register the master, or expect the caller to have done so,
> + * for reasons of data integrity.
>   */

Again, can you elaborate?

A lot of devices do just that. Where you have a partition table of some
kind that's actually stored on the flash, that might be the only way to
access it.

I really don't like the way our partitioning works at the moment.
David Brownell - April 3, 2009, 5:24 p.m.
On Friday 03 April 2009, David Woodhouse wrote:
> On Thu, 2009-03-26 at 00:42 -0700, David Brownell wrote:
> > 
> > @@ -343,6 +343,11 @@ static struct mtd_part *add_one_partitio
> >         slave->mtd.name = part->name;
> >         slave->mtd.owner = master->owner;
> >  
> > +       /* NOTE:  we don't arrange MTDs as a tree; it'd be error-prone
> > +        * to have the same data be in two different partitions.
> > +        */
> > +       slave->mtd.dev.parent = master->dev.parent;
> 
> Can you elaborate on that? I think we _do_ want to arrange partitions as
> sub-devices of the master, don't we? 

They're part of a tree, and are subdevices of the physical flash
node, so those partitions get nodes like:

	.../physical_flashdev
		/block/mtdblock*
		/mtd/mtd*
		/mtd/mtd*ro

If that were "slave->mtd.dev.parent = &master->dev" instead
(you could try it!), not only would most MTD drivers need to
change to register that master->dev ("mtd0" here), but the
tree would look something like (from memory):

	.../physical_flashdev
		/block/mtdblock*
		/mtd/mtd0
			/mtd/mtd*
			/mtd/mtd*ro
		/mtd/mtd0ro

which is at the very least ugly, confusing, counter-intuitive,
and internally inconsistent (why do all the block nodes show
up where you'd expect, but not all the MTD/chardev ones).


> And I'd rather not change the way 
> they appear at a later date; I'd prefer them to be that way from the
> beginning.

Agreed.  The focus of this patch was getting a model that
would evolve later ... attributes and tool support being
the most apparent issues.

 
> >         slave->mtd.read = part_read;
> >         slave->mtd.write = part_write;
> >  
> > @@ -493,7 +498,9 @@ out_register:
> >   * This function, given a master MTD object and a partition table, creates
> >   * and registers slave MTD objects which are bound to the master according to
> >   * the partition definitions.
> > - * (Q: should we register the master MTD object as well?)
> > + *
> > + * We don't register the master, or expect the caller to have done so,
> > + * for reasons of data integrity.
> >   */
> 
> Again, can you elaborate?

Same point as above ... presuming an A to that long-standing Q.


> A lot of devices do just that. Where you have a partition table of some
> kind that's actually stored on the flash, that might be the only way to
> access it.

I happen never to have come across such a flash layout;
that's presumably what "RedBoot" does (eCOS)?

As I'm sure you're aware, MTDs don't need to be registered
to be used.  There's no innate need to "do just that" just
to support RedBoot-style internal partition tables.

As a rule I've seen partitioning be provided by Linux, or
cmdlinepart.  Systems using u-boot or proprietary loaders
just "know" where they, their data, and the kernel are to
be found; Linux tables either declare them as read-only
partitions or, less often, only list writable parts of
the flash chip.

One way to model RedBoot tables that might be to have a
partition table node "device_type" that's distinct from
the MTD node type, even if it's still coupled to the
same generic "mtd_info".  I'm not sure what to make of
that technique, but it might be useful elsewhere in MTD.

Another way to model RedBoot tables is ... just hide them.
Don't expose an MTD with that data (or "just that data").
Can Linux contine to operate if some tool modifies that
partition data, or must it reboot?


> I really don't like the way our partitioning works at the moment.

Arguably fixing that would be a prerequisite to making
stable driver model support ... which is part of the
reason this was a "patch/RFC".  :)

I will confess to trying to avoid opening the can of
worms too far, and suspecting that parts of the MTD
stack I've never needed would expose more issues.

- Dave

Patch

--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -286,6 +286,7 @@  int add_mtd_blktrans_dev(struct mtd_blkt
 	gd->private_data = new;
 	new->blkcore_priv = gd;
 	gd->queue = tr->blkcore_priv->rq;
+	gd->driverfs_dev = new->mtd->dev.parent;
 
 	if (new->readonly)
 		set_disk_ro(gd, 1);
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -19,33 +19,6 @@ 
 
 #include <asm/uaccess.h>
 
-static struct class *mtd_class;
-
-static void mtd_notify_add(struct mtd_info* mtd)
-{
-	if (!mtd)
-		return;
-
-	device_create(mtd_class, NULL, MKDEV(MTD_CHAR_MAJOR, mtd->index*2),
-		      NULL, "mtd%d", mtd->index);
-
-	device_create(mtd_class, NULL, MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1),
-		      NULL, "mtd%dro", mtd->index);
-}
-
-static void mtd_notify_remove(struct mtd_info* mtd)
-{
-	if (!mtd)
-		return;
-
-	device_destroy(mtd_class, MKDEV(MTD_CHAR_MAJOR, mtd->index*2));
-	device_destroy(mtd_class, MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1));
-}
-
-static struct mtd_notifier notifier = {
-	.add	= mtd_notify_add,
-	.remove	= mtd_notify_remove,
-};
 
 /*
  * Data structure to hold the pointer to the mtd device as well
@@ -793,34 +766,26 @@  static const struct file_operations mtd_
 
 static int __init init_mtdchar(void)
 {
-	if (register_chrdev(MTD_CHAR_MAJOR, "mtd", &mtd_fops)) {
+	int status;
+
+	status = register_chrdev(MTD_CHAR_MAJOR, "mtd", &mtd_fops);
+	if (status < 0) {
 		printk(KERN_NOTICE "Can't allocate major number %d for Memory Technology Devices.\n",
 		       MTD_CHAR_MAJOR);
-		return -EAGAIN;
 	}
 
-	mtd_class = class_create(THIS_MODULE, "mtd");
-
-	if (IS_ERR(mtd_class)) {
-		printk(KERN_ERR "Error creating mtd class.\n");
-		unregister_chrdev(MTD_CHAR_MAJOR, "mtd");
-		return PTR_ERR(mtd_class);
-	}
-
-	register_mtd_user(&notifier);
-	return 0;
+	return status;
 }
 
 static void __exit cleanup_mtdchar(void)
 {
-	unregister_mtd_user(&notifier);
-	class_destroy(mtd_class);
 	unregister_chrdev(MTD_CHAR_MAJOR, "mtd");
 }
 
 module_init(init_mtdchar);
 module_exit(cleanup_mtdchar);
 
+MODULE_ALIAS_CHARDEV_MAJOR(MTD_CHAR_MAJOR);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -22,6 +22,9 @@ 
 
 #include "mtdcore.h"
 
+
+static struct class *mtd_class;
+
 /* These are exported solely for the purpose of mtd_blkdevs.c. You
    should not use them for _anything_ else */
 DEFINE_MUTEX(mtd_table_mutex);
@@ -32,6 +35,82 @@  EXPORT_SYMBOL_GPL(mtd_table);
 
 static LIST_HEAD(mtd_notifiers);
 
+
+#if defined(CONFIG_MTD_CHAR) || defined(CONFIG_MTD_CHAR_MODULE)
+#define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2)
+#else
+#define MTD_DEVT(index) 0
+#endif
+
+/* REVISIT once MTD uses the driver model better, whoever allocates
+ * the mtd_info will probably want to use the release() hook...
+ */
+static void mtd_release(struct device *dev)
+{
+	struct mtd_info *mtd = dev_to_mtd(dev);
+
+	/* remove /dev/mtdXro node if needed */
+	if (MTD_DEVT(mtd->index))
+		device_destroy(mtd_class, MTD_DEVT(mtd->index) + 1);
+}
+
+static ssize_t mtd_type_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mtd_info *mtd = dev_to_mtd(dev);
+	char *type;
+
+	switch (mtd->type) {
+	case MTD_ABSENT:
+		type = "absent";
+		break;
+	case MTD_RAM:
+		type = "ram";
+		break;
+	case MTD_ROM:
+		type = "rom";
+		break;
+	case MTD_NORFLASH:
+		type = "nor";
+		break;
+	case MTD_NANDFLASH:
+		type = "nand";
+		break;
+	case MTD_DATAFLASH:
+		type = "dataflash";
+		break;
+	case MTD_UBIVOLUME:
+		type = "ubi";
+		break;
+	default:
+		type = "unknown";
+	}
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", type);
+}
+static DEVICE_ATTR(mtd_type, S_IRUGO, mtd_type_show, NULL);
+
+static struct attribute *mtd_attrs[] = {
+	&dev_attr_mtd_type.attr,
+	/* FIXME provide a /proc/mtd superset */
+	NULL,
+};
+
+struct attribute_group mtd_group = {
+	.attrs		= mtd_attrs,
+};
+
+struct attribute_group *mtd_groups[] = {
+	&mtd_group,
+	NULL,
+};
+
+static struct device_type mtd_devtype = {
+	.name		= "mtd",
+	.groups		= mtd_groups,
+	.release	= mtd_release,
+};
+
 /**
  *	add_mtd_device - register an MTD device
  *	@mtd: pointer to new MTD device info structure
@@ -40,6 +119,7 @@  static LIST_HEAD(mtd_notifiers);
  *	notify each currently active MTD 'user' of its arrival. Returns
  *	zero on success or 1 on failure, which currently will only happen
  *	if the number of present devices exceeds MAX_MTD_DEVICES (i.e. 16)
+ *	or there's a sysfs error.
  */
 
 int add_mtd_device(struct mtd_info *mtd)
@@ -80,6 +160,23 @@  int add_mtd_device(struct mtd_info *mtd)
 					       mtd->name);
 			}
 
+			/* Caller should have set dev.parent to match the
+			 * physical device.
+			 */
+			mtd->dev.type = &mtd_devtype;
+			mtd->dev.class = mtd_class;
+			mtd->dev.devt = MTD_DEVT(i);
+			dev_set_name(&mtd->dev, "mtd%d", i);
+			if (device_register(&mtd->dev) != 0) {
+				mtd_table[i] = NULL;
+				break;
+			}
+
+			if (MTD_DEVT(i))
+				device_create(mtd_class, mtd->dev.parent,
+						MTD_DEVT(i) + 1,
+						NULL, "mtd%dro", i);
+
 			DEBUG(0, "mtd: Giving out device %d to %s\n",i, mtd->name);
 			/* No need to get a refcount on the module containing
 			   the notifier, since we hold the mtd_table_mutex */
@@ -343,6 +440,24 @@  EXPORT_SYMBOL_GPL(register_mtd_user);
 EXPORT_SYMBOL_GPL(unregister_mtd_user);
 EXPORT_SYMBOL_GPL(default_mtd_writev);
 
+static int __init mtd_setup(void)
+{
+	mtd_class = class_create(THIS_MODULE, "mtd");
+
+	if (IS_ERR(mtd_class)) {
+		pr_err("Error creating mtd class.\n");
+		return PTR_ERR(mtd_class);
+	}
+	return 0;
+}
+core_initcall(mtd_setup);
+
+static void __exit mtd_teardown(void)
+{
+	class_destroy(mtd_class);
+}
+__exitcall(mtd_teardown);
+
 #ifdef CONFIG_PROC_FS
 
 /*====================================================================*/
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -343,6 +343,11 @@  static struct mtd_part *add_one_partitio
 	slave->mtd.name = part->name;
 	slave->mtd.owner = master->owner;
 
+	/* NOTE:  we don't arrange MTDs as a tree; it'd be error-prone
+	 * to have the same data be in two different partitions.
+	 */
+	slave->mtd.dev.parent = master->dev.parent;
+
 	slave->mtd.read = part_read;
 	slave->mtd.write = part_write;
 
@@ -493,7 +498,9 @@  out_register:
  * This function, given a master MTD object and a partition table, creates
  * and registers slave MTD objects which are bound to the master according to
  * the partition definitions.
- * (Q: should we register the master MTD object as well?)
+ *
+ * We don't register the master, or expect the caller to have done so,
+ * for reasons of data integrity.
  */
 
 int add_mtd_partitions(struct mtd_info *master,
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -11,6 +11,7 @@ 
 #include <linux/module.h>
 #include <linux/uio.h>
 #include <linux/notifier.h>
+#include <linux/device.h>
 
 #include <linux/mtd/compatmac.h>
 #include <mtd/mtd-abi.h>
@@ -223,6 +224,7 @@  struct mtd_info {
 	void *priv;
 
 	struct module *owner;
+	struct device dev;
 	int usecount;
 
 	/* If the driver is something smart, like UBI, it may need to maintain
@@ -233,6 +235,11 @@  struct mtd_info {
 	void (*put_device) (struct mtd_info *mtd);
 };
 
+static inline struct mtd_info *dev_to_mtd(struct device *dev)
+{
+	return dev ? container_of(dev, struct mtd_info, dev) : NULL;
+}
+
 static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
 {
 	if (mtd->erasesize_shift)