Patchwork [RFC/PATCH,v2] ubi: Add ubiblock read-write driver

login
register
mail settings
Submitter Ezequiel Garcia
Date Dec. 12, 2012, 12:21 p.m.
Message ID <1355314912-9321-1-git-send-email-elezegarcia@gmail.com>
Download mbox | patch
Permalink /patch/205497/
State Changes Requested
Headers show

Comments

Ezequiel Garcia - Dec. 12, 2012, 12:21 p.m.
From: Ezequiel Garcia <ezequiel@free-electrons.com>

Block device emulation on top of ubi volumes with read/write support.
Block devices are created upon user request through the
'vol' module parameter.

For instance,

  $ modprobe ubiblock vol=/dev/ubi0_0
  $ modprobe ubiblock vol=0,rootfs

Read/write access is expected to work fairly well because the
request queue at block elevator orders block transfers to achieve
space-locality.
In other words, it's expected that reads and writes gets ordered
to address the same LEB.

To help this and reduce access to the UBI volume,
two 1-LEB size caches have been implemented:
one for reading and one for writing.

Every read and every write goes through one of these caches.
Read requests can be satisfied through the read cache or the write cache.
Write requests always fill the write cache (possibly flushing it).
This fill is done through the read cache, if this is possible.
If the requested leb is not cached, it's loaded to the associated cache.

Flash device is written when write cache is flushed on:
* ubiblock device release
* a different than cached leb is accessed
* io-barrier is received through a REQ_FLUSH request

By creating two caches we decouple read access from write access,
in an effort to improve wear leveling.

Both caches are 1-LEB bytes, vmalloced at open() and freed at release();
in addition, each ubiblock has a workqueue associated (not a thread).
An unused ubiblock shouldn't waste much resources.

Signed-off-by: Ezequiel Garcia <ezequiel@free-electrons.com>
---
Changes from v1:
 * Switch to 2-caches: one for reading, one for writing
   as suggested by Richard Weinberger.
 * IO barriers supported using REQ_FLUSH as requested by Artem Bityutskiy.
 * Block devices are attached to ubi volumes using a parameter.

 drivers/mtd/ubi/Kconfig    |   16 +
 drivers/mtd/ubi/Makefile   |    1 +
 drivers/mtd/ubi/ubiblock.c |  830 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 847 insertions(+), 0 deletions(-)
 create mode 100644 drivers/mtd/ubi/ubiblock.c
richard -rw- weinberger - Dec. 12, 2012, 3:21 p.m.
Hi!

A few comments...

On Wed, Dec 12, 2012 at 1:21 PM, Ezequiel Garcia <elezegarcia@gmail.com> wrote:
> --- /dev/null
> +++ b/drivers/mtd/ubi/ubiblock.c
> @@ -0,0 +1,830 @@
> +/*
> + * Copyright (c) 2012 Ezequiel Garcia
> + * Copyright (c) 2011 Free Electrons
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.

Linux is GPLv2 and not v2 or any later version...

> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
> + * the GNU General Public License for more details.
> + *
> + * TODO: Add parameter for autoloading
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/err.h>
> +#include <linux/kernel.h>
> +#include <linux/list.h>
> +#include <linux/mutex.h>
> +#include <linux/slab.h>
> +#include <linux/mtd/ubi.h>
> +#include <linux/workqueue.h>
> +#include <linux/blkdev.h>
> +#include <linux/hdreg.h>
> +
> +#include "ubi-media.h"
> +
> +#if 0
> +static bool auto_vol;
> +module_param(auto_vol, int, 0644);
> +MODULE_PARM_DESC(auto_vol, "Automatically attach a block layer to each volume")
> +#endif

Please remove dead code.

> +/* Maximum number of supported devices */
> +#define UBIBLOCK_MAX_DEVICES 32
> +
> +/* Maximum length of the 'vol=' parameter */
> +#define UBIBLOCK_VOL_PARAM_LEN 64
> +
> +/* Maximum number of comma-separated items in the 'vol=' parameter */
> +#define UBIBLOCK_VOL_PARAM_COUNT 2
> +
> +struct ubiblock_vol_param {
> +       int ubi_num;
> +       int vol_id;
> +       char name[UBIBLOCK_VOL_PARAM_LEN];
> +};
> +
> +/* Numbers of elements set in the @ubiblock_vol_param array */
> +static int ubiblock_devs;
> +
> +/* MTD devices specification parameters */
> +static struct ubiblock_vol_param ubiblock_vol_param[UBIBLOCK_MAX_DEVICES];
> +
> +struct ubiblock_cache {
> +       char *buffer;
> +       enum { STATE_EMPTY, STATE_CLEAN, STATE_DIRTY } state;
> +       int leb_num;
> +};
> +
> +struct ubiblock {
> +       struct ubi_volume_desc *desc;
> +       struct ubi_volume_info *vi;
> +       int ubi_num;
> +       int vol_id;
> +       int refcnt;
> +
> +       struct gendisk *gd;
> +       struct request_queue *rq;
> +
> +       struct workqueue_struct *wq;
> +       struct work_struct work;
> +
> +       struct mutex vol_mutex;
> +       spinlock_t queue_lock;
> +       struct list_head list;
> +
> +       int leb_size;
> +       struct ubiblock_cache read_cache;
> +       struct ubiblock_cache write_cache;
> +};
> +
> +/* Linked list of all ubiblock instances */
> +static LIST_HEAD(ubiblock_devices);
> +static DEFINE_MUTEX(devices_mutex);
> +static int ubiblock_major;

Why is ubiblock_major not embedded in struct ubiblock?

> +/*
> + * Ugh, this parameter parsing code is simply awful :(
> + */
> +static int ubiblock_set_vol_param(const char *val,
> +                                 const struct kernel_param *kp)
> +{
> +       int len, i, ret;
> +       struct ubiblock_vol_param *param;
> +       char buf[UBIBLOCK_VOL_PARAM_LEN];
> +       char *pbuf = &buf[0];
> +       char *tokens[UBIBLOCK_VOL_PARAM_COUNT];
> +
> +       if (!val)
> +               return -EINVAL;
> +
> +       len = strnlen(val, UBIBLOCK_VOL_PARAM_LEN);
> +       if (len == 0) {
> +               pr_warn("empty 'vol=' parameter - ignored\n");
> +               return 0;
> +       }
> +
> +       if (len == UBIBLOCK_VOL_PARAM_LEN) {
> +               pr_err("parameter \"%s\" is too long, max. is %d\n",
> +                       val, UBIBLOCK_VOL_PARAM_LEN);
> +               return -EINVAL;
> +       }
> +
> +       strcpy(buf, val);
> +
> +       /* Get rid of the final newline */
> +       if (buf[len - 1] == '\n')
> +               buf[len - 1] = '\0';
> +
> +       for (i = 0; i < UBIBLOCK_VOL_PARAM_COUNT; i++)
> +               tokens[i] = strsep(&pbuf, ",");
> +
> +       param = &ubiblock_vol_param[ubiblock_devs];
> +       if (tokens[1]) {
> +               /* Two parameters: can be 'ubi, vol_id' or 'ubi, vol_name' */
> +               ret = kstrtoint(tokens[0], 10, &param->ubi_num);
> +               if (ret < 0)
> +                       return -EINVAL;
> +
> +               /* Second param can be a number or a name */
> +               ret = kstrtoint(tokens[1], 10, &param->vol_id);
> +               if (ret < 0) {
> +                       param->vol_id = -1;
> +                       strcpy(param->name, tokens[1]);
> +               }
> +
> +       } else {
> +               /* One parameter: must be device path */
> +               strcpy(param->name, tokens[0]);
> +               param->ubi_num = -1;
> +               param->vol_id = -1;
> +       }
> +
> +       ubiblock_devs++;

No locking? (I didn't check all callers)

> +       return 0;
> +}
> +
> +static const struct kernel_param_ops ubiblock_param_ops = {
> +        .set    = ubiblock_set_vol_param,
> +};
> +module_param_cb(vol, &ubiblock_param_ops, NULL, 0644);
> +
> +static struct ubiblock *find_dev_nolock(int ubi_num, int vol_id)
> +{
> +       struct ubiblock *dev;
> +
> +       list_for_each_entry(dev, &ubiblock_devices, list)
> +               if (dev->ubi_num == ubi_num && dev->vol_id == vol_id)
> +                       return dev;
> +       return NULL;
> +}
> +
> +static bool leb_on_cache(struct ubiblock_cache *cache, int leb_num)
> +{
> +       return cache->leb_num == leb_num;
> +}
> +
> +static int ubiblock_fill_cache(struct ubiblock *dev, int leb_num,
> +                              struct ubiblock_cache *cache,
> +                              struct ubiblock_cache *aux_cache)
> +{
> +       int ret;
> +
> +       /* Warn if we fill cache while being dirty */
> +       WARN_ON(cache->state == STATE_DIRTY);
> +
> +       cache->leb_num = leb_num;
> +       cache->state = STATE_CLEAN;
> +
> +       /*
> +        * If leb is on auxiliary cache, we use it to fill
> +        * the cache. This auxiliary cache needs to be invalidated.
> +        */
> +       if (aux_cache && leb_on_cache(aux_cache, leb_num)) {
> +

Why is there a blank line?

> +               aux_cache->leb_num = -1;
> +               aux_cache->state = STATE_EMPTY;
> +               memcpy(cache->buffer, aux_cache->buffer, dev->leb_size);
> +       } else {
> +

Too.

> +               ret = ubi_read(dev->desc, leb_num, cache->buffer, 0,
> +                              dev->leb_size);
> +               if (ret) {
> +                       dev_err(disk_to_dev(dev->gd), "ubi_read error %d\n",
> +                               ret);
> +                       return ret;
> +               }
> +       }
> +       return 0;
> +}
> +
> +static int ubiblock_flush(struct ubiblock *dev, bool sync)
> +{
> +       struct ubiblock_cache* cache = &dev->write_cache;

struct ubiblock_cache *cache ...
please. Didn't checkpatch.pl note that?

> +       int ret = 0;

There is no need to initialize ret.

> +       if (cache->state != STATE_DIRTY)
> +               return 0;
> +
> +       /*
> +        * TODO: mtdblock sets STATE_EMPTY, arguing that it prevents the
> +        * underlying media to get changed without notice.
> +        * I'm not fully convinced, so I just put STATE_CLEAN.
> +        */
> +       cache->state = STATE_CLEAN;
> +
> +       /* Atomically change leb with buffer contents */
> +       ret = ubi_leb_change(dev->desc, cache->leb_num,
> +                            cache->buffer, dev->leb_size);
> +       if (ret) {
> +               dev_err(disk_to_dev(dev->gd), "ubi_leb_change error %d\n", ret);
> +               return ret;
> +       }
> +
> +       /* Sync ubi device when device is released and on block flush ioctl */
> +       if (sync)
> +               ret = ubi_sync(dev->ubi_num);
> +
> +       return ret;
> +}
> +
> +static int ubiblock_read(struct ubiblock *dev, char *buffer,
> +                        int pos, int len)
> +{
> +       int leb, offset, ret;
> +       int bytes_left = len;
> +       int to_read = len;
>
> +       char *cache_buffer;
> +
> +       /* Get leb:offset address to read from */
> +       leb = pos / dev->leb_size;
> +       offset = pos % dev->leb_size;
> +
> +       while (bytes_left) {
> +

Empty line.

> +               /*
> +                * We can only read one leb at a time.
> +                * Therefore if the read length is larger than
> +                * one leb size, we split the operation.
> +                */
> +               if (offset + to_read > dev->leb_size)
> +                       to_read = dev->leb_size - offset;
> +
> +               /*
> +                * 1) First try on read cache, if not there...
> +                * 2) then try on write cache, if not there...
> +                * 3) finally load this leb on read_cache
> +                *
> +                * Note that reading never flushes to disk!
> +                */
> +               if (leb_on_cache(&dev->read_cache, leb)) {
> +                       cache_buffer = dev->read_cache.buffer;
> +
> +               } else if (leb_on_cache(&dev->write_cache, leb)) {
> +                       cache_buffer = dev->write_cache.buffer;
> +
> +               } else {
> +                       /* Leb is not in any cache: fill read_cache */
> +                       ret = ubiblock_fill_cache(dev, leb,
> +                               &dev->read_cache, NULL);
> +                       if (ret)
> +                               return ret;
> +                       cache_buffer = dev->read_cache.buffer;
> +               }
> +
> +               memcpy(buffer, cache_buffer + offset, to_read);
> +               buffer += to_read;
> +               bytes_left -= to_read;
> +               to_read = bytes_left;
> +               leb++;
> +               offset = 0;
> +       }
> +       return 0;
> +}
> +
> +static int ubiblock_write(struct ubiblock *dev, const char *buffer,
> +                        int pos, int len)
> +{
> +       int leb, offset, ret;
> +       int bytes_left = len;
> +       int to_write = len;
> +       struct ubiblock_cache* cache = &dev->write_cache;
> +
> +       /* Get (leb:offset) address to write */
> +       leb = pos / dev->leb_size;
> +       offset = pos % dev->leb_size;
> +
> +       while (bytes_left) {
> +               /*
> +                * We can only write one leb at a time.
> +                * Therefore if the write length is larger than
> +                * one leb size, we split the operation.
> +                */
> +               if (offset + to_write > dev->leb_size)
> +                       to_write = dev->leb_size - offset;
> +
> +               /*
> +                * If leb is not on write cache, we flush current cached
> +                * leb to disk. Cache contents will be filled either
> +                * by using read cache or by reading device.
> +                */
> +               if (!leb_on_cache(cache, leb)) {
> +
> +                       ret = ubiblock_flush(dev, false);
> +                       if (ret)
> +                               return ret;
> +
> +                       ret = ubiblock_fill_cache(dev, leb,
> +                               cache, &dev->read_cache);
> +                       if (ret)
> +                               return ret;
> +               }
> +
> +               memcpy(cache->buffer + offset, buffer, to_write);
> +
> +               /* This is the only place where we dirt the write cache */
> +               cache->state = STATE_DIRTY;
> +
> +               buffer += to_write;
> +               bytes_left -= to_write;
> +               to_write = bytes_left;
> +               offset = 0;
> +               leb++;
> +       }
> +       return 0;
> +}
> +
> +static int do_ubiblock_request(struct ubiblock *dev, struct request *req)
> +{
> +       int pos, len;
> +
> +       if (req->cmd_flags & REQ_FLUSH)
> +               return ubiblock_flush(dev, true);
> +
> +       if (req->cmd_type != REQ_TYPE_FS)
> +               return -EIO;
> +
> +       if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
> +           get_capacity(req->rq_disk))
> +               return -EIO;
> +
> +       pos = blk_rq_pos(req) << 9;
> +       len = blk_rq_cur_bytes(req);
> +
> +       switch (rq_data_dir(req)) {
> +       case READ:
> +               return ubiblock_read(dev, req->buffer, pos, len);
> +       case WRITE:
> +               return ubiblock_write(dev, req->buffer, pos, len);
> +       default:
> +               return -EIO;
> +       }
> +
> +       return 0;
> +}
> +
> +static void ubiblock_do_work(struct work_struct *work)
> +{
> +       struct ubiblock *dev =
> +               container_of(work, struct ubiblock, work);
> +       struct request_queue *rq = dev->rq;
> +       struct request *req;
> +       int res;
> +
> +       spin_lock_irq(rq->queue_lock);
> +
> +       req = blk_fetch_request(rq);
> +       while (req) {
> +
> +               spin_unlock_irq(rq->queue_lock);
> +
> +               mutex_lock(&dev->vol_mutex);
> +               res = do_ubiblock_request(dev, req);
> +               mutex_unlock(&dev->vol_mutex);
> +
> +               spin_lock_irq(rq->queue_lock);
> +
> +               /*
> +                * If we're done with this request,
> +                * we need to fetch a new one
> +                */
> +               if (!__blk_end_request_cur(req, res))
> +                       req = blk_fetch_request(rq);
> +       }
> +
> +       spin_unlock_irq(rq->queue_lock);
> +}
> +
> +static void ubiblock_request(struct request_queue *rq)
> +{
> +       struct ubiblock *dev;
> +       struct request *req;
> +
> +       dev = rq->queuedata;
> +
> +       if (!dev)
> +               while ((req = blk_fetch_request(rq)) != NULL)
> +                       __blk_end_request_all(req, -ENODEV);
> +       else
> +               queue_work(dev->wq, &dev->work);
> +}
> +
> +static int ubiblock_alloc_cache(struct ubiblock_cache *cache, int size)
> +{
> +       cache->state = STATE_EMPTY;
> +       cache->leb_num = -1;
> +       cache->buffer = vmalloc(size);
> +       if (!cache->buffer)
> +               return -ENOMEM;
> +       return 0;
> +}
> +
> +static void ubiblock_free_cache(struct ubiblock_cache *cache)
> +{
> +       cache->leb_num = -1;
> +       cache->state = STATE_EMPTY;
> +       vfree(cache->buffer);
> +}
> +
> +static int ubiblock_open(struct block_device *bdev, fmode_t mode)
> +{
> +       struct ubiblock *dev = bdev->bd_disk->private_data;
> +       int ubi_mode = UBI_READONLY;
> +       int ret;
> +
> +       mutex_lock(&dev->vol_mutex);
> +       if (dev->refcnt > 0) {
> +               /*
> +                * The volume is already opened,
> +                * just increase the reference counter
> +                */
> +               dev->refcnt++;
> +               mutex_unlock(&dev->vol_mutex);
> +               return 0;
> +       }
> +
> +       if (mode & FMODE_WRITE)
> +               ubi_mode = UBI_READWRITE;
> +
> +       dev->desc = ubi_open_volume(dev->ubi_num, dev->vol_id, ubi_mode);
> +       if (IS_ERR(dev->desc)) {
> +               dev_err(disk_to_dev(dev->gd),
> +                       "failed to open ubi volume %d_%d\n",
> +                       dev->ubi_num, dev->vol_id);
> +
> +               ret = PTR_ERR(dev->desc);
> +               dev->desc = NULL;
> +               goto out_unlock;
> +       }
> +
> +       dev->vi = kzalloc(sizeof(struct ubi_volume_info), GFP_KERNEL);
> +       if (!dev->vi) {
> +               ret = -ENOMEM;
> +               goto out_close;
> +       }
> +       ubi_get_volume_info(dev->desc, dev->vi);
> +       dev->leb_size = dev->vi->usable_leb_size;
> +
> +       ret = ubiblock_alloc_cache(&dev->read_cache, dev->leb_size);
> +       if (ret)
> +               goto out_free;
> +
> +       ret = ubiblock_alloc_cache(&dev->write_cache, dev->leb_size);
> +       if (ret)
> +               goto out_free_cache;
> +
> +       dev->refcnt++;
> +       mutex_unlock(&dev->vol_mutex);
> +       return 0;
> +
> +out_free_cache:
> +       ubiblock_free_cache(&dev->read_cache);
> +out_free:
> +       kfree(dev->vi);
> +out_close:
> +       ubi_close_volume(dev->desc);
> +       dev->desc = NULL;
> +out_unlock:
> +       mutex_unlock(&dev->vol_mutex);
> +       return ret;
> +}
> +
> +static int ubiblock_release(struct gendisk *gd, fmode_t mode)
> +{
> +       struct ubiblock *dev = gd->private_data;
> +
> +       mutex_lock(&dev->vol_mutex);
> +
> +       dev->refcnt--;
> +       if (dev->refcnt == 0) {
> +               ubiblock_flush(dev, true);
> +
> +               ubiblock_free_cache(&dev->read_cache);
> +               ubiblock_free_cache(&dev->write_cache);
> +
> +               kfree(dev->vi);
> +               ubi_close_volume(dev->desc);
> +
> +               dev->vi = NULL;
> +               dev->desc = NULL;
> +       }
> +
> +       mutex_unlock(&dev->vol_mutex);
> +       return 0;
> +}
> +
> +static int ubiblock_ioctl(struct block_device *bdev, fmode_t mode,
> +                             unsigned int cmd, unsigned long arg)
> +{
> +       struct ubiblock *dev = bdev->bd_disk->private_data;
> +       int ret = -ENXIO;

No need to initialize ret.

> +       if (!dev)
> +               return ret;
> +
> +       mutex_lock(&dev->vol_mutex);
> +
> +       /* I can't get this to get called. What's going on? */
> +       switch (cmd) {
> +       case BLKFLSBUF:
> +               ret = ubiblock_flush(dev, true);
> +               break;
> +       default:
> +               ret = -ENOTTY;
> +       }
> +
> +       mutex_unlock(&dev->vol_mutex);
> +       return ret;
> +}
> +
> +static int ubiblock_getgeo(struct block_device *bdev, struct hd_geometry *geo)
> +{
> +       /* Some tools might require this information */
> +       geo->heads = 1;
> +       geo->cylinders = 1;
> +       geo->sectors = get_capacity(bdev->bd_disk);
> +       geo->start = 0;
> +       return 0;
> +}
> +
> +static const struct block_device_operations ubiblock_ops = {
> +       .owner = THIS_MODULE,
> +       .open = ubiblock_open,
> +       .release = ubiblock_release,
> +       .ioctl = ubiblock_ioctl,
> +       .getgeo = ubiblock_getgeo,
> +};
> +
> +static int ubiblock_add(struct ubi_volume_info *vi)
> +{
> +       struct ubiblock *dev;
> +       struct gendisk *gd;
> +       int disk_capacity;
> +       int ret;

Please but variables of equal type in the same line.

> +       /* Check that the volume isn't already handled */
> +       mutex_lock(&devices_mutex);
> +       if (find_dev_nolock(vi->ubi_num, vi->vol_id)) {
> +               mutex_unlock(&devices_mutex);
> +               return -EEXIST;
> +       }
> +       mutex_unlock(&devices_mutex);
> +
> +       dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL);
> +       if (!dev)
> +               return -ENOMEM;
> +
> +       mutex_init(&dev->vol_mutex);
> +
> +       dev->ubi_num = vi->ubi_num;
> +       dev->vol_id = vi->vol_id;
> +
> +       /* Initialize the gendisk of this ubiblock device */
> +       gd = alloc_disk(1);
> +       if (!gd) {
> +               pr_err("alloc_disk failed\n");
> +               ret = -ENODEV;
> +               goto out_free_dev;
> +       }
> +
> +       gd->fops = &ubiblock_ops;
> +       gd->major = ubiblock_major;
> +       gd->first_minor = dev->ubi_num * UBI_MAX_VOLUMES + dev->vol_id;
> +       gd->private_data = dev;
> +       sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
> +       disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
> +       set_capacity(gd, disk_capacity);
> +       dev->gd = gd;
> +
> +       spin_lock_init(&dev->queue_lock);
> +       dev->rq = blk_init_queue(ubiblock_request, &dev->queue_lock);
> +       if (!dev->rq) {
> +               pr_err("blk_init_queue failed\n");
> +               ret = -ENODEV;
> +               goto out_put_disk;
> +       }
> +
> +       dev->rq->queuedata = dev;
> +       dev->gd->queue = dev->rq;
> +
> +       blk_queue_flush(dev->rq, REQ_FLUSH);
> +
> +       /* TODO: Is performance better or worse with this flag? */

Find out yourself. Run some benchmarks. :)

> +       /* queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->rq);*/
> +
> +       /*
> +        * Create one workqueue per volume (per registered block device).
> +        * Rembember workqueues are cheap, they're not threads.
> +        */
> +       dev->wq = alloc_workqueue(gd->disk_name, 0, 0);
> +       if (!dev->wq)
> +               goto out_free_queue;
> +       INIT_WORK(&dev->work, ubiblock_do_work);
> +
> +       mutex_lock(&devices_mutex);
> +       list_add_tail(&dev->list, &ubiblock_devices);
> +       mutex_unlock(&devices_mutex);
> +
> +       /* Must be the last step: anyone can call file ops from now on */
> +       add_disk(dev->gd);
> +
> +       dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)\n",
> +                dev->ubi_num, dev->vol_id, vi->name);
> +
> +       return 0;
> +
> +out_free_queue:
> +       blk_cleanup_queue(dev->rq);
> +out_put_disk:
> +       put_disk(dev->gd);
> +out_free_dev:
> +       kfree(dev);
> +
> +       return ret;
> +}
> +
> +static void ubiblock_cleanup(struct ubiblock *dev)
> +{
> +       del_gendisk(dev->gd);
> +       blk_cleanup_queue(dev->rq);
> +       put_disk(dev->gd);
> +}
> +
> +static void ubiblock_del(struct ubi_volume_info *vi)
> +{
> +       struct ubiblock *dev;
> +
> +       mutex_lock(&devices_mutex);
> +       dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
> +       if (!dev) {
> +               mutex_unlock(&devices_mutex);
> +               return;
> +       }
> +       /* Remove from device list */
> +       list_del(&dev->list);
> +       mutex_unlock(&devices_mutex);
> +
> +       /* Flush pending work and stop this workqueue */
> +       destroy_workqueue(dev->wq);
> +
> +       mutex_lock(&dev->vol_mutex);
> +
> +       /*
> +        * This means that ubiblock device is opened and in usage.
> +        * However, this shouldn't happen, since we have
> +        * called ubi_open_volume() at open() time, thus preventing
> +        * volume removal.
> +        */
> +       WARN_ON(dev->desc);
> +       ubiblock_cleanup(dev);
> +
> +       mutex_unlock(&dev->vol_mutex);
> +
> +       kfree(dev);
> +}
> +
> +static void ubiblock_resize(struct ubi_volume_info *vi)
> +{
> +       struct ubiblock *dev;
> +       int disk_capacity;
> +
> +       /*
> +        * We don't touch the list, but we better lock it: it could be that the
> +        * device gets removed between the time the device has been found and
> +        * the time we access dev->gd
> +        */
> +       mutex_lock(&devices_mutex);
> +       dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
> +       if (!dev) {
> +               mutex_unlock(&devices_mutex);
> +               return;
> +       }
> +       mutex_unlock(&devices_mutex);

You can unlock the mutex directl after find_dev_nolock().
No need to write it twice.

> +       mutex_lock(&dev->vol_mutex);
> +       disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
> +       set_capacity(dev->gd, disk_capacity);
> +       dev_dbg(disk_to_dev(dev->gd), "resized to %d LEBs\n", vi->size);
> +       mutex_unlock(&dev->vol_mutex);
> +}
> +
> +static int ubiblock_notify(struct notifier_block *nb,
> +                        unsigned long notification_type, void *ns_ptr)
> +{
> +       struct ubi_notification *nt = ns_ptr;
> +
> +       switch (notification_type) {
> +       case UBI_VOLUME_ADDED:
> +               if (0)
> +                       ubiblock_add(&nt->vi);

if (0) !?

> +               break;
> +       case UBI_VOLUME_REMOVED:
> +               ubiblock_del(&nt->vi);
> +               break;
> +       case UBI_VOLUME_RESIZED:
> +               ubiblock_resize(&nt->vi);
> +               break;
> +       default:
> +               break;
> +       }
> +       return NOTIFY_OK;
> +}
> +
> +static struct notifier_block ubiblock_notifier = {
> +       .notifier_call = ubiblock_notify,
> +};
> +
> +static struct ubi_volume_desc *
> +open_volume_desc(const char *name, int ubi_num, int vol_id)
> +{
> +       if (ubi_num == -1)
> +               /* No ubi num, name must be a vol device path */
> +               return ubi_open_volume_path(name, UBI_READONLY);
> +       else if (vol_id == -1)
> +               /* No vol_id, must be vol_name */
> +               return ubi_open_volume_nm(ubi_num, name, UBI_READONLY);
> +       else
> +               return ubi_open_volume(ubi_num, vol_id, UBI_READONLY);
> +}
> +
> +static void ubiblock_register_vol_param(void)
> +{
> +       int i, ret;
> +       struct ubiblock_vol_param *p;
> +       struct ubi_volume_desc *desc;
> +       struct ubi_volume_info vi;
> +
> +       for (i = 0; i < ubiblock_devs; i++) {
> +               p = &ubiblock_vol_param[i];
> +
> +               desc = open_volume_desc(p->name, p->ubi_num, p->vol_id);
> +               if (IS_ERR(desc))
> +                       continue;
> +
> +               ubi_get_volume_info(desc, &vi);
> +               ret = ubiblock_add(&vi);
> +               if (ret)
> +                       pr_warn("can't add '%s' volume, err=%d\n",
> +                               vi.name, ret);
> +
> +               ubi_close_volume(desc);
> +       }
> +}
> +
> +static int __init ubiblock_init(void)
> +{
> +       ubiblock_major = register_blkdev(0, "ubiblock");
> +       if (ubiblock_major < 0)
> +               return ubiblock_major;
> +
> +       ubiblock_register_vol_param();
> +
> +       /*
> +        * Blocks will get registered dynamically.
> +        * Each ubi volume will get a corresponding block device.
> +        */
> +       return ubi_register_volume_notifier(&ubiblock_notifier, 1);
> +}
> +
> +static void __exit ubiblock_exit(void)
> +{
> +       struct ubiblock *next;
> +       struct ubiblock *dev;

No need to write two lines. Put next and dev in the same one.

> +       ubi_unregister_volume_notifier(&ubiblock_notifier);
> +
> +       list_for_each_entry_safe(dev, next, &ubiblock_devices, list) {
> +
> +               /* Flush pending work and stop workqueue */
> +               destroy_workqueue(dev->wq);
> +
> +               /* The module is being forcefully removed */
> +               WARN_ON(dev->desc);
> +
> +               /* Remove from device list */
> +               list_del(&dev->list);
> +
> +               ubiblock_cleanup(dev);
> +
> +               kfree(dev);
> +       }
> +
> +       unregister_blkdev(ubiblock_major, "ubiblock");
> +}
> +
> +module_init(ubiblock_init);
> +module_exit(ubiblock_exit);
> +
> +MODULE_DESCRIPTION("Block device emulation access to UBI volumes");

I'm not a native speaker but this sentence sound strange to me.
Gregory CLEMENT - Dec. 12, 2012, 3:50 p.m.
2012/12/12 richard -rw- weinberger <richard.weinberger@gmail.com>:
> Hi!
>
> A few comments...
>
> On Wed, Dec 12, 2012 at 1:21 PM, Ezequiel Garcia <elezegarcia@gmail.com> wrote:
>> --- /dev/null
>> +++ b/drivers/mtd/ubi/ubiblock.c
>> @@ -0,0 +1,830 @@
>> +/*
>> + * Copyright (c) 2012 Ezequiel Garcia
>> + * Copyright (c) 2011 Free Electrons
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>
> Linux is GPLv2 and not v2 or any later version...

I think it is more complicate than this.
In the kernel you have code under GPLv2, GPLv2+ and even BSD licenses.
All these licenses are compatible but if you want to distribute all the sources
of the kernel then you are under the more restrictive license: GPLv2.
So we can say that kernel is GPLv2, but it doesn't prevent to submit code
under GPLv2+.

There seems to have a couple of files under GPLv2:
git grep "(at your option) any later version" | wc -l
6800

Regards,
richard -rw- weinberger - Dec. 12, 2012, 4:14 p.m.
On Wed, Dec 12, 2012 at 4:50 PM, Gregory CLEMENT <gclement00@gmail.com> wrote:
> 2012/12/12 richard -rw- weinberger <richard.weinberger@gmail.com>:
>> Hi!
>>
>> A few comments...
>>
>> On Wed, Dec 12, 2012 at 1:21 PM, Ezequiel Garcia <elezegarcia@gmail.com> wrote:
>>> --- /dev/null
>>> +++ b/drivers/mtd/ubi/ubiblock.c
>>> @@ -0,0 +1,830 @@
>>> +/*
>>> + * Copyright (c) 2012 Ezequiel Garcia
>>> + * Copyright (c) 2011 Free Electrons
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License as published by
>>> + * the Free Software Foundation; either version 2 of the License, or
>>> + * (at your option) any later version.
>>
>> Linux is GPLv2 and not v2 or any later version...
>
> I think it is more complicate than this.
> In the kernel you have code under GPLv2, GPLv2+ and even BSD licenses.
> All these licenses are compatible but if you want to distribute all the sources
> of the kernel then you are under the more restrictive license: GPLv2.
> So we can say that kernel is GPLv2, but it doesn't prevent to submit code
> under GPLv2+.

The kernel is GPLv2. period.

> There seems to have a couple of files under GPLv2:
> git grep "(at your option) any later version" | wc -l
> 6800

These lines are wrong.
Ezequiel Garcia - Dec. 12, 2012, 4:18 p.m.
Hi Richard,

Thanks for the feedback.
Yeah I forgot the remove some clumsy or work-in-progress code. Shame on me :-(

Anyway, I wanted to post the new cache shape and the new module parameter
to get some feedback. I'd like to decide if write support is useful or not.

On Wed, Dec 12, 2012 at 12:21 PM, richard -rw- weinberger
<richard.weinberger@gmail.com> wrote:
> Hi!
>
> A few comments...
>
> On Wed, Dec 12, 2012 at 1:21 PM, Ezequiel Garcia <elezegarcia@gmail.com> wrote:
>> --- /dev/null
>> +++ b/drivers/mtd/ubi/ubiblock.c
>> @@ -0,0 +1,830 @@
>> +/*
>> + * Copyright (c) 2012 Ezequiel Garcia
>> + * Copyright (c) 2011 Free Electrons
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>
> Linux is GPLv2 and not v2 or any later version...
>

True.

>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
>> + * the GNU General Public License for more details.
>> + *
>> + * TODO: Add parameter for autoloading
>> + */
>> +
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/err.h>
>> +#include <linux/kernel.h>
>> +#include <linux/list.h>
>> +#include <linux/mutex.h>
>> +#include <linux/slab.h>
>> +#include <linux/mtd/ubi.h>
>> +#include <linux/workqueue.h>
>> +#include <linux/blkdev.h>
>> +#include <linux/hdreg.h>
>> +
>> +#include "ubi-media.h"
>> +
>> +#if 0
>> +static bool auto_vol;
>> +module_param(auto_vol, int, 0644);
>> +MODULE_PARM_DESC(auto_vol, "Automatically attach a block layer to each volume")
>> +#endif
>
> Please remove dead code.
>

Actually, I wonder if this parameter auto_vol, is useful or not.
When 'true' it would make ubiblock driver create an ubiblock device
automatically
per ubi volume created.

Ideas?

>> +/* Maximum number of supported devices */
>> +#define UBIBLOCK_MAX_DEVICES 32
>> +
>> +/* Maximum length of the 'vol=' parameter */
>> +#define UBIBLOCK_VOL_PARAM_LEN 64
>> +
>> +/* Maximum number of comma-separated items in the 'vol=' parameter */
>> +#define UBIBLOCK_VOL_PARAM_COUNT 2
>> +
>> +struct ubiblock_vol_param {
>> +       int ubi_num;
>> +       int vol_id;
>> +       char name[UBIBLOCK_VOL_PARAM_LEN];
>> +};
>> +
>> +/* Numbers of elements set in the @ubiblock_vol_param array */
>> +static int ubiblock_devs;
>> +
>> +/* MTD devices specification parameters */
>> +static struct ubiblock_vol_param ubiblock_vol_param[UBIBLOCK_MAX_DEVICES];
>> +
>> +struct ubiblock_cache {
>> +       char *buffer;
>> +       enum { STATE_EMPTY, STATE_CLEAN, STATE_DIRTY } state;
>> +       int leb_num;
>> +};
>> +
>> +struct ubiblock {
>> +       struct ubi_volume_desc *desc;
>> +       struct ubi_volume_info *vi;
>> +       int ubi_num;
>> +       int vol_id;
>> +       int refcnt;
>> +
>> +       struct gendisk *gd;
>> +       struct request_queue *rq;
>> +
>> +       struct workqueue_struct *wq;
>> +       struct work_struct work;
>> +
>> +       struct mutex vol_mutex;
>> +       spinlock_t queue_lock;
>> +       struct list_head list;
>> +
>> +       int leb_size;
>> +       struct ubiblock_cache read_cache;
>> +       struct ubiblock_cache write_cache;
>> +};
>> +
>> +/* Linked list of all ubiblock instances */
>> +static LIST_HEAD(ubiblock_devices);
>> +static DEFINE_MUTEX(devices_mutex);
>> +static int ubiblock_major;
>
> Why is ubiblock_major not embedded in struct ubiblock?
>

Because it's block device wide. When the block driver is registered
there is no ubiblock device allocated. ubiblock device is latter created,
one per ubi volume. See register_blkdev below.

>> +/*
>> + * Ugh, this parameter parsing code is simply awful :(
>> + */
>> +static int ubiblock_set_vol_param(const char *val,
>> +                                 const struct kernel_param *kp)
>> +{
>> +       int len, i, ret;
>> +       struct ubiblock_vol_param *param;
>> +       char buf[UBIBLOCK_VOL_PARAM_LEN];
>> +       char *pbuf = &buf[0];
>> +       char *tokens[UBIBLOCK_VOL_PARAM_COUNT];
>> +
>> +       if (!val)
>> +               return -EINVAL;
>> +
>> +       len = strnlen(val, UBIBLOCK_VOL_PARAM_LEN);
>> +       if (len == 0) {
>> +               pr_warn("empty 'vol=' parameter - ignored\n");
>> +               return 0;
>> +       }
>> +
>> +       if (len == UBIBLOCK_VOL_PARAM_LEN) {
>> +               pr_err("parameter \"%s\" is too long, max. is %d\n",
>> +                       val, UBIBLOCK_VOL_PARAM_LEN);
>> +               return -EINVAL;
>> +       }
>> +
>> +       strcpy(buf, val);
>> +
>> +       /* Get rid of the final newline */
>> +       if (buf[len - 1] == '\n')
>> +               buf[len - 1] = '\0';
>> +
>> +       for (i = 0; i < UBIBLOCK_VOL_PARAM_COUNT; i++)
>> +               tokens[i] = strsep(&pbuf, ",");
>> +
>> +       param = &ubiblock_vol_param[ubiblock_devs];
>> +       if (tokens[1]) {
>> +               /* Two parameters: can be 'ubi, vol_id' or 'ubi, vol_name' */
>> +               ret = kstrtoint(tokens[0], 10, &param->ubi_num);
>> +               if (ret < 0)
>> +                       return -EINVAL;
>> +
>> +               /* Second param can be a number or a name */
>> +               ret = kstrtoint(tokens[1], 10, &param->vol_id);
>> +               if (ret < 0) {
>> +                       param->vol_id = -1;
>> +                       strcpy(param->name, tokens[1]);
>> +               }
>> +
>> +       } else {
>> +               /* One parameter: must be device path */
>> +               strcpy(param->name, tokens[0]);
>> +               param->ubi_num = -1;
>> +               param->vol_id = -1;
>> +       }
>> +
>> +       ubiblock_devs++;
>
> No locking? (I didn't check all callers)
>

This parameter should be set only on module loading, unless I'm
missing something.
I guess you can set it later, but it won't have any effect.
Anyway, this needs some more tought.

>> +       return 0;
>> +}
>> +
>> +static const struct kernel_param_ops ubiblock_param_ops = {
>> +        .set    = ubiblock_set_vol_param,
>> +};
>> +module_param_cb(vol, &ubiblock_param_ops, NULL, 0644);
>> +
>> +static struct ubiblock *find_dev_nolock(int ubi_num, int vol_id)
>> +{
>> +       struct ubiblock *dev;
>> +
>> +       list_for_each_entry(dev, &ubiblock_devices, list)
>> +               if (dev->ubi_num == ubi_num && dev->vol_id == vol_id)
>> +                       return dev;
>> +       return NULL;
>> +}
>> +
>> +static bool leb_on_cache(struct ubiblock_cache *cache, int leb_num)
>> +{
>> +       return cache->leb_num == leb_num;
>> +}
>> +
>> +static int ubiblock_fill_cache(struct ubiblock *dev, int leb_num,
>> +                              struct ubiblock_cache *cache,
>> +                              struct ubiblock_cache *aux_cache)
>> +{
>> +       int ret;
>> +
>> +       /* Warn if we fill cache while being dirty */
>> +       WARN_ON(cache->state == STATE_DIRTY);
>> +
>> +       cache->leb_num = leb_num;
>> +       cache->state = STATE_CLEAN;
>> +
>> +       /*
>> +        * If leb is on auxiliary cache, we use it to fill
>> +        * the cache. This auxiliary cache needs to be invalidated.
>> +        */
>> +       if (aux_cache && leb_on_cache(aux_cache, leb_num)) {
>> +
>
> Why is there a blank line?
>

Looks nicer to me. Does it break any rule?


>> +               aux_cache->leb_num = -1;
>> +               aux_cache->state = STATE_EMPTY;
>> +               memcpy(cache->buffer, aux_cache->buffer, dev->leb_size);
>> +       } else {
>> +
>
> Too.
>

Ditto.

>> +               ret = ubi_read(dev->desc, leb_num, cache->buffer, 0,
>> +                              dev->leb_size);
>> +               if (ret) {
>> +                       dev_err(disk_to_dev(dev->gd), "ubi_read error %d\n",
>> +                               ret);
>> +                       return ret;
>> +               }
>> +       }
>> +       return 0;
>> +}
>> +
>> +static int ubiblock_flush(struct ubiblock *dev, bool sync)
>> +{
>> +       struct ubiblock_cache* cache = &dev->write_cache;
>
> struct ubiblock_cache *cache ...
> please. Didn't checkpatch.pl note that?
>

Oops.

>> +       int ret = 0;
>
> There is no need to initialize ret.
>

True.

>> +       if (cache->state != STATE_DIRTY)
>> +               return 0;
>> +
>> +       /*
>> +        * TODO: mtdblock sets STATE_EMPTY, arguing that it prevents the
>> +        * underlying media to get changed without notice.
>> +        * I'm not fully convinced, so I just put STATE_CLEAN.
>> +        */
>> +       cache->state = STATE_CLEAN;
>> +
>> +       /* Atomically change leb with buffer contents */
>> +       ret = ubi_leb_change(dev->desc, cache->leb_num,
>> +                            cache->buffer, dev->leb_size);
>> +       if (ret) {
>> +               dev_err(disk_to_dev(dev->gd), "ubi_leb_change error %d\n", ret);
>> +               return ret;
>> +       }
>> +
>> +       /* Sync ubi device when device is released and on block flush ioctl */
>> +       if (sync)
>> +               ret = ubi_sync(dev->ubi_num);
>> +
>> +       return ret;
>> +}
>> +
>> +static int ubiblock_read(struct ubiblock *dev, char *buffer,
>> +                        int pos, int len)
>> +{
>> +       int leb, offset, ret;
>> +       int bytes_left = len;
>> +       int to_read = len;
>>
>> +       char *cache_buffer;
>> +
>> +       /* Get leb:offset address to read from */
>> +       leb = pos / dev->leb_size;
>> +       offset = pos % dev->leb_size;
>> +
>> +       while (bytes_left) {
>> +
>
> Empty line.
>

Ditto. I'll re-read CodingStyle to see if I missed something.


>> +               /*
>> +                * We can only read one leb at a time.
>> +                * Therefore if the read length is larger than
>> +                * one leb size, we split the operation.
>> +                */
>> +               if (offset + to_read > dev->leb_size)
>> +                       to_read = dev->leb_size - offset;
>> +
>> +               /*
>> +                * 1) First try on read cache, if not there...
>> +                * 2) then try on write cache, if not there...
>> +                * 3) finally load this leb on read_cache
>> +                *
>> +                * Note that reading never flushes to disk!
>> +                */
>> +               if (leb_on_cache(&dev->read_cache, leb)) {
>> +                       cache_buffer = dev->read_cache.buffer;
>> +
>> +               } else if (leb_on_cache(&dev->write_cache, leb)) {
>> +                       cache_buffer = dev->write_cache.buffer;
>> +
>> +               } else {
>> +                       /* Leb is not in any cache: fill read_cache */
>> +                       ret = ubiblock_fill_cache(dev, leb,
>> +                               &dev->read_cache, NULL);
>> +                       if (ret)
>> +                               return ret;
>> +                       cache_buffer = dev->read_cache.buffer;
>> +               }
>> +
>> +               memcpy(buffer, cache_buffer + offset, to_read);
>> +               buffer += to_read;
>> +               bytes_left -= to_read;
>> +               to_read = bytes_left;
>> +               leb++;
>> +               offset = 0;
>> +       }
>> +       return 0;
>> +}
>> +
>> +static int ubiblock_write(struct ubiblock *dev, const char *buffer,
>> +                        int pos, int len)
>> +{
>> +       int leb, offset, ret;
>> +       int bytes_left = len;
>> +       int to_write = len;
>> +       struct ubiblock_cache* cache = &dev->write_cache;
>> +
>> +       /* Get (leb:offset) address to write */
>> +       leb = pos / dev->leb_size;
>> +       offset = pos % dev->leb_size;
>> +
>> +       while (bytes_left) {
>> +               /*
>> +                * We can only write one leb at a time.
>> +                * Therefore if the write length is larger than
>> +                * one leb size, we split the operation.
>> +                */
>> +               if (offset + to_write > dev->leb_size)
>> +                       to_write = dev->leb_size - offset;
>> +
>> +               /*
>> +                * If leb is not on write cache, we flush current cached
>> +                * leb to disk. Cache contents will be filled either
>> +                * by using read cache or by reading device.
>> +                */
>> +               if (!leb_on_cache(cache, leb)) {
>> +
>> +                       ret = ubiblock_flush(dev, false);
>> +                       if (ret)
>> +                               return ret;
>> +
>> +                       ret = ubiblock_fill_cache(dev, leb,
>> +                               cache, &dev->read_cache);
>> +                       if (ret)
>> +                               return ret;
>> +               }
>> +
>> +               memcpy(cache->buffer + offset, buffer, to_write);
>> +
>> +               /* This is the only place where we dirt the write cache */
>> +               cache->state = STATE_DIRTY;
>> +
>> +               buffer += to_write;
>> +               bytes_left -= to_write;
>> +               to_write = bytes_left;
>> +               offset = 0;
>> +               leb++;
>> +       }
>> +       return 0;
>> +}
>> +
>> +static int do_ubiblock_request(struct ubiblock *dev, struct request *req)
>> +{
>> +       int pos, len;
>> +
>> +       if (req->cmd_flags & REQ_FLUSH)
>> +               return ubiblock_flush(dev, true);
>> +
>> +       if (req->cmd_type != REQ_TYPE_FS)
>> +               return -EIO;
>> +
>> +       if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
>> +           get_capacity(req->rq_disk))
>> +               return -EIO;
>> +
>> +       pos = blk_rq_pos(req) << 9;
>> +       len = blk_rq_cur_bytes(req);
>> +
>> +       switch (rq_data_dir(req)) {
>> +       case READ:
>> +               return ubiblock_read(dev, req->buffer, pos, len);
>> +       case WRITE:
>> +               return ubiblock_write(dev, req->buffer, pos, len);
>> +       default:
>> +               return -EIO;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static void ubiblock_do_work(struct work_struct *work)
>> +{
>> +       struct ubiblock *dev =
>> +               container_of(work, struct ubiblock, work);
>> +       struct request_queue *rq = dev->rq;
>> +       struct request *req;
>> +       int res;
>> +
>> +       spin_lock_irq(rq->queue_lock);
>> +
>> +       req = blk_fetch_request(rq);
>> +       while (req) {
>> +
>> +               spin_unlock_irq(rq->queue_lock);
>> +
>> +               mutex_lock(&dev->vol_mutex);
>> +               res = do_ubiblock_request(dev, req);
>> +               mutex_unlock(&dev->vol_mutex);
>> +
>> +               spin_lock_irq(rq->queue_lock);
>> +
>> +               /*
>> +                * If we're done with this request,
>> +                * we need to fetch a new one
>> +                */
>> +               if (!__blk_end_request_cur(req, res))
>> +                       req = blk_fetch_request(rq);
>> +       }
>> +
>> +       spin_unlock_irq(rq->queue_lock);
>> +}
>> +
>> +static void ubiblock_request(struct request_queue *rq)
>> +{
>> +       struct ubiblock *dev;
>> +       struct request *req;
>> +
>> +       dev = rq->queuedata;
>> +
>> +       if (!dev)
>> +               while ((req = blk_fetch_request(rq)) != NULL)
>> +                       __blk_end_request_all(req, -ENODEV);
>> +       else
>> +               queue_work(dev->wq, &dev->work);
>> +}
>> +
>> +static int ubiblock_alloc_cache(struct ubiblock_cache *cache, int size)
>> +{
>> +       cache->state = STATE_EMPTY;
>> +       cache->leb_num = -1;
>> +       cache->buffer = vmalloc(size);
>> +       if (!cache->buffer)
>> +               return -ENOMEM;
>> +       return 0;
>> +}
>> +
>> +static void ubiblock_free_cache(struct ubiblock_cache *cache)
>> +{
>> +       cache->leb_num = -1;
>> +       cache->state = STATE_EMPTY;
>> +       vfree(cache->buffer);
>> +}
>> +
>> +static int ubiblock_open(struct block_device *bdev, fmode_t mode)
>> +{
>> +       struct ubiblock *dev = bdev->bd_disk->private_data;
>> +       int ubi_mode = UBI_READONLY;
>> +       int ret;
>> +
>> +       mutex_lock(&dev->vol_mutex);
>> +       if (dev->refcnt > 0) {
>> +               /*
>> +                * The volume is already opened,
>> +                * just increase the reference counter
>> +                */
>> +               dev->refcnt++;
>> +               mutex_unlock(&dev->vol_mutex);
>> +               return 0;
>> +       }
>> +
>> +       if (mode & FMODE_WRITE)
>> +               ubi_mode = UBI_READWRITE;
>> +
>> +       dev->desc = ubi_open_volume(dev->ubi_num, dev->vol_id, ubi_mode);
>> +       if (IS_ERR(dev->desc)) {
>> +               dev_err(disk_to_dev(dev->gd),
>> +                       "failed to open ubi volume %d_%d\n",
>> +                       dev->ubi_num, dev->vol_id);
>> +
>> +               ret = PTR_ERR(dev->desc);
>> +               dev->desc = NULL;
>> +               goto out_unlock;
>> +       }
>> +
>> +       dev->vi = kzalloc(sizeof(struct ubi_volume_info), GFP_KERNEL);
>> +       if (!dev->vi) {
>> +               ret = -ENOMEM;
>> +               goto out_close;
>> +       }
>> +       ubi_get_volume_info(dev->desc, dev->vi);
>> +       dev->leb_size = dev->vi->usable_leb_size;
>> +
>> +       ret = ubiblock_alloc_cache(&dev->read_cache, dev->leb_size);
>> +       if (ret)
>> +               goto out_free;
>> +
>> +       ret = ubiblock_alloc_cache(&dev->write_cache, dev->leb_size);
>> +       if (ret)
>> +               goto out_free_cache;
>> +
>> +       dev->refcnt++;
>> +       mutex_unlock(&dev->vol_mutex);
>> +       return 0;
>> +
>> +out_free_cache:
>> +       ubiblock_free_cache(&dev->read_cache);
>> +out_free:
>> +       kfree(dev->vi);
>> +out_close:
>> +       ubi_close_volume(dev->desc);
>> +       dev->desc = NULL;
>> +out_unlock:
>> +       mutex_unlock(&dev->vol_mutex);
>> +       return ret;
>> +}
>> +
>> +static int ubiblock_release(struct gendisk *gd, fmode_t mode)
>> +{
>> +       struct ubiblock *dev = gd->private_data;
>> +
>> +       mutex_lock(&dev->vol_mutex);
>> +
>> +       dev->refcnt--;
>> +       if (dev->refcnt == 0) {
>> +               ubiblock_flush(dev, true);
>> +
>> +               ubiblock_free_cache(&dev->read_cache);
>> +               ubiblock_free_cache(&dev->write_cache);
>> +
>> +               kfree(dev->vi);
>> +               ubi_close_volume(dev->desc);
>> +
>> +               dev->vi = NULL;
>> +               dev->desc = NULL;
>> +       }
>> +
>> +       mutex_unlock(&dev->vol_mutex);
>> +       return 0;
>> +}
>> +
>> +static int ubiblock_ioctl(struct block_device *bdev, fmode_t mode,
>> +                             unsigned int cmd, unsigned long arg)
>> +{
>> +       struct ubiblock *dev = bdev->bd_disk->private_data;
>> +       int ret = -ENXIO;
>
> No need to initialize ret.
>

Actually, that's needed because Il return it below. I can change that
to return -ENXIO, of course.

>> +       if (!dev)
>> +               return ret;
>> +
>> +       mutex_lock(&dev->vol_mutex);
>> +
>> +       /* I can't get this to get called. What's going on? */
>> +       switch (cmd) {
>> +       case BLKFLSBUF:
>> +               ret = ubiblock_flush(dev, true);
>> +               break;
>> +       default:
>> +               ret = -ENOTTY;
>> +       }
>> +
>> +       mutex_unlock(&dev->vol_mutex);
>> +       return ret;
>> +}
>> +
>> +static int ubiblock_getgeo(struct block_device *bdev, struct hd_geometry *geo)
>> +{
>> +       /* Some tools might require this information */
>> +       geo->heads = 1;
>> +       geo->cylinders = 1;
>> +       geo->sectors = get_capacity(bdev->bd_disk);
>> +       geo->start = 0;
>> +       return 0;
>> +}
>> +
>> +static const struct block_device_operations ubiblock_ops = {
>> +       .owner = THIS_MODULE,
>> +       .open = ubiblock_open,
>> +       .release = ubiblock_release,
>> +       .ioctl = ubiblock_ioctl,
>> +       .getgeo = ubiblock_getgeo,
>> +};
>> +
>> +static int ubiblock_add(struct ubi_volume_info *vi)
>> +{
>> +       struct ubiblock *dev;
>> +       struct gendisk *gd;
>> +       int disk_capacity;
>> +       int ret;
>
> Please but variables of equal type in the same line.
>

Yes. But is this a CodingStyle rule?

>> +       /* Check that the volume isn't already handled */
>> +       mutex_lock(&devices_mutex);
>> +       if (find_dev_nolock(vi->ubi_num, vi->vol_id)) {
>> +               mutex_unlock(&devices_mutex);
>> +               return -EEXIST;
>> +       }
>> +       mutex_unlock(&devices_mutex);
>> +
>> +       dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL);
>> +       if (!dev)
>> +               return -ENOMEM;
>> +
>> +       mutex_init(&dev->vol_mutex);
>> +
>> +       dev->ubi_num = vi->ubi_num;
>> +       dev->vol_id = vi->vol_id;
>> +
>> +       /* Initialize the gendisk of this ubiblock device */
>> +       gd = alloc_disk(1);
>> +       if (!gd) {
>> +               pr_err("alloc_disk failed\n");
>> +               ret = -ENODEV;
>> +               goto out_free_dev;
>> +       }
>> +
>> +       gd->fops = &ubiblock_ops;
>> +       gd->major = ubiblock_major;
>> +       gd->first_minor = dev->ubi_num * UBI_MAX_VOLUMES + dev->vol_id;
>> +       gd->private_data = dev;
>> +       sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
>> +       disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
>> +       set_capacity(gd, disk_capacity);
>> +       dev->gd = gd;
>> +
>> +       spin_lock_init(&dev->queue_lock);
>> +       dev->rq = blk_init_queue(ubiblock_request, &dev->queue_lock);
>> +       if (!dev->rq) {
>> +               pr_err("blk_init_queue failed\n");
>> +               ret = -ENODEV;
>> +               goto out_put_disk;
>> +       }
>> +
>> +       dev->rq->queuedata = dev;
>> +       dev->gd->queue = dev->rq;
>> +
>> +       blk_queue_flush(dev->rq, REQ_FLUSH);
>> +
>> +       /* TODO: Is performance better or worse with this flag? */
>
> Find out yourself. Run some benchmarks. :)
>
>> +       /* queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->rq);*/
>> +
>> +       /*
>> +        * Create one workqueue per volume (per registered block device).
>> +        * Rembember workqueues are cheap, they're not threads.
>> +        */
>> +       dev->wq = alloc_workqueue(gd->disk_name, 0, 0);
>> +       if (!dev->wq)
>> +               goto out_free_queue;
>> +       INIT_WORK(&dev->work, ubiblock_do_work);
>> +
>> +       mutex_lock(&devices_mutex);
>> +       list_add_tail(&dev->list, &ubiblock_devices);
>> +       mutex_unlock(&devices_mutex);
>> +
>> +       /* Must be the last step: anyone can call file ops from now on */
>> +       add_disk(dev->gd);
>> +
>> +       dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)\n",
>> +                dev->ubi_num, dev->vol_id, vi->name);
>> +
>> +       return 0;
>> +
>> +out_free_queue:
>> +       blk_cleanup_queue(dev->rq);
>> +out_put_disk:
>> +       put_disk(dev->gd);
>> +out_free_dev:
>> +       kfree(dev);
>> +
>> +       return ret;
>> +}
>> +
>> +static void ubiblock_cleanup(struct ubiblock *dev)
>> +{
>> +       del_gendisk(dev->gd);
>> +       blk_cleanup_queue(dev->rq);
>> +       put_disk(dev->gd);
>> +}
>> +
>> +static void ubiblock_del(struct ubi_volume_info *vi)
>> +{
>> +       struct ubiblock *dev;
>> +
>> +       mutex_lock(&devices_mutex);
>> +       dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
>> +       if (!dev) {
>> +               mutex_unlock(&devices_mutex);
>> +               return;
>> +       }
>> +       /* Remove from device list */
>> +       list_del(&dev->list);
>> +       mutex_unlock(&devices_mutex);
>> +
>> +       /* Flush pending work and stop this workqueue */
>> +       destroy_workqueue(dev->wq);
>> +
>> +       mutex_lock(&dev->vol_mutex);
>> +
>> +       /*
>> +        * This means that ubiblock device is opened and in usage.
>> +        * However, this shouldn't happen, since we have
>> +        * called ubi_open_volume() at open() time, thus preventing
>> +        * volume removal.
>> +        */
>> +       WARN_ON(dev->desc);
>> +       ubiblock_cleanup(dev);
>> +
>> +       mutex_unlock(&dev->vol_mutex);
>> +
>> +       kfree(dev);
>> +}
>> +
>> +static void ubiblock_resize(struct ubi_volume_info *vi)
>> +{
>> +       struct ubiblock *dev;
>> +       int disk_capacity;
>> +
>> +       /*
>> +        * We don't touch the list, but we better lock it: it could be that the
>> +        * device gets removed between the time the device has been found and
>> +        * the time we access dev->gd
>> +        */
>> +       mutex_lock(&devices_mutex);
>> +       dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
>> +       if (!dev) {
>> +               mutex_unlock(&devices_mutex);
>> +               return;
>> +       }
>> +       mutex_unlock(&devices_mutex);
>
> You can unlock the mutex directl after find_dev_nolock().
> No need to write it twice.
>

True.

>> +       mutex_lock(&dev->vol_mutex);
>> +       disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
>> +       set_capacity(dev->gd, disk_capacity);
>> +       dev_dbg(disk_to_dev(dev->gd), "resized to %d LEBs\n", vi->size);
>> +       mutex_unlock(&dev->vol_mutex);
>> +}
>> +
>> +static int ubiblock_notify(struct notifier_block *nb,
>> +                        unsigned long notification_type, void *ns_ptr)
>> +{
>> +       struct ubi_notification *nt = ns_ptr;
>> +
>> +       switch (notification_type) {
>> +       case UBI_VOLUME_ADDED:
>> +               if (0)
>> +                       ubiblock_add(&nt->vi);
>
> if (0) !?
>

Oops, forgot to remove this one.

>> +               break;
>> +       case UBI_VOLUME_REMOVED:
>> +               ubiblock_del(&nt->vi);
>> +               break;
>> +       case UBI_VOLUME_RESIZED:
>> +               ubiblock_resize(&nt->vi);
>> +               break;
>> +       default:
>> +               break;
>> +       }
>> +       return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block ubiblock_notifier = {
>> +       .notifier_call = ubiblock_notify,
>> +};
>> +
>> +static struct ubi_volume_desc *
>> +open_volume_desc(const char *name, int ubi_num, int vol_id)
>> +{
>> +       if (ubi_num == -1)
>> +               /* No ubi num, name must be a vol device path */
>> +               return ubi_open_volume_path(name, UBI_READONLY);
>> +       else if (vol_id == -1)
>> +               /* No vol_id, must be vol_name */
>> +               return ubi_open_volume_nm(ubi_num, name, UBI_READONLY);
>> +       else
>> +               return ubi_open_volume(ubi_num, vol_id, UBI_READONLY);
>> +}
>> +
>> +static void ubiblock_register_vol_param(void)
>> +{
>> +       int i, ret;
>> +       struct ubiblock_vol_param *p;
>> +       struct ubi_volume_desc *desc;
>> +       struct ubi_volume_info vi;
>> +
>> +       for (i = 0; i < ubiblock_devs; i++) {
>> +               p = &ubiblock_vol_param[i];
>> +
>> +               desc = open_volume_desc(p->name, p->ubi_num, p->vol_id);
>> +               if (IS_ERR(desc))
>> +                       continue;
>> +
>> +               ubi_get_volume_info(desc, &vi);
>> +               ret = ubiblock_add(&vi);
>> +               if (ret)
>> +                       pr_warn("can't add '%s' volume, err=%d\n",
>> +                               vi.name, ret);
>> +
>> +               ubi_close_volume(desc);
>> +       }
>> +}
>> +
>> +static int __init ubiblock_init(void)
>> +{
>> +       ubiblock_major = register_blkdev(0, "ubiblock");
>> +       if (ubiblock_major < 0)
>> +               return ubiblock_major;
>> +
>> +       ubiblock_register_vol_param();
>> +
>> +       /*
>> +        * Blocks will get registered dynamically.
>> +        * Each ubi volume will get a corresponding block device.
>> +        */
>> +       return ubi_register_volume_notifier(&ubiblock_notifier, 1);
>> +}
>> +
>> +static void __exit ubiblock_exit(void)
>> +{
>> +       struct ubiblock *next;
>> +       struct ubiblock *dev;
>
> No need to write two lines. Put next and dev in the same one.
>

True.

>> +       ubi_unregister_volume_notifier(&ubiblock_notifier);
>> +
>> +       list_for_each_entry_safe(dev, next, &ubiblock_devices, list) {
>> +
>> +               /* Flush pending work and stop workqueue */
>> +               destroy_workqueue(dev->wq);
>> +
>> +               /* The module is being forcefully removed */
>> +               WARN_ON(dev->desc);
>> +
>> +               /* Remove from device list */
>> +               list_del(&dev->list);
>> +
>> +               ubiblock_cleanup(dev);
>> +
>> +               kfree(dev);
>> +       }
>> +
>> +       unregister_blkdev(ubiblock_major, "ubiblock");
>> +}
>> +
>> +module_init(ubiblock_init);
>> +module_exit(ubiblock_exit);
>> +
>> +MODULE_DESCRIPTION("Block device emulation access to UBI volumes");
>
> I'm not a native speaker but this sentence sound strange to me.
>

Could be. Any suggestion?

Thanks!

    Ezequiel
Greg KH - Dec. 12, 2012, 4:30 p.m.
On Wed, Dec 12, 2012 at 05:14:41PM +0100, richard -rw- weinberger wrote:
> On Wed, Dec 12, 2012 at 4:50 PM, Gregory CLEMENT <gclement00@gmail.com> wrote:
> > 2012/12/12 richard -rw- weinberger <richard.weinberger@gmail.com>:
> >> Hi!
> >>
> >> A few comments...
> >>
> >> On Wed, Dec 12, 2012 at 1:21 PM, Ezequiel Garcia <elezegarcia@gmail.com> wrote:
> >>> --- /dev/null
> >>> +++ b/drivers/mtd/ubi/ubiblock.c
> >>> @@ -0,0 +1,830 @@
> >>> +/*
> >>> + * Copyright (c) 2012 Ezequiel Garcia
> >>> + * Copyright (c) 2011 Free Electrons
> >>> + *
> >>> + * This program is free software; you can redistribute it and/or modify
> >>> + * it under the terms of the GNU General Public License as published by
> >>> + * the Free Software Foundation; either version 2 of the License, or
> >>> + * (at your option) any later version.
> >>
> >> Linux is GPLv2 and not v2 or any later version...
> >
> > I think it is more complicate than this.
> > In the kernel you have code under GPLv2, GPLv2+ and even BSD licenses.
> > All these licenses are compatible but if you want to distribute all the sources
> > of the kernel then you are under the more restrictive license: GPLv2.
> > So we can say that kernel is GPLv2, but it doesn't prevent to submit code
> > under GPLv2+.
> 
> The kernel is GPLv2. period.

The overall kernel is, yes, but individual files can also have
additional licenses on them.

> > There seems to have a couple of files under GPLv2:
> > git grep "(at your option) any later version" | wc -l
> > 6800
> 
> These lines are wrong.

Not true at all, please consult a lawyer if you are unsure.

greg k-h
richard -rw- weinberger - Dec. 12, 2012, 4:32 p.m.
On Wed, Dec 12, 2012 at 5:30 PM, Greg KH <gregkh@linuxfoundation.org> wrote:
> The overall kernel is, yes, but individual files can also have
> additional licenses on them.

Okay, that is new to me. Thanks for explaining!
Ezequiel Garcia - Dec. 12, 2012, 4:35 p.m.
On Wed, Dec 12, 2012 at 1:18 PM, Ezequiel Garcia <elezegarcia@gmail.com> wrote:
> Hi Richard,
>
> Thanks for the feedback.
> Yeah I forgot the remove some clumsy or work-in-progress code. Shame on me :-(
>
> Anyway, I wanted to post the new cache shape and the new module parameter
> to get some feedback. I'd like to decide if write support is useful or not.
>
> On Wed, Dec 12, 2012 at 12:21 PM, richard -rw- weinberger
> <richard.weinberger@gmail.com> wrote:
>> Hi!
>>
>> A few comments...
>>
>> On Wed, Dec 12, 2012 at 1:21 PM, Ezequiel Garcia <elezegarcia@gmail.com> wrote:
>>> --- /dev/null
>>> +++ b/drivers/mtd/ubi/ubiblock.c
>>> @@ -0,0 +1,830 @@
>>> +/*
>>> + * Copyright (c) 2012 Ezequiel Garcia
>>> + * Copyright (c) 2011 Free Electrons
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License as published by
>>> + * the Free Software Foundation; either version 2 of the License, or
>>> + * (at your option) any later version.
>>
>> Linux is GPLv2 and not v2 or any later version...
>>
>
> True.
>

On a second thought and considering this code is originally
copyrighted by Free Electrons,
I'd guess I need their permission to make a license change (right?).

So considering Greg's comments, I'll leave it as it is and let Free
Electrons decide.

Thanks,

    Ezequiel
Michael Opdenacker - Dec. 12, 2012, 5:26 p.m.
On 12/12/2012 05:35 PM, Ezequiel Garcia wrote:
> On a second thought and considering this code is originally
> copyrighted by Free Electrons,
> I'd guess I need their permission to make a license change (right?).
>
> So considering Greg's comments, I'll leave it as it is and let Free
> Electrons decide.

OK for GPLv2 only. You have Free Electron's permission.

Thanks for asking!

Michael.
Tim Bird - Dec. 12, 2012, 7:50 p.m.
On 12/12/2012 09:26 AM, Michael Opdenacker wrote:
> On 12/12/2012 05:35 PM, Ezequiel Garcia wrote:
>> On a second thought and considering this code is originally
>> copyrighted by Free Electrons,
>> I'd guess I need their permission to make a license change (right?).
>>
>> So considering Greg's comments, I'll leave it as it is and let Free
>> Electrons decide.
> 
> OK for GPLv2 only. You have Free Electron's permission.
> 
> Thanks for asking!

I'm not sure if this code is related to the UBI code that
Free Electrons worked on for CELF some time ago or not -- but
CELF's preference (and CE Workgroup's preference also)
for any code contributions to the kernel is GPLv2 only.

Just FYI.
 -- Tim

=============================
Tim Bird
Architecture Group Chair, CE Workgroup of the Linux Foundation
Senior Staff Engineer, Sony Network Entertainment
=============================
Mike Frysinger - April 18, 2013, 8:30 p.m.
On Wednesday 12 December 2012 07:21:52 Ezequiel Garcia wrote:
> Block device emulation on top of ubi volumes with read/write support.
> Block devices are created upon user request through the
> 'vol' module parameter.
> 
> For instance,
> 
>   $ modprobe ubiblock vol=/dev/ubi0_0
>   $ modprobe ubiblock vol=0,rootfs

i played around with ubiblk before finding this newer version.  one thing i 
think this is missing that ubiblk had is an ioctl interface for adding new 
block volumes on the fly.  you can attach ubi volumes at runtime, but the only 
way to attach ubiblocks is by loading/unloading the module, or by rebooting 
and tweaking the command line.

imo, that support needs to be re-added.  it'd be great if we could do it via 
the existing /dev/ubi_ctrl knob, but maybe that'll only work if ubi+ubiblock 
are built into the kernel, or if ubiblock is merged with ubi ?

> Read/write access is expected to work fairly well because the
> request queue at block elevator orders block transfers to achieve
> space-locality.
> In other words, it's expected that reads and writes gets ordered
> to address the same LEB.

i wonder if the write support should be put behind a CONFIG option.  
personally, the write support is kind of neat and semi-useful for development, 
but i don't plan on shipping anything on that :).  i just want read-only 
support to load an ext2 fs on top of UBI.

> +static int ubiblock_open(struct block_device *bdev, fmode_t mode)
> +{
> +	struct ubiblock *dev = bdev->bd_disk->private_data;
> +	int ubi_mode = UBI_READONLY;
> +	int ret;
> +
> +	mutex_lock(&dev->vol_mutex);
> +	if (dev->refcnt > 0) {
> +		/*
> +		 * The volume is already opened,
> +		 * just increase the reference counter
> +		 */
> +		dev->refcnt++;
> +		mutex_unlock(&dev->vol_mutex);
> +		return 0;
> +	}
> +
> +	if (mode & FMODE_WRITE)
> +		ubi_mode = UBI_READWRITE;

hmm, you handle ro vs rw here ...

> +	ret = ubiblock_alloc_cache(&dev->read_cache, dev->leb_size);
> +	if (ret)
> +		goto out_free;
> +
> +	ret = ubiblock_alloc_cache(&dev->write_cache, dev->leb_size);
> +	if (ret)
> +		goto out_free_cache;

... but you always alloc a write cache even when it's mounted ro ?
-mike
Peter Korsgaard - April 18, 2013, 8:58 p.m.
>>>>> "Mike" == Mike Frysinger <vapier@gentoo.org> writes:

Hi,

 Mike> i wonder if the write support should be put behind a CONFIG
 Mike> option.  personally, the write support is kind of neat and
 Mike> semi-useful for development, but i don't plan on shipping
 Mike> anything on that :).  i just want read-only support to load an
 Mike> ext2 fs on top of UBI.

Agreed, I think most people would just use ubifs if they want a
read/write fs on ubi, and ubiblk for ro ext/squashfs/..
Ezequiel Garcia - April 19, 2013, 12:13 a.m.
Hi Mike,

On Thu, Apr 18, 2013 at 04:30:55PM -0400, Mike Frysinger wrote:
> On Wednesday 12 December 2012 07:21:52 Ezequiel Garcia wrote:
> > Block device emulation on top of ubi volumes with read/write support.
> > Block devices are created upon user request through the
> > 'vol' module parameter.
> > 
> > For instance,
> > 
> >   $ modprobe ubiblock vol=/dev/ubi0_0
> >   $ modprobe ubiblock vol=0,rootfs
> 
> i played around with ubiblk before finding this newer version.  one thing i 
> think this is missing that ubiblk had is an ioctl interface for adding new 
> block volumes on the fly.  you can attach ubi volumes at runtime, but the only 
> way to attach ubiblocks is by loading/unloading the module, or by rebooting 
> and tweaking the command line.
> 
> imo, that support needs to be re-added.  it'd be great if we could do it via 
> the existing /dev/ubi_ctrl knob, but maybe that'll only work if ubi+ubiblock 
> are built into the kernel, or if ubiblock is merged with ubi ?
>

Yes, such support should be re-added. I'll think about it.

> > Read/write access is expected to work fairly well because the
> > request queue at block elevator orders block transfers to achieve
> > space-locality.
> > In other words, it's expected that reads and writes gets ordered
> > to address the same LEB.
> 
> i wonder if the write support should be put behind a CONFIG option.  
> personally, the write support is kind of neat and semi-useful for development, 
> but i don't plan on shipping anything on that :).  i just want read-only 
> support to load an ext2 fs on top of UBI.
> 

Mmm... good input. Maybe putting write support behind a CONFIG and
showing a big fat warning when the module loads will do?
(something to prevent regular users from using this carelessly).

May I ask why would you want to put ext2 fs? Have you considered f2fs?

> > +static int ubiblock_open(struct block_device *bdev, fmode_t mode)
> > +{
> > +	struct ubiblock *dev = bdev->bd_disk->private_data;
> > +	int ubi_mode = UBI_READONLY;
> > +	int ret;
> > +
> > +	mutex_lock(&dev->vol_mutex);
> > +	if (dev->refcnt > 0) {
> > +		/*
> > +		 * The volume is already opened,
> > +		 * just increase the reference counter
> > +		 */
> > +		dev->refcnt++;
> > +		mutex_unlock(&dev->vol_mutex);
> > +		return 0;
> > +	}
> > +
> > +	if (mode & FMODE_WRITE)
> > +		ubi_mode = UBI_READWRITE;
> 
> hmm, you handle ro vs rw here ...
> 
> > +	ret = ubiblock_alloc_cache(&dev->read_cache, dev->leb_size);
> > +	if (ret)
> > +		goto out_free;
> > +
> > +	ret = ubiblock_alloc_cache(&dev->write_cache, dev->leb_size);
> > +	if (ret)
> > +		goto out_free_cache;
> 
> ... but you always alloc a write cache even when it's mounted ro ?

Good catch.

I'll see if I can cook a v3 one of these days.
Mike Frysinger - April 19, 2013, 1:28 a.m.
On Thursday 18 April 2013 20:13:00 Ezequiel Garcia wrote:
> On Thu, Apr 18, 2013 at 04:30:55PM -0400, Mike Frysinger wrote:
> > i wonder if the write support should be put behind a CONFIG option.
> > personally, the write support is kind of neat and semi-useful for
> > development, but i don't plan on shipping anything on that :).  i just
> > want read-only support to load an ext2 fs on top of UBI.
> 
> Mmm... good input. Maybe putting write support behind a CONFIG and
> showing a big fat warning when the module loads will do?
> (something to prevent regular users from using this carelessly).
> 
> May I ask why would you want to put ext2 fs? Have you considered f2fs?

well, unless i misread things, f2fs is designed for consumer flash 
(mmc/cf/etc...) where the flash (e.g. NAND) is behind a FTL.  the device i'm 
interested isn't behind a FTL (the NAND is connected directly to the NAND 
controller in the SoC), so i want to run UBI on it to handle the bad blocks 
and junk.  otherwise, the ext2 is always read-only, so the write 
characteristics don't matter.
-mike
pekon gupta - April 19, 2013, 7:10 a.m.
> On Thursday 18 April 2013 20:13:00 Ezequiel Garcia wrote:
> > On Thu, Apr 18, 2013 at 04:30:55PM -0400, Mike Frysinger wrote:
> > > i wonder if the write support should be put behind a CONFIG option.
> > > personally, the write support is kind of neat and semi-useful for
> > > development, but i don't plan on shipping anything on that :).  i just
> > > want read-only support to load an ext2 fs on top of UBI.
> >
> > Mmm... good input. Maybe putting write support behind a CONFIG
> and
> > showing a big fat warning when the module loads will do?
> > (something to prevent regular users from using this carelessly).
> >
[Pekon]: Permanent disabling of WRITE(s) may not be good approach. 
As it constrains you from updating on-field device remotely, especially
when your boot-loaders are not in your control.
OR Unless you have a back-door mechanism or loop-hole, which you 
can also share with your customers (unlikely). 

[Pekon]: Other options for S/W driven Write-access-control are:
(1) Using Write-Protect pin on NAND device, controlled via secure GPIO
(2) Using mtd_lock and mtd_unlock mechanism.
	(a) your device should support this feature,
	(b) I think this is not supported for MTDBLOCK devices, and
	(c) need to populate following functions accordingly.
	drivers/mtd/nand/nand_base.c
	int nand_scan_tail(struct mtd_info *mtd)
	-       mtd->_lock = NULL;
	-       mtd->_unlock = NULL;
	+       mtd->_lock = nand_lock;
	+       mtd->_unlock = nand_unlock;

-- Bit away from topic of discussion --
Currently for MTDBLOCK devices the readonly setting is decided
at device addition time.
drivers/mtd/mtdblock.c : mtdblock_add_mtd()
if (!(mtd->flags & MTD_WRITEABLE))
	dev->mbd.readonly = 1;

But I think a similar write-protection mechanism may be added 
for MTDBLOCK devices also, thereby giving low-level access control
Example:
if (mtd->_lock && mtd->_unlock)
	dev->mbd.supports_write_protection = 1;

Does anyone see a benefit in this?


with regards, pekon
Mike Frysinger - April 19, 2013, 11:57 a.m.
On Friday 19 April 2013 03:10:23 Gupta, Pekon wrote:
> > On Thursday 18 April 2013 20:13:00 Ezequiel Garcia wrote:
> > > On Thu, Apr 18, 2013 at 04:30:55PM -0400, Mike Frysinger wrote:
> > > > i wonder if the write support should be put behind a CONFIG option.
> > > > personally, the write support is kind of neat and semi-useful for
> > > > development, but i don't plan on shipping anything on that :).  i
> > > > just want read-only support to load an ext2 fs on top of UBI.
> > > 
> > > Mmm... good input. Maybe putting write support behind a CONFIG
> > > and showing a big fat warning when the module loads will do?
> > > (something to prevent regular users from using this carelessly).
> 
> [Pekon]: Permanent disabling of WRITE(s) may not be good approach.
> As it constrains you from updating on-field device remotely, especially
> when your boot-loaders are not in your control.

err, no it doesn't.  we're talking about write support at the UBI *block* 
layer.  nothing is stopping you from writing to the UBI volume directly like 
normal (e.g. ubiupdatevol/etc...).

the reason we're talking about not allowing write support at the *block* layer 
is because it's questionable how many people actually want this, the 
performance isn't good (compared to native flash filesystems), and because the 
write/wear characteristics are unknown.

> [Pekon]: Other options for S/W driven Write-access-control are:

hence, these are all unrelated issues.
-mike
Ezequiel Garcia - April 19, 2013, 12:27 p.m.
On Thu, Apr 18, 2013 at 09:28:16PM -0400, Mike Frysinger wrote:
> On Thursday 18 April 2013 20:13:00 Ezequiel Garcia wrote:
> > On Thu, Apr 18, 2013 at 04:30:55PM -0400, Mike Frysinger wrote:
> > > i wonder if the write support should be put behind a CONFIG option.
> > > personally, the write support is kind of neat and semi-useful for
> > > development, but i don't plan on shipping anything on that :).  i just
> > > want read-only support to load an ext2 fs on top of UBI.
> > 
> > Mmm... good input. Maybe putting write support behind a CONFIG and
> > showing a big fat warning when the module loads will do?
> > (something to prevent regular users from using this carelessly).
> > 
> > May I ask why would you want to put ext2 fs? Have you considered f2fs?
> 
> well, unless i misread things, f2fs is designed for consumer flash 
> (mmc/cf/etc...) where the flash (e.g. NAND) is behind a FTL.  the device i'm 
> interested isn't behind a FTL (the NAND is connected directly to the NAND 
> controller in the SoC),

Indeed f2fs is designed for flash behind FTL. The thing is the ubi+ubiblock
should act as a FTL, providing bad block management and wear leveling.

So that's why I've been wondering about using f2fs. Probably I should
give it a try and post the results, instead of just wondering.
Artem Bityutskiy - April 19, 2013, 12:31 p.m.
On Fri, 2013-04-19 at 07:57 -0400, Mike Frysinger wrote:
> the reason we're talking about not allowing write support at the
> *block* layer 
> is because it's questionable how many people actually want this, the 
> performance isn't good (compared to native flash filesystems), and
> because the 
> write/wear characteristics are unknown.

My opinion that unless people demonstrate that they need this, e.g. in a
product, etc, we should not merge this stuff.

We recently merged fastmap, which is a big chunk of code, and it looks
like no one really needs it. There was a problem report, and Richard
promised to look, but did not. I do not blame him, he is a busy guy. But
this shoes that this feature is not really needed, while adds
maintenance burden.

To put it differently, I do recommend to merge more UBI-related code
without a solid user-base.
Bityutskiy, Artem - April 19, 2013, 12:31 p.m.
On Fri, 2013-04-19 at 15:31 +0300, Artem Bityutskiy wrote:
> To put it differently, I do recommend to merge more UBI-related code
> without a solid user-base.

Sorry, not my day. Of course I meant *do not* recommend.

We really have enough of dead MDT-related stuff. Look at logfs - a large
code base which oopses if you stress-test it and has no users. We do not
really need more of stuff like that.
Mike Frysinger - April 19, 2013, 12:53 p.m.
On Friday 19 April 2013 08:31:13 Artem Bityutskiy wrote:
> On Fri, 2013-04-19 at 07:57 -0400, Mike Frysinger wrote:
> > the reason we're talking about not allowing write support at the
> > *block* layer
> > is because it's questionable how many people actually want this, the
> > performance isn't good (compared to native flash filesystems), and
> > because the
> > write/wear characteristics are unknown.
> 
> My opinion that unless people demonstrate that they need this, e.g. in a
> product, etc, we should not merge this stuff.
> 
> We recently merged fastmap, which is a big chunk of code, and it looks
> like no one really needs it. There was a problem report, and Richard
> promised to look, but did not. I do not blame him, he is a busy guy. But
> this shoes that this feature is not really needed, while adds
> maintenance burden.
> 
> To put it differently, I do recommend to merge more UBI-related code
> without a solid user-base.

also to be clear, i plan on using UBI block in read-only mode, so i'd like to 
see an ubiblock driver merged
-mike
Matthieu CASTET - April 19, 2013, 12:55 p.m.
Mike Frysinger a écrit :
> On Friday 19 April 2013 08:31:13 Artem Bityutskiy wrote:
>> On Fri, 2013-04-19 at 07:57 -0400, Mike Frysinger wrote:
>>> the reason we're talking about not allowing write support at the
>>> *block* layer
>>> is because it's questionable how many people actually want this, the
>>> performance isn't good (compared to native flash filesystems), and
>>> because the
>>> write/wear characteristics are unknown.
>> My opinion that unless people demonstrate that they need this, e.g. in a
>> product, etc, we should not merge this stuff.
>>
>> We recently merged fastmap, which is a big chunk of code, and it looks
>> like no one really needs it. There was a problem report, and Richard
>> promised to look, but did not. I do not blame him, he is a busy guy. But
>> this shoes that this feature is not really needed, while adds
>> maintenance burden.
>>
>> To put it differently, I do recommend to merge more UBI-related code
>> without a solid user-base.
> 
> also to be clear, i plan on using UBI block in read-only mode, so i'd like to 
> see an ubiblock driver merged
>
Why can't you use gluebi + mtdblock ?

Too much overhead ?


Matthieu
Artem Bityutskiy - April 19, 2013, 12:56 p.m.
On Fri, 2013-04-19 at 08:53 -0400, Mike Frysinger wrote:
> also to be clear, i plan on using UBI block in read-only mode, so i'd
> like to 
> see an ubiblock driver merged

OK, sounds good!
Ezequiel Garcia - April 19, 2013, 3:02 p.m.
Hi Artem,

On Fri, Apr 19, 2013 at 03:31:13PM +0300, Artem Bityutskiy wrote:
> On Fri, 2013-04-19 at 07:57 -0400, Mike Frysinger wrote:
> > the reason we're talking about not allowing write support at the
> > *block* layer 
> > is because it's questionable how many people actually want this, the 
> > performance isn't good (compared to native flash filesystems), and
> > because the 
> > write/wear characteristics are unknown.
> 
> My opinion that unless people demonstrate that they need this, e.g. in a
> product, etc, we should not merge this stuff.
> 
> We recently merged fastmap, which is a big chunk of code, and it looks
> like no one really needs it. There was a problem report, and Richard
> promised to look, but did not. I do not blame him, he is a busy guy. But
> this shoes that this feature is not really needed, while adds
> maintenance burden.
> 
> To put it differently, I do recommend to merge more UBI-related code
> without a solid user-base.
>

(I know you meant don't)

Of course, I agree that we shouldn't merge stuff just for the sake of it.
I'm always against adding churn.

However, IMHO, fastmap and ubiblock are not comparable **at all**:

  1. Ubiblock is *extremely* simple and *completely* independent from mtd/ubi
     stack.

  2. Fastmap is a cool UBI feature, that's pretty tightly integrated
     with the UBI core and -at least to me- not very easy to understand
     and follow.
Tim Bird - April 19, 2013, 5:05 p.m.
On 04/19/2013 05:31 AM, Artem Bityutskiy wrote:
> We recently merged fastmap, which is a big chunk of code, and it looks
> like no one really needs it. There was a problem report, and Richard
> promised to look, but did not. I do not blame him, he is a busy guy. But
> this shows that this feature is not really needed, while adds
> maintenance burden.

Whoa there!  Just because it doesn't look like people are using fastmap
right now, don't think it won't get used in the future.   Really,
the people interested in fastmap are AFAIK mostly TV vendors, who are
just now migrating to 3.0 or 3.4 kernels.  I can assure you that fastmap
will get more attention when the major CE companies move to more
recent kernels (probably in about 1 to 2 years).

Likely, anyone who needs fastmap today has backported it, or plans
to backport it to their current product-facing kernel.  Unfortunately,
that means that bug reports and fixes usually don't show up on
public e-mail lists (due to version gap).

Richard (/Linutronix) was hired to do this work by the CE Workgroup.
If money is needed to get fastmap bugfixes and maintenance on his
priority list, please let us know and maybe we can do something.
 -- Tim

=============================
Tim Bird
Architecture Group Chair, CE Workgroup of the Linux Foundation
Senior Staff Engineer, Sony Network Entertainment
=============================
richard -rw- weinberger - April 20, 2013, 7:50 a.m.
On Fri, Apr 19, 2013 at 2:31 PM, Artem Bityutskiy <dedekind1@gmail.com> wrote:
> On Fri, 2013-04-19 at 07:57 -0400, Mike Frysinger wrote:
>> the reason we're talking about not allowing write support at the
>> *block* layer
>> is because it's questionable how many people actually want this, the
>> performance isn't good (compared to native flash filesystems), and
>> because the
>> write/wear characteristics are unknown.
>
> My opinion that unless people demonstrate that they need this, e.g. in a
> product, etc, we should not merge this stuff.
>
> We recently merged fastmap, which is a big chunk of code, and it looks
> like no one really needs it. There was a problem report, and Richard
> promised to look, but did not. I do not blame him, he is a busy guy. But
> this shoes that this feature is not really needed, while adds
> maintenance burden.

You mean "ubi fastmap memory leak"? No need to worry, it is on my radar.
But as the issue is not a show-stopper I have to take care first of
problems with
higher priority.

Thanks,
//richard
Brian Norris - April 20, 2013, 6:12 p.m.
On Fri, Apr 19, 2013 at 10:05 AM, Tim Bird <tim.bird@am.sony.com> wrote:
> On 04/19/2013 05:31 AM, Artem Bityutskiy wrote:
>> We recently merged fastmap, which is a big chunk of code, and it looks
>> like no one really needs it. There was a problem report, and Richard
>> promised to look, but did not. I do not blame him, he is a busy guy. But
>> this shows that this feature is not really needed, while adds
>> maintenance burden.
>
> Whoa there!  Just because it doesn't look like people are using fastmap
> right now, don't think it won't get used in the future.   Really,
> the people interested in fastmap are AFAIK mostly TV vendors, who are
> just now migrating to 3.0 or 3.4 kernels.  I can assure you that fastmap
> will get more attention when the major CE companies move to more
> recent kernels (probably in about 1 to 2 years).

I'll second Tim's point here. We will likely use fastmap here
(Broadcom) soon. We're currently migrating to a 3.8 kernel, and
smaller mount/boot times are always a bonus for us. Don't jump ship on
this feature too soon!

Brian
Mike Frysinger - April 24, 2013, 4:27 p.m.
On Friday 19 April 2013 08:55:31 Matthieu CASTET wrote:
> Mike Frysinger a écrit :
> > On Friday 19 April 2013 08:31:13 Artem Bityutskiy wrote:
> >> On Fri, 2013-04-19 at 07:57 -0400, Mike Frysinger wrote:
> >>> the reason we're talking about not allowing write support at the
> >>> *block* layer
> >>> is because it's questionable how many people actually want this, the
> >>> performance isn't good (compared to native flash filesystems), and
> >>> because the
> >>> write/wear characteristics are unknown.
> >> 
> >> My opinion that unless people demonstrate that they need this, e.g. in a
> >> product, etc, we should not merge this stuff.
> >> 
> >> We recently merged fastmap, which is a big chunk of code, and it looks
> >> like no one really needs it. There was a problem report, and Richard
> >> promised to look, but did not. I do not blame him, he is a busy guy. But
> >> this shoes that this feature is not really needed, while adds
> >> maintenance burden.
> >> 
> >> To put it differently, I do recommend to merge more UBI-related code
> >> without a solid user-base.
> > 
> > also to be clear, i plan on using UBI block in read-only mode, so i'd
> > like to see an ubiblock driver merged
> 
> Why can't you use gluebi + mtdblock ?
> 
> Too much overhead ?

a poor man's read-only test on this one system shows ubiblock & mtdblock 
provide about the same read speeds.  but this was just reading the raw block 
device directly.

the mtd faq suggests that gluebi is meant only for devices that actually want 
mtd behavior (read/write/erase/etc...) like jffs2 rather than for flash unaware 
filesystems.  i'm not sure if gluebi will work if you want to run in read/write 
mode w/a fs like ext2.
-mike
Artem Bityutskiy - May 13, 2013, 7:28 a.m.
On Thu, 2013-04-18 at 16:30 -0400, Mike Frysinger wrote:
> 
> imo, that support needs to be re-added.  it'd be great if we could do
> it via 
> the existing /dev/ubi_ctrl knob, but maybe that'll only work if ubi
> +ubiblock 
> are built into the kernel, or if ubiblock is merged with ubi ?

Yeah, re-using the ubi_ctl device makes sense. And UBI can probably
invoke 'request_module()' if needed, so that we avoid any of "work only
if both built-in" conditions.
Artem Bityutskiy - May 13, 2013, 7:44 a.m.
On Fri, 2013-04-19 at 10:05 -0700, Tim Bird wrote:
> On 04/19/2013 05:31 AM, Artem Bityutskiy wrote:
> > We recently merged fastmap, which is a big chunk of code, and it looks
> > like no one really needs it. There was a problem report, and Richard
> > promised to look, but did not. I do not blame him, he is a busy guy. But
> > this shows that this feature is not really needed, while adds
> > maintenance burden.
> 
> Whoa there!  Just because it doesn't look like people are using fastmap
> right now, don't think it won't get used in the future.   Really,
> the people interested in fastmap are AFAIK mostly TV vendors, who are
> just now migrating to 3.0 or 3.4 kernels.  I can assure you that fastmap
> will get more attention when the major CE companies move to more
> recent kernels (probably in about 1 to 2 years).

Fair enough!
Artem Bityutskiy - May 13, 2013, 7:49 a.m.
On Sat, 2013-04-20 at 09:50 +0200, richard -rw- weinberger wrote:
> You mean "ubi fastmap memory leak"? No need to worry, it is on my radar.
> But as the issue is not a show-stopper I have to take care first of
> problems with
> higher priority.

I actually meant this one:
http://lists.infradead.org/pipermail/linux-mtd/2012-December/045184.html
richard -rw- weinberger - May 13, 2013, 9:23 a.m.
On Mon, May 13, 2013 at 9:49 AM, Artem Bityutskiy <dedekind1@gmail.com> wrote:
> On Sat, 2013-04-20 at 09:50 +0200, richard -rw- weinberger wrote:
>> You mean "ubi fastmap memory leak"? No need to worry, it is on my radar.
>> But as the issue is not a show-stopper I have to take care first of
>> problems with
>> higher priority.
>
> I actually meant this one:
> http://lists.infradead.org/pipermail/linux-mtd/2012-December/045184.html

Fair point. The issue really got lost under my workload.
This can happen in open source development.

Anyway, replying to Philipp now...

--
Thanks,
//richard

Patch

diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig
index 36663af..12ffa2d 100644
--- a/drivers/mtd/ubi/Kconfig
+++ b/drivers/mtd/ubi/Kconfig
@@ -87,4 +87,20 @@  config MTD_UBI_GLUEBI
 	   work on top of UBI. Do not enable this unless you use legacy
 	   software.
 
+config MTD_UBI_BLOCK
+	tristate "Caching block device access to UBI volumes"
+	help
+	   Since UBI already takes care of eraseblock wear leveling
+	   and bad block handling, it's possible to implement a block
+	   device on top of it and therefore mount regular filesystems
+	   (i.e. not flash-oriented, as ext4).
+
+	   In other words, this is a software flash translation layer.
+
+	   This is a *very* experimental feature. In particular, it is
+	   yet not known how heavily a regular block-oriented filesystem
+	   might impact on a raw flash wear.
+
+	   If in doubt, say "N".
+
 endif # MTD_UBI
diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile
index b46b0c97..1578733 100644
--- a/drivers/mtd/ubi/Makefile
+++ b/drivers/mtd/ubi/Makefile
@@ -5,3 +5,4 @@  ubi-y += misc.o debug.o
 ubi-$(CONFIG_MTD_UBI_FASTMAP) += fastmap.o
 
 obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
+obj-$(CONFIG_MTD_UBI_BLOCK) += ubiblock.o
diff --git a/drivers/mtd/ubi/ubiblock.c b/drivers/mtd/ubi/ubiblock.c
new file mode 100644
index 0000000..16a545e
--- /dev/null
+++ b/drivers/mtd/ubi/ubiblock.c
@@ -0,0 +1,830 @@ 
+/*
+ * Copyright (c) 2012 Ezequiel Garcia
+ * Copyright (c) 2011 Free Electrons
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * TODO: Add parameter for autoloading
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/mtd/ubi.h>
+#include <linux/workqueue.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+
+#include "ubi-media.h"
+
+#if 0
+static bool auto_vol;
+module_param(auto_vol, int, 0644);
+MODULE_PARM_DESC(auto_vol, "Automatically attach a block layer to each volume")
+#endif
+
+/* Maximum number of supported devices */
+#define UBIBLOCK_MAX_DEVICES 32
+
+/* Maximum length of the 'vol=' parameter */
+#define UBIBLOCK_VOL_PARAM_LEN 64
+
+/* Maximum number of comma-separated items in the 'vol=' parameter */
+#define UBIBLOCK_VOL_PARAM_COUNT 2
+
+struct ubiblock_vol_param {
+	int ubi_num;
+	int vol_id;
+	char name[UBIBLOCK_VOL_PARAM_LEN];
+};
+
+/* Numbers of elements set in the @ubiblock_vol_param array */
+static int ubiblock_devs;
+
+/* MTD devices specification parameters */
+static struct ubiblock_vol_param ubiblock_vol_param[UBIBLOCK_MAX_DEVICES];
+
+struct ubiblock_cache {
+	char *buffer;
+	enum { STATE_EMPTY, STATE_CLEAN, STATE_DIRTY } state;
+	int leb_num;
+};
+
+struct ubiblock {
+	struct ubi_volume_desc *desc;
+	struct ubi_volume_info *vi;
+	int ubi_num;
+	int vol_id;
+	int refcnt;
+
+	struct gendisk *gd;
+	struct request_queue *rq;
+
+	struct workqueue_struct *wq;
+	struct work_struct work;
+
+	struct mutex vol_mutex;
+	spinlock_t queue_lock;
+	struct list_head list;
+
+	int leb_size;
+	struct ubiblock_cache read_cache;
+	struct ubiblock_cache write_cache;
+};
+
+/* Linked list of all ubiblock instances */
+static LIST_HEAD(ubiblock_devices);
+static DEFINE_MUTEX(devices_mutex);
+static int ubiblock_major;
+
+/*
+ * Ugh, this parameter parsing code is simply awful :(
+ */
+static int ubiblock_set_vol_param(const char *val,
+				  const struct kernel_param *kp)
+{
+	int len, i, ret;
+	struct ubiblock_vol_param *param;
+	char buf[UBIBLOCK_VOL_PARAM_LEN];
+	char *pbuf = &buf[0];
+	char *tokens[UBIBLOCK_VOL_PARAM_COUNT];
+
+	if (!val)
+		return -EINVAL;
+
+	len = strnlen(val, UBIBLOCK_VOL_PARAM_LEN);
+	if (len == 0) {
+		pr_warn("empty 'vol=' parameter - ignored\n");
+		return 0;
+	}
+
+	if (len == UBIBLOCK_VOL_PARAM_LEN) {
+		pr_err("parameter \"%s\" is too long, max. is %d\n",
+			val, UBIBLOCK_VOL_PARAM_LEN);
+		return -EINVAL;
+	}
+
+	strcpy(buf, val);
+
+	/* Get rid of the final newline */
+	if (buf[len - 1] == '\n')
+		buf[len - 1] = '\0';
+
+	for (i = 0; i < UBIBLOCK_VOL_PARAM_COUNT; i++)
+		tokens[i] = strsep(&pbuf, ",");
+
+	param = &ubiblock_vol_param[ubiblock_devs];
+	if (tokens[1]) {
+		/* Two parameters: can be 'ubi, vol_id' or 'ubi, vol_name' */
+		ret = kstrtoint(tokens[0], 10, &param->ubi_num);
+		if (ret < 0)
+			return -EINVAL;
+
+		/* Second param can be a number or a name */
+		ret = kstrtoint(tokens[1], 10, &param->vol_id);
+		if (ret < 0) {
+			param->vol_id = -1;
+			strcpy(param->name, tokens[1]);
+		}
+
+	} else {
+		/* One parameter: must be device path */
+		strcpy(param->name, tokens[0]);
+		param->ubi_num = -1;
+		param->vol_id = -1;
+	}
+
+	ubiblock_devs++;
+
+	return 0;
+}
+
+static const struct kernel_param_ops ubiblock_param_ops = {
+        .set    = ubiblock_set_vol_param,
+};
+module_param_cb(vol, &ubiblock_param_ops, NULL, 0644);
+
+static struct ubiblock *find_dev_nolock(int ubi_num, int vol_id)
+{
+	struct ubiblock *dev;
+
+	list_for_each_entry(dev, &ubiblock_devices, list)
+		if (dev->ubi_num == ubi_num && dev->vol_id == vol_id)
+			return dev;
+	return NULL;
+}
+
+static bool leb_on_cache(struct ubiblock_cache *cache, int leb_num)
+{
+	return cache->leb_num == leb_num;
+}
+
+static int ubiblock_fill_cache(struct ubiblock *dev, int leb_num,
+			       struct ubiblock_cache *cache,
+			       struct ubiblock_cache *aux_cache)
+{
+	int ret;
+
+	/* Warn if we fill cache while being dirty */
+	WARN_ON(cache->state == STATE_DIRTY);
+
+	cache->leb_num = leb_num;
+	cache->state = STATE_CLEAN;
+
+	/*
+	 * If leb is on auxiliary cache, we use it to fill
+	 * the cache. This auxiliary cache needs to be invalidated.
+	 */
+	if (aux_cache && leb_on_cache(aux_cache, leb_num)) {
+
+		aux_cache->leb_num = -1;
+		aux_cache->state = STATE_EMPTY;
+		memcpy(cache->buffer, aux_cache->buffer, dev->leb_size);
+	} else {
+
+		ret = ubi_read(dev->desc, leb_num, cache->buffer, 0,
+			       dev->leb_size);
+		if (ret) {
+			dev_err(disk_to_dev(dev->gd), "ubi_read error %d\n",
+				ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int ubiblock_flush(struct ubiblock *dev, bool sync)
+{
+	struct ubiblock_cache* cache = &dev->write_cache;
+	int ret = 0;
+
+	if (cache->state != STATE_DIRTY)
+		return 0;
+
+	/*
+	 * TODO: mtdblock sets STATE_EMPTY, arguing that it prevents the
+	 * underlying media to get changed without notice.
+	 * I'm not fully convinced, so I just put STATE_CLEAN.
+	 */
+	cache->state = STATE_CLEAN;
+
+	/* Atomically change leb with buffer contents */
+	ret = ubi_leb_change(dev->desc, cache->leb_num,
+			     cache->buffer, dev->leb_size);
+	if (ret) {
+		dev_err(disk_to_dev(dev->gd), "ubi_leb_change error %d\n", ret);
+		return ret;
+	}
+
+	/* Sync ubi device when device is released and on block flush ioctl */
+	if (sync)
+		ret = ubi_sync(dev->ubi_num);
+
+	return ret;
+}
+
+static int ubiblock_read(struct ubiblock *dev, char *buffer,
+			 int pos, int len)
+{
+	int leb, offset, ret;
+	int bytes_left = len;
+	int to_read = len;
+	char *cache_buffer;
+
+	/* Get leb:offset address to read from */
+	leb = pos / dev->leb_size;
+	offset = pos % dev->leb_size;
+
+	while (bytes_left) {
+
+		/*
+		 * We can only read one leb at a time.
+		 * Therefore if the read length is larger than
+		 * one leb size, we split the operation.
+		 */
+		if (offset + to_read > dev->leb_size)
+			to_read = dev->leb_size - offset;
+
+		/*
+		 * 1) First try on read cache, if not there...
+		 * 2) then try on write cache, if not there...
+		 * 3) finally load this leb on read_cache
+		 *
+		 * Note that reading never flushes to disk!
+		 */
+		if (leb_on_cache(&dev->read_cache, leb)) {
+			cache_buffer = dev->read_cache.buffer;
+
+		} else if (leb_on_cache(&dev->write_cache, leb)) {
+			cache_buffer = dev->write_cache.buffer;
+
+		} else {
+			/* Leb is not in any cache: fill read_cache */
+			ret = ubiblock_fill_cache(dev, leb,
+				&dev->read_cache, NULL);
+			if (ret)
+				return ret;
+			cache_buffer = dev->read_cache.buffer;
+		}
+
+		memcpy(buffer, cache_buffer + offset, to_read);
+		buffer += to_read;
+		bytes_left -= to_read;
+		to_read = bytes_left;
+		leb++;
+		offset = 0;
+	}
+	return 0;
+}
+
+static int ubiblock_write(struct ubiblock *dev, const char *buffer,
+			 int pos, int len)
+{
+	int leb, offset, ret;
+	int bytes_left = len;
+	int to_write = len;
+	struct ubiblock_cache* cache = &dev->write_cache;
+
+	/* Get (leb:offset) address to write */
+	leb = pos / dev->leb_size;
+	offset = pos % dev->leb_size;
+
+	while (bytes_left) {
+		/*
+		 * We can only write one leb at a time.
+		 * Therefore if the write length is larger than
+		 * one leb size, we split the operation.
+		 */
+		if (offset + to_write > dev->leb_size)
+			to_write = dev->leb_size - offset;
+
+		/*
+		 * If leb is not on write cache, we flush current cached
+		 * leb to disk. Cache contents will be filled either
+		 * by using read cache or by reading device.
+		 */
+		if (!leb_on_cache(cache, leb)) {
+
+			ret = ubiblock_flush(dev, false);
+			if (ret)
+				return ret;
+
+			ret = ubiblock_fill_cache(dev, leb,
+				cache, &dev->read_cache);
+			if (ret)
+				return ret;
+		}
+
+		memcpy(cache->buffer + offset, buffer, to_write);
+
+		/* This is the only place where we dirt the write cache */
+		cache->state = STATE_DIRTY;
+
+		buffer += to_write;
+		bytes_left -= to_write;
+		to_write = bytes_left;
+		offset = 0;
+		leb++;
+	}
+	return 0;
+}
+
+static int do_ubiblock_request(struct ubiblock *dev, struct request *req)
+{
+	int pos, len;
+
+	if (req->cmd_flags & REQ_FLUSH)
+		return ubiblock_flush(dev, true);
+
+	if (req->cmd_type != REQ_TYPE_FS)
+		return -EIO;
+
+	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
+	    get_capacity(req->rq_disk))
+		return -EIO;
+
+	pos = blk_rq_pos(req) << 9;
+	len = blk_rq_cur_bytes(req);
+
+	switch (rq_data_dir(req)) {
+	case READ:
+		return ubiblock_read(dev, req->buffer, pos, len);
+	case WRITE:
+		return ubiblock_write(dev, req->buffer, pos, len);
+	default:
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void ubiblock_do_work(struct work_struct *work)
+{
+	struct ubiblock *dev =
+		container_of(work, struct ubiblock, work);
+	struct request_queue *rq = dev->rq;
+	struct request *req;
+	int res;
+
+	spin_lock_irq(rq->queue_lock);
+
+	req = blk_fetch_request(rq);
+	while (req) {
+
+		spin_unlock_irq(rq->queue_lock);
+
+		mutex_lock(&dev->vol_mutex);
+		res = do_ubiblock_request(dev, req);
+		mutex_unlock(&dev->vol_mutex);
+
+		spin_lock_irq(rq->queue_lock);
+
+		/*
+		 * If we're done with this request,
+		 * we need to fetch a new one
+		 */
+		if (!__blk_end_request_cur(req, res))
+			req = blk_fetch_request(rq);
+	}
+
+	spin_unlock_irq(rq->queue_lock);
+}
+
+static void ubiblock_request(struct request_queue *rq)
+{
+	struct ubiblock *dev;
+	struct request *req;
+
+	dev = rq->queuedata;
+
+	if (!dev)
+		while ((req = blk_fetch_request(rq)) != NULL)
+			__blk_end_request_all(req, -ENODEV);
+	else
+		queue_work(dev->wq, &dev->work);
+}
+
+static int ubiblock_alloc_cache(struct ubiblock_cache *cache, int size)
+{
+	cache->state = STATE_EMPTY;
+	cache->leb_num = -1;
+	cache->buffer = vmalloc(size);
+	if (!cache->buffer)
+		return -ENOMEM;
+	return 0;
+}
+
+static void ubiblock_free_cache(struct ubiblock_cache *cache)
+{
+	cache->leb_num = -1;
+	cache->state = STATE_EMPTY;
+	vfree(cache->buffer);
+}
+
+static int ubiblock_open(struct block_device *bdev, fmode_t mode)
+{
+	struct ubiblock *dev = bdev->bd_disk->private_data;
+	int ubi_mode = UBI_READONLY;
+	int ret;
+
+	mutex_lock(&dev->vol_mutex);
+	if (dev->refcnt > 0) {
+		/*
+		 * The volume is already opened,
+		 * just increase the reference counter
+		 */
+		dev->refcnt++;
+		mutex_unlock(&dev->vol_mutex);
+		return 0;
+	}
+
+	if (mode & FMODE_WRITE)
+		ubi_mode = UBI_READWRITE;
+
+	dev->desc = ubi_open_volume(dev->ubi_num, dev->vol_id, ubi_mode);
+	if (IS_ERR(dev->desc)) {
+		dev_err(disk_to_dev(dev->gd),
+			"failed to open ubi volume %d_%d\n",
+			dev->ubi_num, dev->vol_id);
+
+		ret = PTR_ERR(dev->desc);
+		dev->desc = NULL;
+		goto out_unlock;
+	}
+
+	dev->vi = kzalloc(sizeof(struct ubi_volume_info), GFP_KERNEL);
+	if (!dev->vi) {
+		ret = -ENOMEM;
+		goto out_close;
+	}
+	ubi_get_volume_info(dev->desc, dev->vi);
+	dev->leb_size = dev->vi->usable_leb_size;
+
+	ret = ubiblock_alloc_cache(&dev->read_cache, dev->leb_size);
+	if (ret)
+		goto out_free;
+
+	ret = ubiblock_alloc_cache(&dev->write_cache, dev->leb_size);
+	if (ret)
+		goto out_free_cache;
+
+	dev->refcnt++;
+	mutex_unlock(&dev->vol_mutex);
+	return 0;
+
+out_free_cache:
+	ubiblock_free_cache(&dev->read_cache);
+out_free:
+	kfree(dev->vi);
+out_close:
+	ubi_close_volume(dev->desc);
+	dev->desc = NULL;
+out_unlock:
+	mutex_unlock(&dev->vol_mutex);
+	return ret;
+}
+
+static int ubiblock_release(struct gendisk *gd, fmode_t mode)
+{
+	struct ubiblock *dev = gd->private_data;
+
+	mutex_lock(&dev->vol_mutex);
+
+	dev->refcnt--;
+	if (dev->refcnt == 0) {
+		ubiblock_flush(dev, true);
+
+		ubiblock_free_cache(&dev->read_cache);
+		ubiblock_free_cache(&dev->write_cache);
+
+		kfree(dev->vi);
+		ubi_close_volume(dev->desc);
+
+		dev->vi = NULL;
+		dev->desc = NULL;
+	}
+
+	mutex_unlock(&dev->vol_mutex);
+	return 0;
+}
+
+static int ubiblock_ioctl(struct block_device *bdev, fmode_t mode,
+			      unsigned int cmd, unsigned long arg)
+{
+	struct ubiblock *dev = bdev->bd_disk->private_data;
+	int ret = -ENXIO;
+
+	if (!dev)
+		return ret;
+
+	mutex_lock(&dev->vol_mutex);
+
+	/* I can't get this to get called. What's going on? */
+	switch (cmd) {
+	case BLKFLSBUF:
+		ret = ubiblock_flush(dev, true);
+		break;
+	default:
+		ret = -ENOTTY;
+	}
+
+	mutex_unlock(&dev->vol_mutex);
+	return ret;
+}
+
+static int ubiblock_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	/* Some tools might require this information */
+	geo->heads = 1;
+	geo->cylinders = 1;
+	geo->sectors = get_capacity(bdev->bd_disk);
+	geo->start = 0;
+	return 0;
+}
+
+static const struct block_device_operations ubiblock_ops = {
+	.owner = THIS_MODULE,
+	.open = ubiblock_open,
+	.release = ubiblock_release,
+	.ioctl = ubiblock_ioctl,
+	.getgeo	= ubiblock_getgeo,
+};
+
+static int ubiblock_add(struct ubi_volume_info *vi)
+{
+	struct ubiblock *dev;
+	struct gendisk *gd;
+	int disk_capacity;
+	int ret;
+
+	/* Check that the volume isn't already handled */
+	mutex_lock(&devices_mutex);
+	if (find_dev_nolock(vi->ubi_num, vi->vol_id)) {
+		mutex_unlock(&devices_mutex);
+		return -EEXIST;
+	}
+	mutex_unlock(&devices_mutex);
+
+	dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	mutex_init(&dev->vol_mutex);
+
+	dev->ubi_num = vi->ubi_num;
+	dev->vol_id = vi->vol_id;
+
+	/* Initialize the gendisk of this ubiblock device */
+	gd = alloc_disk(1);
+	if (!gd) {
+		pr_err("alloc_disk failed\n");
+		ret = -ENODEV;
+		goto out_free_dev;
+	}
+
+	gd->fops = &ubiblock_ops;
+	gd->major = ubiblock_major;
+	gd->first_minor = dev->ubi_num * UBI_MAX_VOLUMES + dev->vol_id;
+	gd->private_data = dev;
+	sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
+	disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
+	set_capacity(gd, disk_capacity);
+	dev->gd = gd;
+
+	spin_lock_init(&dev->queue_lock);
+	dev->rq = blk_init_queue(ubiblock_request, &dev->queue_lock);
+	if (!dev->rq) {
+		pr_err("blk_init_queue failed\n");
+		ret = -ENODEV;
+		goto out_put_disk;
+	}
+
+	dev->rq->queuedata = dev;
+	dev->gd->queue = dev->rq;
+
+	blk_queue_flush(dev->rq, REQ_FLUSH);
+
+	/* TODO: Is performance better or worse with this flag? */
+	/* queue_flag_set_unlocked(QUEUE_FLAG_NONROT, dev->rq);*/
+
+	/*
+	 * Create one workqueue per volume (per registered block device).
+	 * Rembember workqueues are cheap, they're not threads.
+	 */
+	dev->wq = alloc_workqueue(gd->disk_name, 0, 0);
+	if (!dev->wq)
+		goto out_free_queue;
+	INIT_WORK(&dev->work, ubiblock_do_work);
+
+	mutex_lock(&devices_mutex);
+	list_add_tail(&dev->list, &ubiblock_devices);
+	mutex_unlock(&devices_mutex);
+
+	/* Must be the last step: anyone can call file ops from now on */
+	add_disk(dev->gd);
+
+	dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)\n",
+		 dev->ubi_num, dev->vol_id, vi->name);
+
+	return 0;
+
+out_free_queue:
+	blk_cleanup_queue(dev->rq);
+out_put_disk:
+	put_disk(dev->gd);
+out_free_dev:
+	kfree(dev);
+
+	return ret;
+}
+
+static void ubiblock_cleanup(struct ubiblock *dev)
+{
+	del_gendisk(dev->gd);
+	blk_cleanup_queue(dev->rq);
+	put_disk(dev->gd);
+}
+
+static void ubiblock_del(struct ubi_volume_info *vi)
+{
+	struct ubiblock *dev;
+
+	mutex_lock(&devices_mutex);
+	dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
+	if (!dev) {
+		mutex_unlock(&devices_mutex);
+		return;
+	}
+	/* Remove from device list */
+	list_del(&dev->list);
+	mutex_unlock(&devices_mutex);
+
+	/* Flush pending work and stop this workqueue */
+	destroy_workqueue(dev->wq);
+
+	mutex_lock(&dev->vol_mutex);
+
+	/*
+	 * This means that ubiblock device is opened and in usage.
+	 * However, this shouldn't happen, since we have
+	 * called ubi_open_volume() at open() time, thus preventing
+	 * volume removal.
+	 */
+	WARN_ON(dev->desc);
+	ubiblock_cleanup(dev);
+
+	mutex_unlock(&dev->vol_mutex);
+
+	kfree(dev);
+}
+
+static void ubiblock_resize(struct ubi_volume_info *vi)
+{
+	struct ubiblock *dev;
+	int disk_capacity;
+
+	/*
+	 * We don't touch the list, but we better lock it: it could be that the
+	 * device gets removed between the time the device has been found and
+	 * the time we access dev->gd
+	 */
+	mutex_lock(&devices_mutex);
+	dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
+	if (!dev) {
+		mutex_unlock(&devices_mutex);
+		return;
+	}
+	mutex_unlock(&devices_mutex);
+
+	mutex_lock(&dev->vol_mutex);
+	disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
+	set_capacity(dev->gd, disk_capacity);
+	dev_dbg(disk_to_dev(dev->gd), "resized to %d LEBs\n", vi->size);
+	mutex_unlock(&dev->vol_mutex);
+}
+
+static int ubiblock_notify(struct notifier_block *nb,
+			 unsigned long notification_type, void *ns_ptr)
+{
+	struct ubi_notification *nt = ns_ptr;
+
+	switch (notification_type) {
+	case UBI_VOLUME_ADDED:
+		if (0)
+			ubiblock_add(&nt->vi);
+		break;
+	case UBI_VOLUME_REMOVED:
+		ubiblock_del(&nt->vi);
+		break;
+	case UBI_VOLUME_RESIZED:
+		ubiblock_resize(&nt->vi);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ubiblock_notifier = {
+	.notifier_call = ubiblock_notify,
+};
+
+static struct ubi_volume_desc *
+open_volume_desc(const char *name, int ubi_num, int vol_id)
+{
+	if (ubi_num == -1)
+		/* No ubi num, name must be a vol device path */
+		return ubi_open_volume_path(name, UBI_READONLY);
+	else if (vol_id == -1)
+		/* No vol_id, must be vol_name */
+		return ubi_open_volume_nm(ubi_num, name, UBI_READONLY);
+	else
+		return ubi_open_volume(ubi_num, vol_id, UBI_READONLY);
+}
+
+static void ubiblock_register_vol_param(void)
+{
+	int i, ret;
+	struct ubiblock_vol_param *p;
+	struct ubi_volume_desc *desc;
+	struct ubi_volume_info vi;
+
+	for (i = 0; i < ubiblock_devs; i++) {
+		p = &ubiblock_vol_param[i];
+
+		desc = open_volume_desc(p->name, p->ubi_num, p->vol_id);
+		if (IS_ERR(desc))
+			continue;
+
+		ubi_get_volume_info(desc, &vi);
+		ret = ubiblock_add(&vi);
+		if (ret)
+			pr_warn("can't add '%s' volume, err=%d\n",
+				vi.name, ret);
+
+		ubi_close_volume(desc);
+	}
+}
+
+static int __init ubiblock_init(void)
+{
+	ubiblock_major = register_blkdev(0, "ubiblock");
+	if (ubiblock_major < 0)
+		return ubiblock_major;
+
+	ubiblock_register_vol_param();
+
+	/*
+	 * Blocks will get registered dynamically.
+	 * Each ubi volume will get a corresponding block device.
+	 */
+	return ubi_register_volume_notifier(&ubiblock_notifier, 1);
+}
+
+static void __exit ubiblock_exit(void)
+{
+	struct ubiblock *next;
+	struct ubiblock *dev;
+
+	ubi_unregister_volume_notifier(&ubiblock_notifier);
+
+	list_for_each_entry_safe(dev, next, &ubiblock_devices, list) {
+
+		/* Flush pending work and stop workqueue */
+		destroy_workqueue(dev->wq);
+
+		/* The module is being forcefully removed */
+		WARN_ON(dev->desc);
+
+		/* Remove from device list */
+		list_del(&dev->list);
+
+		ubiblock_cleanup(dev);
+
+		kfree(dev);
+	}
+
+	unregister_blkdev(ubiblock_major, "ubiblock");
+}
+
+module_init(ubiblock_init);
+module_exit(ubiblock_exit);
+
+MODULE_DESCRIPTION("Block device emulation access to UBI volumes");
+MODULE_AUTHOR("David Wagner");
+MODULE_AUTHOR("Ezequiel Garcia <ezequiel@free-electrons.com>");
+MODULE_LICENSE("GPL");