diff mbox

[RFC,v2] UBI: New ioctl() to support ubidump

Message ID 53D768B5.6090409@huawei.com
State RFC
Headers show

Commit Message

hujianyang July 29, 2014, 9:26 a.m. UTC
An ioctl() return pnum of a specified leb.


Signed-off-by: hujianyang <hujianyang@huawei.com>
---
 drivers/mtd/ubi/cdev.c      | 26 ++++++++++++++++++++++++++
 include/uapi/mtd/ubi-user.h | 12 ++++++++++++
 2 files changed, 38 insertions(+)

Comments

Bill Pringlemeir July 29, 2014, 2:48 p.m. UTC | #1
On 29 Jul 2014, hujianyang@huawei.com wrote:

> An ioctl() return pnum of a specified leb.

> Signed-off-by: hujianyang <hujianyang@huawei.com>
> ---
> drivers/mtd/ubi/cdev.c      | 26 ++++++++++++++++++++++++++
> include/uapi/mtd/ubi-user.h | 12 ++++++++++++
> 2 files changed, 38 insertions(+)

Please look at 'ubi_scan()', 

  http://sourcecodebrowser.com/mtd-utils/20110107/libscan_8c.html
  http://sourcecodebrowser.com/mtd-utils/20110107/libscan_8h.html

After this call info->ec[pnum] is,

  EB_EMPTY      (ffffffff) - erased
  EB_CORRUPTED  (fffffffe) - inconsistent UBI data.
  EB_ALIEN      (fffffffd) - non-ubi erase sector
  EB_BAD        (fffffffc) - bad block
  or the LNUM.

To write your code, you can either scan the array (~32-2048 entries)
each time you want an 'lnum' or you could run through the array and
construct the opposite table; a 'logical index' gives a 'physical eb';
this would require keeping track of the 'generation' count.

I think a patch to 'ubi_scan()' to create an 'pmap' array might be
better or more accepted than a Linux/MTD/UBI patch?  Then only the
'ubidump' code is needed and not a properly configured/versioned kernel;
or at least only the nandsim module which is similar to some other
utilities.  If you had a 'ubi_scan()' which has an 'info->pmap[leb]'
which had,

  EB_EMPTY      (ffffffff) - not mapped
  or the PNUM.

Would you need to patch the kernel?

Fwiw,
Bill Pringlemeir.
Richard Weinberger July 29, 2014, 4:37 p.m. UTC | #2
On Tue, Jul 29, 2014 at 11:26 AM, hujianyang <hujianyang@huawei.com> wrote:
> An ioctl() return pnum of a specified leb.
>
>
> Signed-off-by: hujianyang <hujianyang@huawei.com>
> ---
>  drivers/mtd/ubi/cdev.c      | 26 ++++++++++++++++++++++++++
>  include/uapi/mtd/ubi-user.h | 12 ++++++++++++
>  2 files changed, 38 insertions(+)
>
> diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
> index 7646220..6fa7346 100644
> --- a/drivers/mtd/ubi/cdev.c
> +++ b/drivers/mtd/ubi/cdev.c
> @@ -581,6 +581,32 @@ static long vol_cdev_ioctl(struct file *file, unsigned int cmd,
>                 break;
>         }
>
> +       /* Get pnum of a specified leb command */
> +       case UBI_IOCEBGETPNUM:
> +       {
> +               struct ubi_lnum2pnum_req req;
> +               int pnum;
> +
> +               err = copy_from_user(&req, argp,
> +                               sizeof(struct ubi_lnum2pnum_req));
> +               if (err) {
> +                       err = -EFAULT;
> +                       break;
> +               }
> +
> +               err = ubi_is_mapped(desc, req.lnum);
> +               if (err <= 0)
> +                       break;
> +               pnum = vol->eba_tbl[req.lnum];
> +               req.pnum = pnum;

Isn't this racy?
i.e. If a LEB change happens between ubi_is_mapped() and "pnum =
vol->eba_tbl[req.lnum]".

> +               err = copy_to_user(argp, &req,
> +                               sizeof(struct ubi_lnum2pnum_req));
> +               if (err)
> +                       err = -EFAULT;
> +               break;
> +       }
> +
>         default:
>                 err = -ENOTTY;
>                 break;
> diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h
> index 1927b0d..fc41ddb 100644
> --- a/include/uapi/mtd/ubi-user.h
> +++ b/include/uapi/mtd/ubi-user.h
> @@ -205,6 +205,8 @@
>  #define UBI_IOCVOLCRBLK _IOW(UBI_VOL_IOC_MAGIC, 7, struct ubi_blkcreate_req)
>  /* Remove the R/O block device */
>  #define UBI_IOCVOLRMBLK _IO(UBI_VOL_IOC_MAGIC, 8)
> +/* Get pnum of a specified leb */
> +#define UBI_IOCEBGETPNUM _IOW(UBI_VOL_IOC_MAGIC, 9, struct ubi_lnum2pnum_req)
>
>  /* Maximum MTD device name length supported by UBI */
>  #define MAX_UBI_MTD_NAME_LEN 127
> @@ -442,4 +444,14 @@ struct ubi_blkcreate_req {
>         __s8  padding[128];
>  }  __packed;
>
> +/**
> + * struct ubi_lnum2pnum_req - a data structure used in lnum translate requests.
> + * @lnum: logical eraseblock num to translate
> + * @pnum: physical eraseblock num @lnum mapped
> + */
> +struct ubi_lnum2pnum_req {
> +       __s32 lnum;
> +       __s32 pnum;
> +}  __packed;
> +
>  #endif /* __UBI_USER_H__ */
> --
> 1.8.1.4
>
>
> ______________________________________________________
> Linux MTD discussion mailing list
> http://lists.infradead.org/mailman/listinfo/linux-mtd/
hujianyang July 30, 2014, 2:01 a.m. UTC | #3
> 
> I think a patch to 'ubi_scan()' to create an 'pmap' array might be
> better or more accepted than a Linux/MTD/UBI patch?  Then only the
> 'ubidump' code is needed and not a properly configured/versioned kernel;
> or at least only the nandsim module which is similar to some other
> utilities.  If you had a 'ubi_scan()' which has an 'info->pmap[leb]'
> which had,
> 

Yes, I think you are right~! I don't want this utility can't be run
without a properly configured kernel and this full-scan method will
be used again when we dumping with an image file.

But I'm worry about the performance. As we dump one specified peb
now, we can't record the mapping table and need to do full scan
each time running this utility.

Anyway, I will try what you said first~!

Thanks~!

Hu
hujianyang July 30, 2014, 2:19 a.m. UTC | #4
> 
> Isn't this racy?
> i.e. If a LEB change happens between ubi_is_mapped() and "pnum =
> vol->eba_tbl[req.lnum]".
> 

As I was writing this code, I used this ubi_is_mapped() to make sure
lnum is less than @vol->reserved_pebs and !@vol->upd_marker in order
to avoid kernel panic when we get pnum from eba_tbl.

Even we locked this ioctl, peb may also be changed before we read it
from MTD. That's why I didn't focus on this race. We can use some
condition check to make sure this vol is not mounted but user may use
this utility on rootfs.

Bill showed me a new method to get pnum without UBI driver. I will
try that first but this new method can't avoid peb changes before
we read it either. We should consider more on it.

Thanks!

Hu
Bill Pringlemeir July 30, 2014, 3:53 p.m. UTC | #5
On 29 Jul 2014, hujianyang@huawei.com wrote:

> But I'm worry about the performance. As we dump one specified peb
> now, we can't record the mapping table and need to do full scan
> each time running this utility.

You can record the table if you know that the MTD is not changing.  If
you introduce the 'ioctl()' to the kernel, it will most likely be on an
active file system.  I guess this is a big decision in the tools design.

Supporting an active file system is much more difficult.  For the case
of an real MTD backing store, I was thinking this would run from another
partition or an initramfs; UBI/UbiFS would not be active.  It could
possibly attempt to repair the UBI/UbiFs, while nothing else is using
it.  The other use case is to read out the flash via some tools and
loopback mount it with nandsim on a PC for diagnosis; a post-mortem
analysis.

If you wish to run the utility on an active MTD, I would suggest some
'freeze/thaw' type function to stop UBI/UbiFs and then just use the tool
on the 'frozen' MTD.  Maybe this exists already for suspend/resume or
something else.  It might only be a debug type interface; so
conditionally compiled for developers.  It needs a hook so you can
trigger the freeze on some condition via code.

I think others don't understand why you would want the 'ubidump' to run
on an active file system?  At least, I don't understand this?  Is there
some use case we are missing?  I am quite sure that you can perform
better diagnostics on a 'frozen' image where you can look at the whole
image state.

Fwiw,
Bill Pringlemeir.
hujianyang July 31, 2014, 3:01 a.m. UTC | #6
On 2014/7/30 23:53, Bill Pringlemeir wrote:
> 
> You can record the table if you know that the MTD is not changing.  If
> you introduce the 'ioctl()' to the kernel, it will most likely be on an
> active file system.  I guess this is a big decision in the tools design.

This ioctl() doesn't need UBIFS filesystem to be mounted, just needs
UBI driver. We can keep this partition not active during this tool
running.

But a properly configured kernel requirement you said in last mail
worries me most. I think you are right and want to have a try. But
I've discussed these stuff with Artem and he said, just quote:

""
So I envision that the tool would work like this.

$ ubidump my.img --lnum 5
  - dump LEB 5, will need to scan the entire image.

$ ubidump /dev/ubiX_Y --lnum 5
  - will just ask the UBI driver to give the PEB number for LEB 5, then
find out the MTD device for this volume (should be possible by checking
the sysfs files), and then reads the MTD device, and gets the UBI-level
information from there.

Something like this, just quick thoughts.
""
http://lists.infradead.org/pipermail/linux-mtd/2014-July/054674.html


We can keep a temporary file to record the mapping table but that will
make the initial submission much more complicated. But this way you
mentioned to record the table should be considered to add into this tool
in the future.

> 
> I think others don't understand why you would want the 'ubidump' to run
> on an active file system?  At least, I don't understand this?  Is there
> some use case we are missing?  I am quite sure that you can perform
> better diagnostics on a 'frozen' image where you can look at the whole
> image state.
> 

Yes, I want this tool to be flexible before. But dumping with an active
partition really useless as you said now. Because in most cases, the
volumes we want to dump must have something wrong.


I think we need to make a lighter version and submit it to ubi-utils
at beginning, then make it strongly step by step. Despite all that,
we should have a clear design of what we need now. So I'm happy with
your sharing of your thoughts.

The "freeze/thaw" method will result a complex tool. We can use some
functionality to make sure UBIFS partition is not active. Should I need
to add them in this version or enable them in the future? Do you think
it is a key problem of this tool? This utility may tolerance some data
mistake caused by peb update race in my considering.
Artem Bityutskiy July 31, 2014, 1:24 p.m. UTC | #7
On Thu, 2014-07-31 at 11:01 +0800, hujianyang wrote:
> On 2014/7/30 23:53, Bill Pringlemeir wrote:
> > 
> > You can record the table if you know that the MTD is not changing.  If
> > you introduce the 'ioctl()' to the kernel, it will most likely be on an
> > active file system.  I guess this is a big decision in the tools design.
> 
> This ioctl() doesn't need UBIFS filesystem to be mounted, just needs
> UBI driver. We can keep this partition not active during this tool
> running.
> 
> But a properly configured kernel requirement you said in last mail
> worries me most. I think you are right and want to have a try. But
> I've discussed these stuff with Artem and he said, just quote:
> 
> ""
> So I envision that the tool would work like this.
> 
> $ ubidump my.img --lnum 5
>   - dump LEB 5, will need to scan the entire image.
> 
> $ ubidump /dev/ubiX_Y --lnum 5
>   - will just ask the UBI driver to give the PEB number for LEB 5, then
> find out the MTD device for this volume (should be possible by checking
> the sysfs files), and then reads the MTD device, and gets the UBI-level
> information from there.
> 
> Something like this, just quick thoughts.
> ""

Having a tool which does not need any kernel help is a lot more
preferable. I though we may have an ioctl like this if no one has time
to write full scanning support in user-space, and then maintain it.

> We can keep a temporary file to record the mapping table but that will
> make the initial submission much more complicated. But this way you
> mentioned to record the table should be considered to add into this tool
> in the future.

Hujianyang, AFAIU, right now you need an ability to dump UBIFS stuff.
You do not really need to know PEB number. So ubidump may
analyze /dev/ubiX_Y without knowing the PEB number, right? It can just
read from /dev/ubiX_Y directly. And print you all the UBIFS nodes.

I mean, if what user gives you is an UBI volume, you do not try going to
the MTD level.
Artem Bityutskiy July 31, 2014, 1:25 p.m. UTC | #8
On Wed, 2014-07-30 at 11:53 -0400, Bill Pringlemeir wrote:
> I think others don't understand why you would want the 'ubidump' to run
> on an active file system?  At least, I don't understand this?  Is there
> some use case we are missing?  I am quite sure that you can perform
> better diagnostics on a 'frozen' image where you can look at the whole
> image state.

I think you are right that in ideal world ubidump should be able to
figure out the LEB->PEB mapping without asking the driver. It should be
able to scan the image and figure everything out itself.

However, writing such a toll is _hard_. Maintaining it is _hard too_.
You basically need to copy part of the driver to user-space, and
maintain it there. Whenever someone adds a new UBI feature like
"fastmap", you need to port that to the tool too.

This is doable, e.g., e2fsprogs project basically does this. But so far,
no one did this for UBI/UBIFS. And the UBIFS user-base is relatively
small. Many people expressed a desire to have a 'chkfs.ubifs' and
'ubidump', but no one went ahead ad did this - this is not easy and this
costs.

So I initially thought may be a little help from the kernel would not
hurt. But I am not 100% sure this is a good idea.
hujianyang Aug. 1, 2014, 2:50 a.m. UTC | #9
On 2014/7/31 21:24, Artem Bityutskiy wrote:
> 
> Hujianyang, AFAIU, right now you need an ability to dump UBIFS stuff.

Yes, but not all of what I want. I need this utility dumping UBI-level
stuff too. The ability, not only to dump UBIFS stuff but also to dump
UBI stuff will make it a useful tool.

> You do not really need to know PEB number. So ubidump may
> analyze /dev/ubiX_Y without knowing the PEB number, right? It can just
> read from /dev/ubiX_Y directly. And print you all the UBIFS nodes.
> 

Last version I sent to this maillist was reading data from UBI driver
directly. But our .read() function in UBI driver jump over UBI stuff, so
I read UBI stuff from an ioctl(). This isn't a common way as you said.

I have to say I am about to leave Beijing for at least 3 months and
have no time for the development of this tool. I aspire if we can push
some of my work to ubi-utils before my leaving. So we should hurry up~!

First, we can leave UBI/UBIFS format dumping behind, just dump binary
data, that will make initial submission smaller.

Then, think about what we want at this time.

1) No UBI stuff and read UBIFS stuff from UBI-driver
This is a easiest way and we don't need any ioctl either. We can't get
UBI-level stuff right now but we can put this into our design of dumping
an image file.

2) Read data from MTD-driver and find a way to get pnum
This way is basing on my v2 work. We can put an ioctl() to kernel and
if kernel don't support this ioctl(), this tool will do a full-scan on
MTD device. It's transparent to user. But we should take some time to
consider how to do this full-scan.

3) Re-design this tool about dropping UBI driver based dumping.
This way is like the exist ubiformat utility. We don't use UBI driver
and just do read/write(maybe we can do some repairing later) with MTD
device or image files. It's not small.


I think you will prefer to solution 1. Although it's not fit all my
desires, I will agree with it if you insist. Or do you have any other
suggestions?


Thanks~!

Hu
Artem Bityutskiy Aug. 1, 2014, 7:30 a.m. UTC | #10
On Fri, 2014-08-01 at 10:50 +0800, hujianyang wrote:
> On 2014/7/31 21:24, Artem Bityutskiy wrote:
> > 
> > Hujianyang, AFAIU, right now you need an ability to dump UBIFS stuff.
> 
> Yes, but not all of what I want. I need this utility dumping UBI-level
> stuff too. The ability, not only to dump UBIFS stuff but also to dump
> UBI stuff will make it a useful tool.

OK.

> > You do not really need to know PEB number. So ubidump may
> > analyze /dev/ubiX_Y without knowing the PEB number, right? It can just
> > read from /dev/ubiX_Y directly. And print you all the UBIFS nodes.
> > 
> 
> Last version I sent to this maillist was reading data from UBI driver
> directly. But our .read() function in UBI driver jump over UBI stuff, so
> I read UBI stuff from an ioctl(). This isn't a common way as you said.

There are 2 layers involved: MTD and UBI.

On the MTD level, the entire flash contents is available (/dev/mtdX)
On the UBI level, only UBI volumes are available (/dev/ubiX_Y)

If I give you an UBI volume dump (cp /dev/ubiX_Y ubi.img), you will only
see the contents of the UBI volume. If there was UBIFS file-system, you
will see UBIFS nodes in the "ubi.img" file. You will not see UBI headers
there.

The ideal design would be treating /dev/ubiX_Y the same way as
'ubi.img'. IOW:

ubidump ubi.img
and
ubidump /dev/ubiX_Y

should give the same results.

If I give you an MTD image (nanddump -o mtd.img /dev/mtdX), you should
be able to see both UBIFS and UBI stuff in 'mtd.img'. You may see the
same information in /dev/mtdX. To get the LEB<->PEB mapping, you need to
do scanning, etc. So

ubidump mtd.img
and
ubidump /dev/mtdX

should be the same.

This is the basic picture.

Now what you want is to add an exception to this scheme. Namely, for the
'ubidump /dev/ubiX_Y' case: you want to get help from the driver, using
the ioctl you introduced.

Frankly, I am not 100% sure this is a good idea, would be nice to hear
from other people. I'd need to think a bit on this.

So what I suggested is the you do not submit this exception so far. Just
teach ubidump to handle UBI volumes. I.e., make it work with

ubidump ubi.img
ubidump /dev/ubiX_Y

yes, these 2 will dump only ubifs stuff. Limited functionality.

> 1) No UBI stuff and read UBIFS stuff from UBI-driver
> This is a easiest way and we don't need any ioctl either. We can't get
> UBI-level stuff right now but we can put this into our design of dumping
> an image file.

Yes. That's a good first step.

> 2) Read data from MTD-driver and find a way to get pnum
> This way is basing on my v2 work. We can put an ioctl() to kernel and
> if kernel don't support this ioctl(), this tool will do a full-scan on
> MTD device. It's transparent to user. But we should take some time to
> consider how to do this full-scan.

I guess, I'd need to think some more about the ioctl(), and may be
someone else suggests something.

> 3) Re-design this tool about dropping UBI driver based dumping.
> This way is like the exist ubiformat utility. We don't use UBI driver
> and just do read/write(maybe we can do some repairing later) with MTD
> device or image files. It's not small.

Probably, we'll think about this when we are done with 1) and may be 2).
hujianyang Aug. 1, 2014, 10:10 a.m. UTC | #11
On 2014/8/1 15:30, Artem Bityutskiy wrote:
> 
> There are 2 layers involved: MTD and UBI.
> 
> On the MTD level, the entire flash contents is available (/dev/mtdX)
> On the UBI level, only UBI volumes are available (/dev/ubiX_Y)
> 
> If I give you an UBI volume dump (cp /dev/ubiX_Y ubi.img), you will only
> see the contents of the UBI volume. If there was UBIFS file-system, you
> will see UBIFS nodes in the "ubi.img" file. You will not see UBI headers
> there.
> 
> The ideal design would be treating /dev/ubiX_Y the same way as
> 'ubi.img'. IOW:
> 
> ubidump ubi.img
> and
> ubidump /dev/ubiX_Y
> 
> should give the same results.
> 
> If I give you an MTD image (nanddump -o mtd.img /dev/mtdX), you should
> be able to see both UBIFS and UBI stuff in 'mtd.img'. You may see the
> same information in /dev/mtdX. To get the LEB<->PEB mapping, you need to
> do scanning, etc. So
> 
> ubidump mtd.img
> and
> ubidump /dev/mtdX
> 
> should be the same.
> 
> This is the basic picture.
> 
> Now what you want is to add an exception to this scheme. Namely, for the
> 'ubidump /dev/ubiX_Y' case: you want to get help from the driver, using
> the ioctl you introduced.
> 
> Frankly, I am not 100% sure this is a good idea, would be nice to hear
> from other people. I'd need to think a bit on this.
> 
> So what I suggested is the you do not submit this exception so far. Just
> teach ubidump to handle UBI volumes. I.e., make it work with
> 
> ubidump ubi.img
> ubidump /dev/ubiX_Y
> 

Oh, I've confused these things before. I want this utility work with
mtd.img and /dev/ubiX_Y. It seems they are not same.

> yes, these 2 will dump only ubifs stuff. Limited functionality.
> 
>> 1) No UBI stuff and read UBIFS stuff from UBI-driver
>> This is a easiest way and we don't need any ioctl either. We can't get
>> UBI-level stuff right now but we can put this into our design of dumping
>> an image file.
> 
> Yes. That's a good first step.
> 

How about writing a new version basing on this?

>> 2) Read data from MTD-driver and find a way to get pnum
>> This way is basing on my v2 work. We can put an ioctl() to kernel and
>> if kernel don't support this ioctl(), this tool will do a full-scan on
>> MTD device. It's transparent to user. But we should take some time to
>> consider how to do this full-scan.
> 
> I guess, I'd need to think some more about the ioctl(), and may be
> someone else suggests something.
> 

OK.


Have a good weekend~!

Hu
Bill Pringlemeir Aug. 1, 2014, 3:35 p.m. UTC | #12
On  1 Aug 2014, dedekind1@gmail.com wrote:

> On Fri, 2014-08-01 at 10:50 +0800, hujianyang wrote:

>> 1) No UBI stuff and read UBIFS stuff from UBI-driver This is a
>> easiest way and we don't need any ioctl either. We can't get
>> UBI-level stuff right now but we can put this into our design of
>> dumping an image file.

> Yes. That's a good first step.

I agree that this is a good way to partition the functionality.

>> 2) Read data from MTD-driver and find a way to get pnum This way is
>> basing on my v2 work. We can put an ioctl() to kernel and if kernel
>> don't support this ioctl(), this tool will do a full-scan on MTD
>> device. It's transparent to user. But we should take some time to
>> consider how to do this full-scan.

> I guess, I'd need to think some more about the ioctl(), and may be
> someone else suggests something.

Isn't this just more work?  The tool will rely on two things?

>> 3) Re-design this tool about dropping UBI driver based dumping.
>> This way is like the exist ubiformat utility. We don't use UBI driver
>> and just do read/write(maybe we can do some repairing later) with MTD
>> device or image files. It's not small.

> Probably, we'll think about this when we are done with 1) and may be
> 2).

Some of these points I don't understand.  The 'ubiformat' already uses
'libscan.c', which have the basics for creating a static 'PEB->LEB'
mapping.  So while I agree that maintaining two version of UBI is not
good, I think we already have this?

UBI has to deal with writing, especially atomic writes, and concurrency.
I don't think the tool needs either, so the 2nd version of UBI is
significantly simpler.  Just look at 'libscan.c' versus
'driver/mtd/ubi/*.c'.

Also, I think a lot of people will have issues where they can not mount
the device and the only thing they can get is the whole 'nanddump';
which won't be done through Linux, but some other means.  Ie, some ROM
boot code (which doesn't understand either UBI or UbiFS) to read out
NAND, a NAND programmer, etc.

Finally, the separate implementation can be good.  Having two versions
of code use the same data structures will often show issues.  If we want
to debug UBI, then using it is not so helpful.  Having the separate
(simpler) implementation with worse performance is exactly the kind of
thing you want for debugging and repair, isn't it?  Features like
'fastmap' are optional and we should fall back to a linear scan.  I
think that future UBI changes will strive to be backwards compatible
unless there is some really good reason not to be?

The big advantage of doing everything outside the kernel is we can
eventually add lots of extra consistency checks that you would never
want to do in a running file system.  Ie, verify a linear scan and the
fastmap are consistent, etc.

My intent is never to make work for anyone.  Adding a patch to libscan.c
seems easy to me.

 1. Allocate a 'pmap' array and a temporary 'sequence' array.
 2. Initialize 'pmap' to -1.
 3a. For each valid 'eb', get 'lnum' and 'sqnum' from vid header.
  b. if (pmap[lnum] == -1 || sequence[lnum] < sqnum)
      pmap[lnum] = eb, sequence[lnum] = sqnum;

but maybe I don't understand all the complexities.  Isn't it that
simple?

Now if we want to use the tool on live volumes, 'libscan.c' will not
work for that.  I thought that the concepts of a debugger may apply.  It
can look at a core file, or a paused processes memory.  But maybe it is
crazy to 'pause' Ubi/UbiFs.  Someone might want such 'snapshots' to
examine the evolution of storage or some race issues... but I guess that
is not the main reason of using the ioctl().

Fwiw,
Bill Pringlemeir.
hujianyang Aug. 4, 2014, 3:54 a.m. UTC | #13
On 2014/8/1 23:35, Bill Pringlemeir wrote:
> 
>  1. Allocate a 'pmap' array and a temporary 'sequence' array.
>  2. Initialize 'pmap' to -1.
>  3a. For each valid 'eb', get 'lnum' and 'sqnum' from vid header.
>   b. if (pmap[lnum] == -1 || sequence[lnum] < sqnum)
>       pmap[lnum] = eb, sequence[lnum] = sqnum;
> 

I've researched the function ubi_scan() in libscan.c today and
found it's not easy to just add small changes to enable LEB->PEB
mapping table as we want.

There are two methods on enabling this functionality:
1) Add a mapping table in struct ubi_scan_info
2) Add some new parameters for function ubi_scan()

You know each MTD device may contain more than one UBI volume
and each volume should has a private mapping table. So it's not
easy to enable it in struct ubi_scan_info like method 1) because
we don't know the actual counts of volumes.

Further more, @sqnum is not enough for peb determining, we should
consider @on_copy flag for wear-leveling and atomic write. If
@on_copy flag is set, we need to read the whole leb and check CRC
to determine which peb is right. As this is a debugging tool, I
think printing all pebs have same lnum(of course in same volume)
is better. Now, we have to start thinking how to record this
stuff, that's a big problem.


In another way like method 2), we can add a new structure named
ubi_translate, put lnum and vol_id in it and returns a list or an
array contains the set of pebs which has a lnum equal to what we
set in translate struct. Original call like ubiformat set this
structure pointer to NULL and only ubidump use it. But if it's
better than writing a new function to do this stuff separately?
The code in ubi_scan is complicated now. Lnum translation is not
needed for ubiformat and the calculate of ec is not needed for
ubidump.

How about adding a new function to scan the whole MTD device and
just return a list contain all the pebs(mostly 1 or 2) in same
vol_id and same lnum?

in libscan.c

struct ubi_translate_info {
	int lnum,
	int vol_id,
	int find,	// counts of peb we find
	int *array	// dynamic or static(5? in size) array for
			// recording peb num
};

int ubi_translate(struct ubi_translate_info *info /* or **info */)
{
	// full scan the MTD device
	while (...) {
		read(ec);
		
		// determine if it is a valid peb
		...

		read(vid);

		if (vid->vol_id == info->vol_id &&
		    vid->lnum == info->lnum) {
			// add to list @array
			...

			find++;			
		}
	}

	return err; // MTD is bad or something else
}

Just quick thought.

What's your opinion?

By the way, I have to say this separating of UBI and UBIFS makes
it really hard to read data from both sides.
Bill Pringlemeir Aug. 5, 2014, 9:10 p.m. UTC | #14
On  3 Aug 2014, hujianyang@huawei.com wrote:

> On 2014/8/1 23:35, Bill Pringlemeir wrote:

>> 1. Allocate a 'pmap' array and a temporary 'sequence' array.
>> 2. Initialize 'pmap' to -1.
>> 3a. For each valid 'eb', get 'lnum' and 'sqnum' from vid header.
>> b. if (pmap[lnum] == -1 || sequence[lnum] < sqnum)
>> pmap[lnum] = eb, sequence[lnum] = sqnum;

> I've researched the function ubi_scan() in libscan.c today and
> found it's not easy to just add small changes to enable LEB->PEB
> mapping table as we want.
>
> There are two methods on enabling this functionality:
> 1) Add a mapping table in struct ubi_scan_info
> 2) Add some new parameters for function ubi_scan()
>
> You know each MTD device may contain more than one UBI volume
> and each volume should has a private mapping table. So it's not
> easy to enable it in struct ubi_scan_info like method 1) because
> we don't know the actual counts of volumes.

Yes, I think you are completely correct.  The array only keeps track of
the erase count not the LEB.  Sorry, I was confused about that.

> Further more, @sqnum is not enough for peb determining, we should
> consider @on_copy flag for wear-leveling and atomic write. If
> @on_copy flag is set, we need to read the whole leb and check CRC
> to determine which peb is right. As this is a debugging tool, I
> think printing all pebs have same lnum(of course in same volume)
> is better. Now, we have to start thinking how to record this
> stuff, that's a big problem.

Validating the header CRC is always part of seeing if the block is
valid.  For a real physical device, the unstable bits maybe an issue.
So, it is probably better to examine 'copy_flag'.  Many of these are
abnormal cases, which need to be handled, but the tool would not
normally run through these paths for every block.  Running the CRC on
the data maybe particularly slow.

> In another way like method 2), we can add a new structure named
> ubi_translate, put lnum and vol_id in it and returns a list or an
> array contains the set of pebs which has a lnum equal to what we
> set in translate struct. Original call like ubiformat set this
> structure pointer to NULL and only ubidump use it. But if it's
> better than writing a new function to do this stuff separately?
> The code in ubi_scan is complicated now. Lnum translation is not
> needed for ubiformat and the calculate of ec is not needed for
> ubidump.

I agree it is complex.  It could be decomposed into functions.

> How about adding a new function to scan the whole MTD device and
> just return a list contain all the pebs(mostly 1 or 2) in same
> vol_id and same lnum?
>
> in libscan.c
>
> struct ubi_translate_info {
> 	int lnum,
> 	int vol_id,
> 	int find,	// counts of peb we find
> 	int *array	// dynamic or static(5? in size) array for
> 			// recording peb num
> };

> int ubi_translate(struct ubi_translate_info *info /* or **info */)
> {
> 	// full scan the MTD device
> 	while (...) {
> 		read(ec);
> 		
> 		// determine if it is a valid peb
> 		...
>
> 		read(vid);
>
> 		if (vid->vol_id == info->vol_id &&
> 		    vid->lnum == info->lnum) {
> 			// add to list @array
> 			...
>
> 			find++;			
> 		}
> 	}
>
> 	return err; // MTD is bad or something else
> }

ubi_translate() is more complex.  You need to read the VID header and
possibly data as well.  The ubi_scan() never reads these.  

However, a lot of the erase header checking will be the same.  So
besides all 'si->...=' lines most of ubi_scan() loop body is needed for
the 'ubi_translate()' to validate the erase header.  Then you need the
extra step to read the 'vid'.  I think you could have a function like,

/* ech invalid unless return < MAX_EC */
static uint32_t read_ehdr(struct mtd_dev_info *mtd, int fd, 
		int eb, struct ubi_ec_hdr *ech)
{
  ... 

That returns the 'enum' or erase count.  The 'enum' could be amended
with a 'fatal', which is the current 'goto out_ec' path (or you need
another parameter).  Then, the current ubi_scan() could be,

	for (eb = 0; eb < mtd->eb_cnt; eb++) {
		uint32_t ec;
		ec = read_ehdr(mtd, fd, eb, &ech);
                switch(ec) {
                   ...
                /* Do stuff to 'si->... here. */
			break;
		case EB_FATAL:
			goto out_ec;

And then ubi_translate() could reuse the read_ehdr() to do the
'determine if a valid peb' part.  Ie, whether you should even read the
'vid'.

> By the way, I have to say this separating of UBI and UBIFS makes
> it really hard to read data from both sides.

You mean both layers at once?  Ie, layers == sides?  Or do you mean to
support reading from an 'mtd' or a 'ubi' device?

Regards,
Bill Pringlemeir.
diff mbox

Patch

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 7646220..6fa7346 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -581,6 +581,32 @@  static long vol_cdev_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}

+	/* Get pnum of a specified leb command */
+	case UBI_IOCEBGETPNUM:
+	{
+		struct ubi_lnum2pnum_req req;
+		int pnum;
+
+		err = copy_from_user(&req, argp,
+				sizeof(struct ubi_lnum2pnum_req));
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = ubi_is_mapped(desc, req.lnum);
+		if (err <= 0)
+			break;
+		pnum = vol->eba_tbl[req.lnum];
+		req.pnum = pnum;
+
+		err = copy_to_user(argp, &req,
+				sizeof(struct ubi_lnum2pnum_req));
+		if (err)
+			err = -EFAULT;
+		break;
+	}
+
 	default:
 		err = -ENOTTY;
 		break;
diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h
index 1927b0d..fc41ddb 100644
--- a/include/uapi/mtd/ubi-user.h
+++ b/include/uapi/mtd/ubi-user.h
@@ -205,6 +205,8 @@ 
 #define UBI_IOCVOLCRBLK _IOW(UBI_VOL_IOC_MAGIC, 7, struct ubi_blkcreate_req)
 /* Remove the R/O block device */
 #define UBI_IOCVOLRMBLK _IO(UBI_VOL_IOC_MAGIC, 8)
+/* Get pnum of a specified leb */
+#define UBI_IOCEBGETPNUM _IOW(UBI_VOL_IOC_MAGIC, 9, struct ubi_lnum2pnum_req)

 /* Maximum MTD device name length supported by UBI */
 #define MAX_UBI_MTD_NAME_LEN 127
@@ -442,4 +444,14 @@  struct ubi_blkcreate_req {
 	__s8  padding[128];
 }  __packed;

+/**
+ * struct ubi_lnum2pnum_req - a data structure used in lnum translate requests.
+ * @lnum: logical eraseblock num to translate
+ * @pnum: physical eraseblock num @lnum mapped
+ */
+struct ubi_lnum2pnum_req {
+	__s32 lnum;
+	__s32 pnum;
+}  __packed;
+
 #endif /* __UBI_USER_H__ */