diff mbox series

[3/8] blk: blkmap: Add basic infrastructure

Message ID 20230201181016.4145834-4-tobias@waldekranz.com
State Superseded
Delegated to: Tom Rini
Headers show
Series blk: blkmap: Composable virtual block devices | expand

Commit Message

Tobias Waldekranz Feb. 1, 2023, 6:10 p.m. UTC
blkmaps are loosely modeled on Linux's device mapper subsystem. The
basic idea is that you can create virtual block devices whose blocks
can be backed by a plethora of sources that are user configurable.

This change just adds the basic infrastructure for creating and
removing blkmap devices. Subsequent changes will extend this to add
support for actual mappings.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---
 MAINTAINERS                      |   6 +
 disk/part.c                      |   1 +
 drivers/block/Kconfig            |  18 ++
 drivers/block/Makefile           |   1 +
 drivers/block/blk-uclass.c       |   1 +
 drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
 include/blkmap.h                 |  15 ++
 include/dm/uclass-id.h           |   1 +
 include/efi_loader.h             |   4 +
 lib/efi_loader/efi_device_path.c |  30 ++++
 10 files changed, 352 insertions(+)
 create mode 100644 drivers/block/blkmap.c
 create mode 100644 include/blkmap.h

Comments

Simon Glass Feb. 1, 2023, 8:20 p.m. UTC | #1
Hi Tobias,

On Wed, 1 Feb 2023 at 11:10, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
> blkmaps are loosely modeled on Linux's device mapper subsystem. The
> basic idea is that you can create virtual block devices whose blocks
> can be backed by a plethora of sources that are user configurable.
>
> This change just adds the basic infrastructure for creating and
> removing blkmap devices. Subsequent changes will extend this to add
> support for actual mappings.
>
> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> ---
>  MAINTAINERS                      |   6 +
>  disk/part.c                      |   1 +
>  drivers/block/Kconfig            |  18 ++
>  drivers/block/Makefile           |   1 +
>  drivers/block/blk-uclass.c       |   1 +
>  drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
>  include/blkmap.h                 |  15 ++
>  include/dm/uclass-id.h           |   1 +
>  include/efi_loader.h             |   4 +
>  lib/efi_loader/efi_device_path.c |  30 ++++
>  10 files changed, 352 insertions(+)
>  create mode 100644 drivers/block/blkmap.c
>  create mode 100644 include/blkmap.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 3e8e193ecc..28a34231bf 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -786,6 +786,12 @@ M: Alper Nebi Yasak <alpernebiyasak@gmail.com>
>  S:     Maintained
>  F:     tools/binman/
>
> +BLKMAP
> +M:     Tobias Waldekranz <tobias@waldekranz.com>
> +S:     Maintained
> +F:     drivers/block/blkmap.c
> +F:     include/blkmap.h
> +
>  BOOTDEVICE
>  M:     Simon Glass <sjg@chromium.org>
>  S:     Maintained
> diff --git a/disk/part.c b/disk/part.c
> index d449635254..35300df590 100644
> --- a/disk/part.c
> +++ b/disk/part.c
> @@ -140,6 +140,7 @@ void dev_print(struct blk_desc *dev_desc)
>         case UCLASS_NVME:
>         case UCLASS_PVBLOCK:
>         case UCLASS_HOST:
> +       case UCLASS_BLKMAP:
>                 printf ("Vendor: %s Rev: %s Prod: %s\n",
>                         dev_desc->vendor,
>                         dev_desc->revision,
> diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
> index e95da48bdc..5a1aeb3d2b 100644
> --- a/drivers/block/Kconfig
> +++ b/drivers/block/Kconfig
> @@ -67,6 +67,24 @@ config BLOCK_CACHE
>           it will prevent repeated reads from directory structures and other
>           filesystem data structures.
>
> +config BLKMAP
> +       bool "Composable virtual block devices (blkmap)"
> +       depends on BLK
> +       help
> +         Create virtual block devices that are backed by various sources,
> +         e.g. RAM, or parts of an existing block device. Though much more
> +         rudimentary, it borrows a lot of ideas from Linux's device mapper
> +         subsystem.
> +
> +         Example use-cases:
> +         - Treat a region of RAM as a block device, i.e. a RAM disk. This let's
> +            you extract files from filesystem images stored in RAM (perhaps as a
> +            result of a TFTP transfer).
> +         - Create a virtual partition on an existing device. This let's you
> +            access filesystems that aren't stored at an exact partition
> +            boundary. A common example is a filesystem image embedded in an FIT
> +            image.
> +
>  config SPL_BLOCK_CACHE
>         bool "Use block device cache in SPL"
>         depends on SPL_BLK
> diff --git a/drivers/block/Makefile b/drivers/block/Makefile
> index f12447d78d..a161d145fd 100644
> --- a/drivers/block/Makefile
> +++ b/drivers/block/Makefile
> @@ -14,6 +14,7 @@ obj-$(CONFIG_IDE) += ide.o
>  endif
>  obj-$(CONFIG_SANDBOX) += sandbox.o host-uclass.o host_dev.o
>  obj-$(CONFIG_$(SPL_TPL_)BLOCK_CACHE) += blkcache.o
> +obj-$(CONFIG_BLKMAP) += blkmap.o
>
>  obj-$(CONFIG_EFI_MEDIA) += efi-media-uclass.o
>  obj-$(CONFIG_EFI_MEDIA_SANDBOX) += sb_efi_media.o
> diff --git a/drivers/block/blk-uclass.c b/drivers/block/blk-uclass.c
> index c69fc4d518..cb73faaeda 100644
> --- a/drivers/block/blk-uclass.c
> +++ b/drivers/block/blk-uclass.c
> @@ -32,6 +32,7 @@ static struct {
>         { UCLASS_EFI_LOADER, "efiloader" },
>         { UCLASS_VIRTIO, "virtio" },
>         { UCLASS_PVBLOCK, "pvblock" },
> +       { UCLASS_BLKMAP, "blkmap" },
>  };
>
>  static enum uclass_id uclass_name_to_iftype(const char *uclass_idname)
> diff --git a/drivers/block/blkmap.c b/drivers/block/blkmap.c
> new file mode 100644
> index 0000000000..a6ba07404c
> --- /dev/null
> +++ b/drivers/block/blkmap.c
> @@ -0,0 +1,275 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright (c) 2023 Addiva Elektronik
> + * Author: Tobias Waldekranz <tobias@waldekranz.com>
> + */
> +
> +#include <common.h>
> +#include <blk.h>
> +#include <blkmap.h>
> +#include <dm.h>
> +#include <dm/device-internal.h>
> +#include <dm/lists.h>
> +#include <dm/root.h>

The three above should go at the end:

https://u-boot.readthedocs.io/en/latest/develop/codingstyle.html#include-files

> +#include <malloc.h>
> +#include <part.h>
> +
> +struct blkmap;
> +
> +struct blkmap_slice {
> +       struct list_head node;
> +
> +       lbaint_t blknr;
> +       lbaint_t blkcnt;
> +
> +       ulong (*read)(struct blkmap *bm, struct blkmap_slice *bms,
> +                     lbaint_t blknr, lbaint_t blkcnt, void *buffer);
> +       ulong (*write)(struct blkmap *bm, struct blkmap_slice *bms,
> +                      lbaint_t blknr, lbaint_t blkcnt, const void *buffer);
> +       void (*destroy)(struct blkmap *bm, struct blkmap_slice *bms);
> +};

Please comment these fully in Sphinx style

> +
> +struct blkmap {
> +       struct udevice *dev;
> +       struct list_head slices;
> +};
> +
> +static bool blkmap_slice_contains(struct blkmap_slice *bms, lbaint_t blknr)
> +{
> +       return (blknr >= bms->blknr) && (blknr < (bms->blknr + bms->blkcnt));

lots of brackets!

> +}
> +
> +static bool blkmap_slice_available(struct blkmap *bm, struct blkmap_slice *new)
> +{
> +       struct blkmap_slice *bms;
> +       lbaint_t first, last;
> +
> +       first = new->blknr;
> +       last = new->blknr + new->blkcnt - 1;
> +
> +       list_for_each_entry(bms, &bm->slices, node) {
> +               if (blkmap_slice_contains(bms, first) ||
> +                   blkmap_slice_contains(bms, last) ||
> +                   blkmap_slice_contains(new, bms->blknr) ||
> +                   blkmap_slice_contains(new, bms->blknr + bms->blkcnt - 1))
> +                       return false;
> +       }
> +
> +       return true;
> +}
> +
> +static struct blkmap *blkmap_from_devnum(int devnum)

I don't really like the use of devnum everywhere. Can we instead limit
the devnum stuff to the cmdline implementation, and use a struct
udevice everywhere else?

The devum can be allocated when the UCLASS_BLK device is created.

> +{
> +       struct udevice *dev;
> +       int err;
> +
> +       err = blk_find_device(UCLASS_BLKMAP, devnum, &dev);
> +
> +       return err ? NULL : dev_get_priv(dev);
> +}
> +
> +static int blkmap_add(struct blkmap *bm, struct blkmap_slice *new)
> +{
> +       struct blk_desc *bd = dev_get_uclass_plat(bm->dev);
> +       struct list_head *insert = &bm->slices;
> +       struct blkmap_slice *bms;
> +
> +       if (!blkmap_slice_available(bm, new))
> +               return -EBUSY;
> +
> +       list_for_each_entry(bms, &bm->slices, node) {
> +               if (bms->blknr < new->blknr)
> +                       continue;
> +
> +               insert = &bms->node;
> +               break;
> +       }
> +
> +       list_add_tail(&new->node, insert);
> +
> +       /* Disk might have grown, update the size */
> +       bms = list_last_entry(&bm->slices, struct blkmap_slice, node);
> +       bd->lba = bms->blknr + bms->blkcnt;
> +       return 0;
> +}
> +
> +static struct udevice *blkmap_root(void)

This needs to be created as part of DM.  See how host_create_device()
works. It attaches something to the uclass and then creates child
devices from there. It also operations (struct host_ops) but you don't
need to do that.

Note that the host commands support either an label or a devnum, which
I think is useful, so you might copy that?

> +{
> +       static struct udevice *dev;
> +       int err;
> +
> +       if (dev)
> +               return dev;
> +
> +       err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
> +       if (err)
> +               return NULL;
> +
> +       err = device_probe(dev);
> +       if (err) {
> +               device_unbind(dev);
> +               return NULL;
> +       }

Should not be needed as probing children will cause this to be probed.

So this function just becomes

uclass_first_device(UCLASS_BLKDEV, &

> +
> +       return dev;
> +}
> +
> +int blkmap_create(int devnum)

Again, please drop the use of devnum and use devices. Here you could
use a label, perhaps?
> +{
> +       struct udevice *root;

Please don't use that name , as we use it for the DM root device. How
about bm_parent? It isn't actually a tree of devices, so root doesn't
sound right to me anyway.

> +       struct blk_desc *bd;
> +       struct blkmap *bm;
> +       int err;
> +
> +       if (devnum >= 0 && blkmap_from_devnum(devnum))
> +               return -EBUSY;
> +
> +       root = blkmap_root();
> +       if (!root)
> +               return -ENODEV;
> +
> +       bm = calloc(1, sizeof(*bm));

Can this be attached to the device as private data, so avoiding the malloc()?

> +       if (!bm)
> +               return -ENOMEM;
> +
> +       err = blk_create_devicef(root, "blkmap_blk", "blk", UCLASS_BLKMAP,
> +                                devnum, 512, 0, &bm->dev);
> +       if (err)
> +               goto err_free;
> +
> +       bd = dev_get_uclass_plat(bm->dev);
> +
> +       /* EFI core isn't keen on zero-sized disks, so we lie. This is
> +        * updated with the correct size once the user adds a
> +        * mapping.
> +        */
> +       bd->lba = 1;

if (CONFIG_IS_ENABLED(EFI_LOADER))

?

> +
> +       dev_set_priv(bm->dev, bm);
> +       INIT_LIST_HEAD(&bm->slices);
> +
> +       err = blk_probe_or_unbind(bm->dev);
> +       if (err)
> +               goto err_remove;
> +
> +       return bd->devnum;
> +
> +err_remove:
> +       device_remove(bm->dev, DM_REMOVE_NORMAL);
> +err_free:
> +       free(bm);
> +       return err;
> +}
> +
> +int blkmap_destroy(int devnum)

label

> +{
> +       struct blkmap_slice *bms, *tmp;
> +       struct blkmap *bm;
> +       int err;
> +
> +       bm = blkmap_from_devnum(devnum);
> +       if (!bm)
> +               return -ENODEV;
> +
> +       err = device_remove(bm->dev, DM_REMOVE_NORMAL);
> +       if (err)
> +               return err;
> +
> +       err = device_unbind(bm->dev);
> +       if (err)
> +               return err;
> +
> +       list_for_each_entry_safe(bms, tmp, &bm->slices, node) {
> +               list_del(&bms->node);
> +               free(bms);
> +       }
> +
> +       free(bm);
> +       return 0;
> +}
> +
> +static ulong blkmap_read_slice(struct blkmap *bm, struct blkmap_slice *bms,
> +                              lbaint_t blknr, lbaint_t blkcnt, void *buffer)
> +{
> +       lbaint_t nr, cnt;
> +
> +       nr = blknr - bms->blknr;
> +       cnt = (blkcnt < bms->blkcnt) ? blkcnt : bms->blkcnt;
> +       return bms->read(bm, bms, nr, cnt, buffer);
> +}
> +
> +static ulong blkmap_read(struct udevice *dev, lbaint_t blknr, lbaint_t blkcnt,
> +                        void *buffer)
> +{
> +       struct blk_desc *bd = dev_get_uclass_plat(dev);
> +       struct blkmap *bm = dev_get_priv(dev);
> +       struct blkmap_slice *bms;
> +       lbaint_t cnt, total = 0;
> +
> +       list_for_each_entry(bms, &bm->slices, node) {
> +               if (!blkmap_slice_contains(bms, blknr))
> +                       continue;
> +
> +               cnt = blkmap_read_slice(bm, bms, blknr, blkcnt, buffer);
> +               blknr += cnt;
> +               blkcnt -= cnt;
> +               buffer += cnt << bd->log2blksz;
> +               total += cnt;
> +       }
> +
> +       return total;
> +}
> +
> +static ulong blkmap_write_slice(struct blkmap *bm, struct blkmap_slice *bms,
> +                               lbaint_t blknr, lbaint_t blkcnt,
> +                               const void *buffer)
> +{
> +       lbaint_t nr, cnt;
> +
> +       nr = blknr - bms->blknr;
> +       cnt = (blkcnt < bms->blkcnt) ? blkcnt : bms->blkcnt;
> +       return bms->write(bm, bms, nr, cnt, buffer);
> +}
> +
> +static ulong blkmap_write(struct udevice *dev, lbaint_t blknr, lbaint_t blkcnt,
> +                         const void *buffer)
> +{
> +       struct blk_desc *bd = dev_get_uclass_plat(dev);
> +       struct blkmap *bm = dev_get_priv(dev);
> +       struct blkmap_slice *bms;
> +       lbaint_t cnt, total = 0;
> +
> +       list_for_each_entry(bms, &bm->slices, node) {
> +               if (!blkmap_slice_contains(bms, blknr))
> +                       continue;
> +
> +               cnt = blkmap_write_slice(bm, bms, blknr, blkcnt, buffer);
> +               blknr += cnt;
> +               blkcnt -= cnt;
> +               buffer += cnt << bd->log2blksz;
> +               total += cnt;
> +       }
> +
> +       return total;
> +}
> +
> +static const struct blk_ops blkmap_ops = {
> +       .read   = blkmap_read,
> +       .write  = blkmap_write,
> +};
> +
> +U_BOOT_DRIVER(blkmap_blk) = {
> +       .name           = "blkmap_blk",
> +       .id             = UCLASS_BLK,
> +       .ops            = &blkmap_ops,
> +};
> +
> +U_BOOT_DRIVER(blkmap_root) = {
> +       .name           = "blkmap_root",
> +       .id             = UCLASS_BLKMAP,
> +};
> +
> +UCLASS_DRIVER(blkmap) = {
> +       .id             = UCLASS_BLKMAP,
> +       .name           = "blkmap",
> +};
> diff --git a/include/blkmap.h b/include/blkmap.h
> new file mode 100644
> index 0000000000..37c0c31c3f
> --- /dev/null
> +++ b/include/blkmap.h
> @@ -0,0 +1,15 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + * Copyright (c) 2023 Addiva Elektronik
> + * Author: Tobias Waldekranz <tobias@waldekranz.com>
> + */
> +
> +#ifndef _BLKMAP_H
> +#define _BLKMAP_H
> +
> +#include <stdbool.h>
> +
> +int blkmap_create(int devnum);
> +int blkmap_destroy(int devnum);

full comments for exported functions

> +
> +#endif /* _BLKMAP_H */
> diff --git a/include/dm/uclass-id.h b/include/dm/uclass-id.h
> index 33e43c20db..576237b954 100644
> --- a/include/dm/uclass-id.h
> +++ b/include/dm/uclass-id.h
> @@ -37,6 +37,7 @@ enum uclass_id {
>         UCLASS_AUDIO_CODEC,     /* Audio codec with control and data path */
>         UCLASS_AXI,             /* AXI bus */
>         UCLASS_BLK,             /* Block device */
> +       UCLASS_BLKMAP,          /* Composable virtual block device */
>         UCLASS_BOOTCOUNT,       /* Bootcount backing store */
>         UCLASS_BOOTDEV,         /* Boot device for locating an OS to boot */
>         UCLASS_BOOTMETH,        /* Bootmethod for booting an OS */
> diff --git a/include/efi_loader.h b/include/efi_loader.h
> index 4560b0d04c..59687f44de 100644
> --- a/include/efi_loader.h
> +++ b/include/efi_loader.h
> @@ -134,6 +134,10 @@ static inline efi_status_t efi_launch_capsules(void)
>  #define U_BOOT_GUID \
>         EFI_GUID(0xe61d73b9, 0xa384, 0x4acc, \
>                  0xae, 0xab, 0x82, 0xe8, 0x28, 0xf3, 0x62, 0x8b)
> +/* GUID used as root for blkmap devices */
> +#define U_BOOT_BLKMAP_DEV_GUID \
> +       EFI_GUID(0x4cad859d, 0xd644, 0x42ff,    \
> +                0x87, 0x0b, 0xc0, 0x2e, 0xac, 0x05, 0x58, 0x63)
>  /* GUID used as host device on sandbox */
>  #define U_BOOT_HOST_DEV_GUID \
>         EFI_GUID(0xbbe4e671, 0x5773, 0x4ea1, \
> diff --git a/lib/efi_loader/efi_device_path.c b/lib/efi_loader/efi_device_path.c
> index 3b267b713e..4b4c96bc2e 100644
> --- a/lib/efi_loader/efi_device_path.c
> +++ b/lib/efi_loader/efi_device_path.c

Please put this EFI stuff in a separate patch.

> @@ -21,6 +21,9 @@
>  #include <asm-generic/unaligned.h>
>  #include <linux/compat.h> /* U16_MAX */
>
> +#ifdef CONFIG_BLKMAP
> +const efi_guid_t efi_guid_blkmap_dev = U_BOOT_BLKMAP_DEV_GUID;
> +#endif
>  #ifdef CONFIG_SANDBOX
>  const efi_guid_t efi_guid_host_dev = U_BOOT_HOST_DEV_GUID;
>  #endif
> @@ -573,6 +576,16 @@ __maybe_unused static unsigned int dp_size(struct udevice *dev)
>                           */
>                         return dp_size(dev->parent)
>                                 + sizeof(struct efi_device_path_vendor) + 1;
> +#endif
> +#ifdef CONFIG_BLKMAP
> +               case UCLASS_BLKMAP:
> +                        /*
> +                         * blkmap devices will be represented as a vendor
> +                         * device node with an extra byte for the device
> +                         * number.
> +                         */
> +                       return dp_size(dev->parent)
> +                               + sizeof(struct efi_device_path_vendor) + 1;
>  #endif
>                 default:
>                         return dp_size(dev->parent);
> @@ -631,6 +644,23 @@ __maybe_unused static void *dp_fill(void *buf, struct udevice *dev)
>  #endif
>         case UCLASS_BLK:
>                 switch (dev->parent->uclass->uc_drv->id) {
> +#ifdef CONFIG_BLKMAP
> +               case UCLASS_BLKMAP: {
> +                       struct efi_device_path_vendor *dp;
> +                       struct blk_desc *desc = dev_get_uclass_plat(dev);
> +
> +                       dp_fill(buf, dev->parent);
> +                       dp = buf;
> +                       ++dp;
> +                       dp->dp.type = DEVICE_PATH_TYPE_HARDWARE_DEVICE;
> +                       dp->dp.sub_type = DEVICE_PATH_SUB_TYPE_VENDOR;
> +                       dp->dp.length = sizeof(*dp) + 1;
> +                       memcpy(&dp->guid, &efi_guid_blkmap_dev,
> +                              sizeof(efi_guid_t));
> +                       dp->vendor_data[0] = desc->devnum;
> +                       return &dp->vendor_data[1];
> +                       }
> +#endif
>  #ifdef CONFIG_SANDBOX
>                 case UCLASS_HOST: {
>                         /* stop traversing parents at this point: */
> --
> 2.34.1
>

Regards,
Simon
Tobias Waldekranz Feb. 3, 2023, 9:38 a.m. UTC | #2
On ons, feb 01, 2023 at 13:20, Simon Glass <sjg@chromium.org> wrote:
> Hi Tobias,

Hi Simon,

Thanks for the review!

> On Wed, 1 Feb 2023 at 11:10, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>>
>> blkmaps are loosely modeled on Linux's device mapper subsystem. The
>> basic idea is that you can create virtual block devices whose blocks
>> can be backed by a plethora of sources that are user configurable.
>>
>> This change just adds the basic infrastructure for creating and
>> removing blkmap devices. Subsequent changes will extend this to add
>> support for actual mappings.
>>
>> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
>> ---
>>  MAINTAINERS                      |   6 +
>>  disk/part.c                      |   1 +
>>  drivers/block/Kconfig            |  18 ++
>>  drivers/block/Makefile           |   1 +
>>  drivers/block/blk-uclass.c       |   1 +
>>  drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
>>  include/blkmap.h                 |  15 ++
>>  include/dm/uclass-id.h           |   1 +
>>  include/efi_loader.h             |   4 +
>>  lib/efi_loader/efi_device_path.c |  30 ++++
>>  10 files changed, 352 insertions(+)
>>  create mode 100644 drivers/block/blkmap.c
>>  create mode 100644 include/blkmap.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 3e8e193ecc..28a34231bf 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -786,6 +786,12 @@ M: Alper Nebi Yasak <alpernebiyasak@gmail.com>
>>  S:     Maintained
>>  F:     tools/binman/
>>
>> +BLKMAP
>> +M:     Tobias Waldekranz <tobias@waldekranz.com>
>> +S:     Maintained
>> +F:     drivers/block/blkmap.c
>> +F:     include/blkmap.h
>> +
>>  BOOTDEVICE
>>  M:     Simon Glass <sjg@chromium.org>
>>  S:     Maintained
>> diff --git a/disk/part.c b/disk/part.c
>> index d449635254..35300df590 100644
>> --- a/disk/part.c
>> +++ b/disk/part.c
>> @@ -140,6 +140,7 @@ void dev_print(struct blk_desc *dev_desc)
>>         case UCLASS_NVME:
>>         case UCLASS_PVBLOCK:
>>         case UCLASS_HOST:
>> +       case UCLASS_BLKMAP:
>>                 printf ("Vendor: %s Rev: %s Prod: %s\n",
>>                         dev_desc->vendor,
>>                         dev_desc->revision,
>> diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
>> index e95da48bdc..5a1aeb3d2b 100644
>> --- a/drivers/block/Kconfig
>> +++ b/drivers/block/Kconfig
>> @@ -67,6 +67,24 @@ config BLOCK_CACHE
>>           it will prevent repeated reads from directory structures and other
>>           filesystem data structures.
>>
>> +config BLKMAP
>> +       bool "Composable virtual block devices (blkmap)"
>> +       depends on BLK
>> +       help
>> +         Create virtual block devices that are backed by various sources,
>> +         e.g. RAM, or parts of an existing block device. Though much more
>> +         rudimentary, it borrows a lot of ideas from Linux's device mapper
>> +         subsystem.
>> +
>> +         Example use-cases:
>> +         - Treat a region of RAM as a block device, i.e. a RAM disk. This let's
>> +            you extract files from filesystem images stored in RAM (perhaps as a
>> +            result of a TFTP transfer).
>> +         - Create a virtual partition on an existing device. This let's you
>> +            access filesystems that aren't stored at an exact partition
>> +            boundary. A common example is a filesystem image embedded in an FIT
>> +            image.
>> +
>>  config SPL_BLOCK_CACHE
>>         bool "Use block device cache in SPL"
>>         depends on SPL_BLK
>> diff --git a/drivers/block/Makefile b/drivers/block/Makefile
>> index f12447d78d..a161d145fd 100644
>> --- a/drivers/block/Makefile
>> +++ b/drivers/block/Makefile
>> @@ -14,6 +14,7 @@ obj-$(CONFIG_IDE) += ide.o
>>  endif
>>  obj-$(CONFIG_SANDBOX) += sandbox.o host-uclass.o host_dev.o
>>  obj-$(CONFIG_$(SPL_TPL_)BLOCK_CACHE) += blkcache.o
>> +obj-$(CONFIG_BLKMAP) += blkmap.o
>>
>>  obj-$(CONFIG_EFI_MEDIA) += efi-media-uclass.o
>>  obj-$(CONFIG_EFI_MEDIA_SANDBOX) += sb_efi_media.o
>> diff --git a/drivers/block/blk-uclass.c b/drivers/block/blk-uclass.c
>> index c69fc4d518..cb73faaeda 100644
>> --- a/drivers/block/blk-uclass.c
>> +++ b/drivers/block/blk-uclass.c
>> @@ -32,6 +32,7 @@ static struct {
>>         { UCLASS_EFI_LOADER, "efiloader" },
>>         { UCLASS_VIRTIO, "virtio" },
>>         { UCLASS_PVBLOCK, "pvblock" },
>> +       { UCLASS_BLKMAP, "blkmap" },
>>  };
>>
>>  static enum uclass_id uclass_name_to_iftype(const char *uclass_idname)
>> diff --git a/drivers/block/blkmap.c b/drivers/block/blkmap.c
>> new file mode 100644
>> index 0000000000..a6ba07404c
>> --- /dev/null
>> +++ b/drivers/block/blkmap.c
>> @@ -0,0 +1,275 @@
>> +// SPDX-License-Identifier: GPL-2.0+
>> +/*
>> + * Copyright (c) 2023 Addiva Elektronik
>> + * Author: Tobias Waldekranz <tobias@waldekranz.com>
>> + */
>> +
>> +#include <common.h>
>> +#include <blk.h>
>> +#include <blkmap.h>
>> +#include <dm.h>
>> +#include <dm/device-internal.h>
>> +#include <dm/lists.h>
>> +#include <dm/root.h>
>
> The three above should go at the end:
>
> https://u-boot.readthedocs.io/en/latest/develop/codingstyle.html#include-files

I should've read more carefully, sorry. Fixing.

>> +#include <malloc.h>
>> +#include <part.h>
>> +
>> +struct blkmap;
>> +
>> +struct blkmap_slice {
>> +       struct list_head node;
>> +
>> +       lbaint_t blknr;
>> +       lbaint_t blkcnt;
>> +
>> +       ulong (*read)(struct blkmap *bm, struct blkmap_slice *bms,
>> +                     lbaint_t blknr, lbaint_t blkcnt, void *buffer);
>> +       ulong (*write)(struct blkmap *bm, struct blkmap_slice *bms,
>> +                      lbaint_t blknr, lbaint_t blkcnt, const void *buffer);
>> +       void (*destroy)(struct blkmap *bm, struct blkmap_slice *bms);
>> +};
>
> Please comment these fully in Sphinx style

Will do.

>> +
>> +struct blkmap {
>> +       struct udevice *dev;
>> +       struct list_head slices;
>> +};
>> +
>> +static bool blkmap_slice_contains(struct blkmap_slice *bms, lbaint_t blknr)
>> +{
>> +       return (blknr >= bms->blknr) && (blknr < (bms->blknr + bms->blkcnt));
>
> lots of brackets!
>
>> +}
>> +
>> +static bool blkmap_slice_available(struct blkmap *bm, struct blkmap_slice *new)
>> +{
>> +       struct blkmap_slice *bms;
>> +       lbaint_t first, last;
>> +
>> +       first = new->blknr;
>> +       last = new->blknr + new->blkcnt - 1;
>> +
>> +       list_for_each_entry(bms, &bm->slices, node) {
>> +               if (blkmap_slice_contains(bms, first) ||
>> +                   blkmap_slice_contains(bms, last) ||
>> +                   blkmap_slice_contains(new, bms->blknr) ||
>> +                   blkmap_slice_contains(new, bms->blknr + bms->blkcnt - 1))
>> +                       return false;
>> +       }
>> +
>> +       return true;
>> +}
>> +
>> +static struct blkmap *blkmap_from_devnum(int devnum)
>
> I don't really like the use of devnum everywhere. Can we instead limit
> the devnum stuff to the cmdline implementation, and use a struct
> udevice everywhere else?

Sure, I'll change it.

> The devum can be allocated when the UCLASS_BLK device is created.
>
>> +{
>> +       struct udevice *dev;
>> +       int err;
>> +
>> +       err = blk_find_device(UCLASS_BLKMAP, devnum, &dev);
>> +
>> +       return err ? NULL : dev_get_priv(dev);
>> +}
>> +
>> +static int blkmap_add(struct blkmap *bm, struct blkmap_slice *new)
>> +{
>> +       struct blk_desc *bd = dev_get_uclass_plat(bm->dev);
>> +       struct list_head *insert = &bm->slices;
>> +       struct blkmap_slice *bms;
>> +
>> +       if (!blkmap_slice_available(bm, new))
>> +               return -EBUSY;
>> +
>> +       list_for_each_entry(bms, &bm->slices, node) {
>> +               if (bms->blknr < new->blknr)
>> +                       continue;
>> +
>> +               insert = &bms->node;
>> +               break;
>> +       }
>> +
>> +       list_add_tail(&new->node, insert);
>> +
>> +       /* Disk might have grown, update the size */
>> +       bms = list_last_entry(&bm->slices, struct blkmap_slice, node);
>> +       bd->lba = bms->blknr + bms->blkcnt;
>> +       return 0;
>> +}
>> +
>> +static struct udevice *blkmap_root(void)
>
> This needs to be created as part of DM.  See how host_create_device()
> works. It attaches something to the uclass and then creates child
> devices from there. It also operations (struct host_ops) but you don't
> need to do that.
>
> Note that the host commands support either an label or a devnum, which
> I think is useful, so you might copy that?
>

I took a look at the hostfs implementation. I agree that labels are much
nicer than bare integers. However, for block maps the idea is to fit in
to the existing filesystem infrastructure. Addressing block devices
using the "<interface> <dev>[:<part>]" pattern seems very well
established...

>> +{
>> +       static struct udevice *dev;
>> +       int err;
>> +
>> +       if (dev)
>> +               return dev;
>> +
>> +       err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
>> +       if (err)
>> +               return NULL;
>> +
>> +       err = device_probe(dev);
>> +       if (err) {
>> +               device_unbind(dev);
>> +               return NULL;
>> +       }
>
> Should not be needed as probing children will cause this to be probed.
>
> So this function just becomes
>
> uclass_first_device(UCLASS_BLKDEV, &
>
>> +
>> +       return dev;
>> +}
>> +
>> +int blkmap_create(int devnum)
>
> Again, please drop the use of devnum and use devices. Here you could
> use a label, perhaps?

...which is why I don't think a label is going to fly here. Let's say I
create a new ramdisk with a label instead, e.g.:

    blkmap create rd
    blkmap map rd 0 0x100 mem ${loadaddr}

How do I know which <dev> to supply to, e.g.:

    ls blkmap <dev> /boot

It seems like labels are a hostfs-specific feature, or am I missing
something?

>> +{
>> +       struct udevice *root;
>
> Please don't use that name , as we use it for the DM root device. How
> about bm_parent? It isn't actually a tree of devices, so root doesn't
> sound right to me anyway.

Alright, I'll change it.

>> +       struct blk_desc *bd;
>> +       struct blkmap *bm;
>> +       int err;
>> +
>> +       if (devnum >= 0 && blkmap_from_devnum(devnum))
>> +               return -EBUSY;
>> +
>> +       root = blkmap_root();
>> +       if (!root)
>> +               return -ENODEV;
>> +
>> +       bm = calloc(1, sizeof(*bm));
>
> Can this be attached to the device as private data, so avoiding the malloc()?

Maybe, I'm not familiar enough with the U-Boot internals.

As it is now, all blkmaps are children of a single "blkmap_root"
device. I chose that approach so that devnums would be allocated from a
single pool.

AFAIK, that would mean having to store it in the "blkmap_blk" device,
but I thought that its private data was owned by the block subsystem?

>> +       if (!bm)
>> +               return -ENOMEM;
>> +
>> +       err = blk_create_devicef(root, "blkmap_blk", "blk", UCLASS_BLKMAP,
>> +                                devnum, 512, 0, &bm->dev);
>> +       if (err)
>> +               goto err_free;
>> +
>> +       bd = dev_get_uclass_plat(bm->dev);
>> +
>> +       /* EFI core isn't keen on zero-sized disks, so we lie. This is
>> +        * updated with the correct size once the user adds a
>> +        * mapping.
>> +        */
>> +       bd->lba = 1;
>
> if (CONFIG_IS_ENABLED(EFI_LOADER))
>
> ?

Right.

>> +
>> +       dev_set_priv(bm->dev, bm);
>> +       INIT_LIST_HEAD(&bm->slices);
>> +
>> +       err = blk_probe_or_unbind(bm->dev);
>> +       if (err)
>> +               goto err_remove;
>> +
>> +       return bd->devnum;
>> +
>> +err_remove:
>> +       device_remove(bm->dev, DM_REMOVE_NORMAL);
>> +err_free:
>> +       free(bm);
>> +       return err;
>> +}
>> +
>> +int blkmap_destroy(int devnum)
>
> label
>
>> +{
>> +       struct blkmap_slice *bms, *tmp;
>> +       struct blkmap *bm;
>> +       int err;
>> +
>> +       bm = blkmap_from_devnum(devnum);
>> +       if (!bm)
>> +               return -ENODEV;
>> +
>> +       err = device_remove(bm->dev, DM_REMOVE_NORMAL);
>> +       if (err)
>> +               return err;
>> +
>> +       err = device_unbind(bm->dev);
>> +       if (err)
>> +               return err;
>> +
>> +       list_for_each_entry_safe(bms, tmp, &bm->slices, node) {
>> +               list_del(&bms->node);
>> +               free(bms);
>> +       }
>> +
>> +       free(bm);
>> +       return 0;
>> +}
>> +
>> +static ulong blkmap_read_slice(struct blkmap *bm, struct blkmap_slice *bms,
>> +                              lbaint_t blknr, lbaint_t blkcnt, void *buffer)
>> +{
>> +       lbaint_t nr, cnt;
>> +
>> +       nr = blknr - bms->blknr;
>> +       cnt = (blkcnt < bms->blkcnt) ? blkcnt : bms->blkcnt;
>> +       return bms->read(bm, bms, nr, cnt, buffer);
>> +}
>> +
>> +static ulong blkmap_read(struct udevice *dev, lbaint_t blknr, lbaint_t blkcnt,
>> +                        void *buffer)
>> +{
>> +       struct blk_desc *bd = dev_get_uclass_plat(dev);
>> +       struct blkmap *bm = dev_get_priv(dev);
>> +       struct blkmap_slice *bms;
>> +       lbaint_t cnt, total = 0;
>> +
>> +       list_for_each_entry(bms, &bm->slices, node) {
>> +               if (!blkmap_slice_contains(bms, blknr))
>> +                       continue;
>> +
>> +               cnt = blkmap_read_slice(bm, bms, blknr, blkcnt, buffer);
>> +               blknr += cnt;
>> +               blkcnt -= cnt;
>> +               buffer += cnt << bd->log2blksz;
>> +               total += cnt;
>> +       }
>> +
>> +       return total;
>> +}
>> +
>> +static ulong blkmap_write_slice(struct blkmap *bm, struct blkmap_slice *bms,
>> +                               lbaint_t blknr, lbaint_t blkcnt,
>> +                               const void *buffer)
>> +{
>> +       lbaint_t nr, cnt;
>> +
>> +       nr = blknr - bms->blknr;
>> +       cnt = (blkcnt < bms->blkcnt) ? blkcnt : bms->blkcnt;
>> +       return bms->write(bm, bms, nr, cnt, buffer);
>> +}
>> +
>> +static ulong blkmap_write(struct udevice *dev, lbaint_t blknr, lbaint_t blkcnt,
>> +                         const void *buffer)
>> +{
>> +       struct blk_desc *bd = dev_get_uclass_plat(dev);
>> +       struct blkmap *bm = dev_get_priv(dev);
>> +       struct blkmap_slice *bms;
>> +       lbaint_t cnt, total = 0;
>> +
>> +       list_for_each_entry(bms, &bm->slices, node) {
>> +               if (!blkmap_slice_contains(bms, blknr))
>> +                       continue;
>> +
>> +               cnt = blkmap_write_slice(bm, bms, blknr, blkcnt, buffer);
>> +               blknr += cnt;
>> +               blkcnt -= cnt;
>> +               buffer += cnt << bd->log2blksz;
>> +               total += cnt;
>> +       }
>> +
>> +       return total;
>> +}
>> +
>> +static const struct blk_ops blkmap_ops = {
>> +       .read   = blkmap_read,
>> +       .write  = blkmap_write,
>> +};
>> +
>> +U_BOOT_DRIVER(blkmap_blk) = {
>> +       .name           = "blkmap_blk",
>> +       .id             = UCLASS_BLK,
>> +       .ops            = &blkmap_ops,
>> +};
>> +
>> +U_BOOT_DRIVER(blkmap_root) = {
>> +       .name           = "blkmap_root",
>> +       .id             = UCLASS_BLKMAP,
>> +};
>> +
>> +UCLASS_DRIVER(blkmap) = {
>> +       .id             = UCLASS_BLKMAP,
>> +       .name           = "blkmap",
>> +};
>> diff --git a/include/blkmap.h b/include/blkmap.h
>> new file mode 100644
>> index 0000000000..37c0c31c3f
>> --- /dev/null
>> +++ b/include/blkmap.h
>> @@ -0,0 +1,15 @@
>> +/* SPDX-License-Identifier: GPL-2.0+ */
>> +/*
>> + * Copyright (c) 2023 Addiva Elektronik
>> + * Author: Tobias Waldekranz <tobias@waldekranz.com>
>> + */
>> +
>> +#ifndef _BLKMAP_H
>> +#define _BLKMAP_H
>> +
>> +#include <stdbool.h>
>> +
>> +int blkmap_create(int devnum);
>> +int blkmap_destroy(int devnum);
>
> full comments for exported functions

Fixing.

>> +
>> +#endif /* _BLKMAP_H */
>> diff --git a/include/dm/uclass-id.h b/include/dm/uclass-id.h
>> index 33e43c20db..576237b954 100644
>> --- a/include/dm/uclass-id.h
>> +++ b/include/dm/uclass-id.h
>> @@ -37,6 +37,7 @@ enum uclass_id {
>>         UCLASS_AUDIO_CODEC,     /* Audio codec with control and data path */
>>         UCLASS_AXI,             /* AXI bus */
>>         UCLASS_BLK,             /* Block device */
>> +       UCLASS_BLKMAP,          /* Composable virtual block device */
>>         UCLASS_BOOTCOUNT,       /* Bootcount backing store */
>>         UCLASS_BOOTDEV,         /* Boot device for locating an OS to boot */
>>         UCLASS_BOOTMETH,        /* Bootmethod for booting an OS */
>> diff --git a/include/efi_loader.h b/include/efi_loader.h
>> index 4560b0d04c..59687f44de 100644
>> --- a/include/efi_loader.h
>> +++ b/include/efi_loader.h
>> @@ -134,6 +134,10 @@ static inline efi_status_t efi_launch_capsules(void)
>>  #define U_BOOT_GUID \
>>         EFI_GUID(0xe61d73b9, 0xa384, 0x4acc, \
>>                  0xae, 0xab, 0x82, 0xe8, 0x28, 0xf3, 0x62, 0x8b)
>> +/* GUID used as root for blkmap devices */
>> +#define U_BOOT_BLKMAP_DEV_GUID \
>> +       EFI_GUID(0x4cad859d, 0xd644, 0x42ff,    \
>> +                0x87, 0x0b, 0xc0, 0x2e, 0xac, 0x05, 0x58, 0x63)
>>  /* GUID used as host device on sandbox */
>>  #define U_BOOT_HOST_DEV_GUID \
>>         EFI_GUID(0xbbe4e671, 0x5773, 0x4ea1, \
>> diff --git a/lib/efi_loader/efi_device_path.c b/lib/efi_loader/efi_device_path.c
>> index 3b267b713e..4b4c96bc2e 100644
>> --- a/lib/efi_loader/efi_device_path.c
>> +++ b/lib/efi_loader/efi_device_path.c
>
> Please put this EFI stuff in a separate patch.

Will do.

>> @@ -21,6 +21,9 @@
>>  #include <asm-generic/unaligned.h>
>>  #include <linux/compat.h> /* U16_MAX */
>>
>> +#ifdef CONFIG_BLKMAP
>> +const efi_guid_t efi_guid_blkmap_dev = U_BOOT_BLKMAP_DEV_GUID;
>> +#endif
>>  #ifdef CONFIG_SANDBOX
>>  const efi_guid_t efi_guid_host_dev = U_BOOT_HOST_DEV_GUID;
>>  #endif
>> @@ -573,6 +576,16 @@ __maybe_unused static unsigned int dp_size(struct udevice *dev)
>>                           */
>>                         return dp_size(dev->parent)
>>                                 + sizeof(struct efi_device_path_vendor) + 1;
>> +#endif
>> +#ifdef CONFIG_BLKMAP
>> +               case UCLASS_BLKMAP:
>> +                        /*
>> +                         * blkmap devices will be represented as a vendor
>> +                         * device node with an extra byte for the device
>> +                         * number.
>> +                         */
>> +                       return dp_size(dev->parent)
>> +                               + sizeof(struct efi_device_path_vendor) + 1;
>>  #endif
>>                 default:
>>                         return dp_size(dev->parent);
>> @@ -631,6 +644,23 @@ __maybe_unused static void *dp_fill(void *buf, struct udevice *dev)
>>  #endif
>>         case UCLASS_BLK:
>>                 switch (dev->parent->uclass->uc_drv->id) {
>> +#ifdef CONFIG_BLKMAP
>> +               case UCLASS_BLKMAP: {
>> +                       struct efi_device_path_vendor *dp;
>> +                       struct blk_desc *desc = dev_get_uclass_plat(dev);
>> +
>> +                       dp_fill(buf, dev->parent);
>> +                       dp = buf;
>> +                       ++dp;
>> +                       dp->dp.type = DEVICE_PATH_TYPE_HARDWARE_DEVICE;
>> +                       dp->dp.sub_type = DEVICE_PATH_SUB_TYPE_VENDOR;
>> +                       dp->dp.length = sizeof(*dp) + 1;
>> +                       memcpy(&dp->guid, &efi_guid_blkmap_dev,
>> +                              sizeof(efi_guid_t));
>> +                       dp->vendor_data[0] = desc->devnum;
>> +                       return &dp->vendor_data[1];
>> +                       }
>> +#endif
>>  #ifdef CONFIG_SANDBOX
>>                 case UCLASS_HOST: {
>>                         /* stop traversing parents at this point: */
>> --
>> 2.34.1
>>
>
> Regards,
> Simon
Simon Glass Feb. 4, 2023, 12:20 a.m. UTC | #3
Hi Tobias,

On Fri, 3 Feb 2023 at 02:38, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
> On ons, feb 01, 2023 at 13:20, Simon Glass <sjg@chromium.org> wrote:
> > Hi Tobias,
>
> Hi Simon,
>
> Thanks for the review!
>
> > On Wed, 1 Feb 2023 at 11:10, Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >>
> >> blkmaps are loosely modeled on Linux's device mapper subsystem. The
> >> basic idea is that you can create virtual block devices whose blocks
> >> can be backed by a plethora of sources that are user configurable.
> >>
> >> This change just adds the basic infrastructure for creating and
> >> removing blkmap devices. Subsequent changes will extend this to add
> >> support for actual mappings.
> >>
> >> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> >> ---
> >>  MAINTAINERS                      |   6 +
> >>  disk/part.c                      |   1 +
> >>  drivers/block/Kconfig            |  18 ++
> >>  drivers/block/Makefile           |   1 +
> >>  drivers/block/blk-uclass.c       |   1 +
> >>  drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
> >>  include/blkmap.h                 |  15 ++
> >>  include/dm/uclass-id.h           |   1 +
> >>  include/efi_loader.h             |   4 +
> >>  lib/efi_loader/efi_device_path.c |  30 ++++
> >>  10 files changed, 352 insertions(+)
> >>  create mode 100644 drivers/block/blkmap.c
> >>  create mode 100644 include/blkmap.h
> >>

[..]

> > This needs to be created as part of DM.  See how host_create_device()
> > works. It attaches something to the uclass and then creates child
> > devices from there. It also operations (struct host_ops) but you don't
> > need to do that.
> >
> > Note that the host commands support either an label or a devnum, which
> > I think is useful, so you might copy that?
> >
>
> I took a look at the hostfs implementation. I agree that labels are much
> nicer than bare integers. However, for block maps the idea is to fit in
> to the existing filesystem infrastructure. Addressing block devices
> using the "<interface> <dev>[:<part>]" pattern seems very well
> established...

You can still do that, so long as the labels are "0" and "1", etc. But
it lets us move to a more flexible system in future.

>
> >> +{
> >> +       static struct udevice *dev;
> >> +       int err;
> >> +
> >> +       if (dev)
> >> +               return dev;
> >> +
> >> +       err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
> >> +       if (err)
> >> +               return NULL;
> >> +
> >> +       err = device_probe(dev);
> >> +       if (err) {
> >> +               device_unbind(dev);
> >> +               return NULL;
> >> +       }
> >
> > Should not be needed as probing children will cause this to be probed.
> >
> > So this function just becomes
> >
> > uclass_first_device(UCLASS_BLKDEV, &
> >
> >> +
> >> +       return dev;
> >> +}
> >> +
> >> +int blkmap_create(int devnum)
> >
> > Again, please drop the use of devnum and use devices. Here you could
> > use a label, perhaps?
>
> ...which is why I don't think a label is going to fly here. Let's say I
> create a new ramdisk with a label instead, e.g.:
>
>     blkmap create rd
>     blkmap map rd 0 0x100 mem ${loadaddr}
>
> How do I know which <dev> to supply to, e.g.:
>
>     ls blkmap <dev> /boot
>
> It seems like labels are a hostfs-specific feature, or am I missing
> something?

We have the same problem with hostfs, since we have not implemented
labels in block devices. For now you must use integer labels. But we
will get there.

>
> >> +{
> >> +       struct udevice *root;
> >
> > Please don't use that name , as we use it for the DM root device. How
> > about bm_parent? It isn't actually a tree of devices, so root doesn't
> > sound right to me anyway.
>
> Alright, I'll change it.
>
> >> +       struct blk_desc *bd;
> >> +       struct blkmap *bm;
> >> +       int err;
> >> +
> >> +       if (devnum >= 0 && blkmap_from_devnum(devnum))
> >> +               return -EBUSY;
> >> +
> >> +       root = blkmap_root();
> >> +       if (!root)
> >> +               return -ENODEV;
> >> +
> >> +       bm = calloc(1, sizeof(*bm));
> >
> > Can this be attached to the device as private data, so avoiding the malloc()?
>
> Maybe, I'm not familiar enough with the U-Boot internals.
>
> As it is now, all blkmaps are children of a single "blkmap_root"
> device. I chose that approach so that devnums would be allocated from a
> single pool.

Well, driver model handles this for you (see dev_seq()). You have a
single uclass so can attach your 'overall' blkmap data to that. Then
each device can have its own private data attached.

The only requirement is that BLK devices have a parent (so we know the
media type). I had understood that you had one blkmap for each block
map. If that is true, then you don't need to have a parent one as
well. You can use the BLKMAP uclass to hold any overall data.

>
> AFAIK, that would mean having to store it in the "blkmap_blk" device,
> but I thought that its private data was owned by the block subsystem?

[..]

Regards,
Simon
Tobias Waldekranz Feb. 6, 2023, 8:30 a.m. UTC | #4
On fre, feb 03, 2023 at 17:20, Simon Glass <sjg@chromium.org> wrote:
> Hi Tobias,
>
> On Fri, 3 Feb 2023 at 02:38, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>>
>> On ons, feb 01, 2023 at 13:20, Simon Glass <sjg@chromium.org> wrote:
>> > Hi Tobias,
>>
>> Hi Simon,
>>
>> Thanks for the review!
>>
>> > On Wed, 1 Feb 2023 at 11:10, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>> >>
>> >> blkmaps are loosely modeled on Linux's device mapper subsystem. The
>> >> basic idea is that you can create virtual block devices whose blocks
>> >> can be backed by a plethora of sources that are user configurable.
>> >>
>> >> This change just adds the basic infrastructure for creating and
>> >> removing blkmap devices. Subsequent changes will extend this to add
>> >> support for actual mappings.
>> >>
>> >> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
>> >> ---
>> >>  MAINTAINERS                      |   6 +
>> >>  disk/part.c                      |   1 +
>> >>  drivers/block/Kconfig            |  18 ++
>> >>  drivers/block/Makefile           |   1 +
>> >>  drivers/block/blk-uclass.c       |   1 +
>> >>  drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
>> >>  include/blkmap.h                 |  15 ++
>> >>  include/dm/uclass-id.h           |   1 +
>> >>  include/efi_loader.h             |   4 +
>> >>  lib/efi_loader/efi_device_path.c |  30 ++++
>> >>  10 files changed, 352 insertions(+)
>> >>  create mode 100644 drivers/block/blkmap.c
>> >>  create mode 100644 include/blkmap.h
>> >>
>
> [..]
>
>> > This needs to be created as part of DM.  See how host_create_device()
>> > works. It attaches something to the uclass and then creates child
>> > devices from there. It also operations (struct host_ops) but you don't
>> > need to do that.
>> >
>> > Note that the host commands support either an label or a devnum, which
>> > I think is useful, so you might copy that?
>> >
>>
>> I took a look at the hostfs implementation. I agree that labels are much
>> nicer than bare integers. However, for block maps the idea is to fit in
>> to the existing filesystem infrastructure. Addressing block devices
>> using the "<interface> <dev>[:<part>]" pattern seems very well
>> established...
>
> You can still do that, so long as the labels are "0" and "1", etc. But
> it lets us move to a more flexible system in future.
>
>>
>> >> +{
>> >> +       static struct udevice *dev;
>> >> +       int err;
>> >> +
>> >> +       if (dev)
>> >> +               return dev;
>> >> +
>> >> +       err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
>> >> +       if (err)
>> >> +               return NULL;
>> >> +
>> >> +       err = device_probe(dev);
>> >> +       if (err) {
>> >> +               device_unbind(dev);
>> >> +               return NULL;
>> >> +       }
>> >
>> > Should not be needed as probing children will cause this to be probed.
>> >
>> > So this function just becomes
>> >
>> > uclass_first_device(UCLASS_BLKDEV, &
>> >
>> >> +
>> >> +       return dev;
>> >> +}
>> >> +
>> >> +int blkmap_create(int devnum)
>> >
>> > Again, please drop the use of devnum and use devices. Here you could
>> > use a label, perhaps?
>>
>> ...which is why I don't think a label is going to fly here. Let's say I
>> create a new ramdisk with a label instead, e.g.:
>>
>>     blkmap create rd
>>     blkmap map rd 0 0x100 mem ${loadaddr}
>>
>> How do I know which <dev> to supply to, e.g.:
>>
>>     ls blkmap <dev> /boot
>>
>> It seems like labels are a hostfs-specific feature, or am I missing
>> something?
>
> We have the same problem with hostfs, since we have not implemented
> labels in block devices. For now you must use integer labels. But we
> will get there.

But there is no connection to the devnum that is allocated internally by
U-Boot. Here's an experiment I just ran:

I created two squashfs images containing a single directory:

    zero.squashfs:
     i_am_zero

    one.squashfs:
     i_am_one

Then I added a binding to them:

    => host bind 1 one.squashfs
    => host bind 0 zero.squashfs

When accessing them, we see that the existing filesystem utilities work
on the internally generated devnums, ignoring the labels:

    => ls host 1
                i_am_zero/

    0 file(s), 1 dir(s)

    => ls host 0
                i_am_one/

    0 file(s), 1 dir(s)

    =>

Doesn't it therefore make more sense to stick with the established
abstraction?

>>
>> >> +{
>> >> +       struct udevice *root;
>> >
>> > Please don't use that name , as we use it for the DM root device. How
>> > about bm_parent? It isn't actually a tree of devices, so root doesn't
>> > sound right to me anyway.
>>
>> Alright, I'll change it.
>>
>> >> +       struct blk_desc *bd;
>> >> +       struct blkmap *bm;
>> >> +       int err;
>> >> +
>> >> +       if (devnum >= 0 && blkmap_from_devnum(devnum))
>> >> +               return -EBUSY;
>> >> +
>> >> +       root = blkmap_root();
>> >> +       if (!root)
>> >> +               return -ENODEV;
>> >> +
>> >> +       bm = calloc(1, sizeof(*bm));
>> >
>> > Can this be attached to the device as private data, so avoiding the malloc()?
>>
>> Maybe, I'm not familiar enough with the U-Boot internals.
>>
>> As it is now, all blkmaps are children of a single "blkmap_root"
>> device. I chose that approach so that devnums would be allocated from a
>> single pool.
>
> Well, driver model handles this for you (see dev_seq()). You have a
> single uclass so can attach your 'overall' blkmap data to that. Then
> each device can have its own private data attached.
>
> The only requirement is that BLK devices have a parent (so we know the
> media type). I had understood that you had one blkmap for each block
> map. If that is true, then you don't need to have a parent one as
> well. You can use the BLKMAP uclass to hold any overall data.
>
>>
>> AFAIK, that would mean having to store it in the "blkmap_blk" device,
>> but I thought that its private data was owned by the block subsystem?
>
> [..]
>
> Regards,
> Simon
Simon Glass Feb. 7, 2023, 4:02 a.m. UTC | #5
Hi Tobias,

On Mon, 6 Feb 2023 at 01:30, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
> On fre, feb 03, 2023 at 17:20, Simon Glass <sjg@chromium.org> wrote:
> > Hi Tobias,
> >
> > On Fri, 3 Feb 2023 at 02:38, Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >>
> >> On ons, feb 01, 2023 at 13:20, Simon Glass <sjg@chromium.org> wrote:
> >> > Hi Tobias,
> >>
> >> Hi Simon,
> >>
> >> Thanks for the review!
> >>
> >> > On Wed, 1 Feb 2023 at 11:10, Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >> >>
> >> >> blkmaps are loosely modeled on Linux's device mapper subsystem. The
> >> >> basic idea is that you can create virtual block devices whose blocks
> >> >> can be backed by a plethora of sources that are user configurable.
> >> >>
> >> >> This change just adds the basic infrastructure for creating and
> >> >> removing blkmap devices. Subsequent changes will extend this to add
> >> >> support for actual mappings.
> >> >>
> >> >> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> >> >> ---
> >> >>  MAINTAINERS                      |   6 +
> >> >>  disk/part.c                      |   1 +
> >> >>  drivers/block/Kconfig            |  18 ++
> >> >>  drivers/block/Makefile           |   1 +
> >> >>  drivers/block/blk-uclass.c       |   1 +
> >> >>  drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
> >> >>  include/blkmap.h                 |  15 ++
> >> >>  include/dm/uclass-id.h           |   1 +
> >> >>  include/efi_loader.h             |   4 +
> >> >>  lib/efi_loader/efi_device_path.c |  30 ++++
> >> >>  10 files changed, 352 insertions(+)
> >> >>  create mode 100644 drivers/block/blkmap.c
> >> >>  create mode 100644 include/blkmap.h
> >> >>
> >
> > [..]
> >
> >> > This needs to be created as part of DM.  See how host_create_device()
> >> > works. It attaches something to the uclass and then creates child
> >> > devices from there. It also operations (struct host_ops) but you don't
> >> > need to do that.
> >> >
> >> > Note that the host commands support either an label or a devnum, which
> >> > I think is useful, so you might copy that?
> >> >
> >>
> >> I took a look at the hostfs implementation. I agree that labels are much
> >> nicer than bare integers. However, for block maps the idea is to fit in
> >> to the existing filesystem infrastructure. Addressing block devices
> >> using the "<interface> <dev>[:<part>]" pattern seems very well
> >> established...
> >
> > You can still do that, so long as the labels are "0" and "1", etc. But
> > it lets us move to a more flexible system in future.
> >
> >>
> >> >> +{
> >> >> +       static struct udevice *dev;
> >> >> +       int err;
> >> >> +
> >> >> +       if (dev)
> >> >> +               return dev;
> >> >> +
> >> >> +       err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
> >> >> +       if (err)
> >> >> +               return NULL;
> >> >> +
> >> >> +       err = device_probe(dev);
> >> >> +       if (err) {
> >> >> +               device_unbind(dev);
> >> >> +               return NULL;
> >> >> +       }
> >> >
> >> > Should not be needed as probing children will cause this to be probed.
> >> >
> >> > So this function just becomes
> >> >
> >> > uclass_first_device(UCLASS_BLKDEV, &
> >> >
> >> >> +
> >> >> +       return dev;
> >> >> +}
> >> >> +
> >> >> +int blkmap_create(int devnum)
> >> >
> >> > Again, please drop the use of devnum and use devices. Here you could
> >> > use a label, perhaps?
> >>
> >> ...which is why I don't think a label is going to fly here. Let's say I
> >> create a new ramdisk with a label instead, e.g.:
> >>
> >>     blkmap create rd
> >>     blkmap map rd 0 0x100 mem ${loadaddr}
> >>
> >> How do I know which <dev> to supply to, e.g.:
> >>
> >>     ls blkmap <dev> /boot
> >>
> >> It seems like labels are a hostfs-specific feature, or am I missing
> >> something?
> >
> > We have the same problem with hostfs, since we have not implemented
> > labels in block devices. For now you must use integer labels. But we
> > will get there.
>
> But there is no connection to the devnum that is allocated internally by
> U-Boot. Here's an experiment I just ran:
>
> I created two squashfs images containing a single directory:
>
>     zero.squashfs:
>      i_am_zero
>
>     one.squashfs:
>      i_am_one
>
> Then I added a binding to them:
>
>     => host bind 1 one.squashfs
>     => host bind 0 zero.squashfs
>
> When accessing them, we see that the existing filesystem utilities work
> on the internally generated devnums, ignoring the labels:
>
>     => ls host 1
>                 i_am_zero/
>
>     0 file(s), 1 dir(s)
>
>     => ls host 0
>                 i_am_one/
>
>     0 file(s), 1 dir(s)
>
>     =>
>
> Doesn't it therefore make more sense to stick with the established
> abstraction?

It is pretty clear that this is a bit silly though :-)

I mean, right now, it would be easier to stick with numbered devices.
But we want to be able to support named devices (e.g. using struct
udevice->name). A good way to be forward compatible is to support a
label today.

When we do get to it, the less code that has piled up and needs
converting, the more likely it is to happen.

Sure, you have the problem as above, but mostly people are only going
to use one of them, so it doesn't matter.

We could also have a way of obtaining a number from a label, if you
want to go that far. But I suggest just telling people to use labels
like "1" and "0" which should work.

[.]

Regards,
SImon
Tobias Waldekranz Feb. 7, 2023, 8:31 a.m. UTC | #6
On mån, feb 06, 2023 at 21:02, Simon Glass <sjg@chromium.org> wrote:
> Hi Tobias,
>
> On Mon, 6 Feb 2023 at 01:30, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>>
>> On fre, feb 03, 2023 at 17:20, Simon Glass <sjg@chromium.org> wrote:
>> > Hi Tobias,
>> >
>> > On Fri, 3 Feb 2023 at 02:38, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>> >>
>> >> On ons, feb 01, 2023 at 13:20, Simon Glass <sjg@chromium.org> wrote:
>> >> > Hi Tobias,
>> >>
>> >> Hi Simon,
>> >>
>> >> Thanks for the review!
>> >>
>> >> > On Wed, 1 Feb 2023 at 11:10, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>> >> >>
>> >> >> blkmaps are loosely modeled on Linux's device mapper subsystem. The
>> >> >> basic idea is that you can create virtual block devices whose blocks
>> >> >> can be backed by a plethora of sources that are user configurable.
>> >> >>
>> >> >> This change just adds the basic infrastructure for creating and
>> >> >> removing blkmap devices. Subsequent changes will extend this to add
>> >> >> support for actual mappings.
>> >> >>
>> >> >> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
>> >> >> ---
>> >> >>  MAINTAINERS                      |   6 +
>> >> >>  disk/part.c                      |   1 +
>> >> >>  drivers/block/Kconfig            |  18 ++
>> >> >>  drivers/block/Makefile           |   1 +
>> >> >>  drivers/block/blk-uclass.c       |   1 +
>> >> >>  drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
>> >> >>  include/blkmap.h                 |  15 ++
>> >> >>  include/dm/uclass-id.h           |   1 +
>> >> >>  include/efi_loader.h             |   4 +
>> >> >>  lib/efi_loader/efi_device_path.c |  30 ++++
>> >> >>  10 files changed, 352 insertions(+)
>> >> >>  create mode 100644 drivers/block/blkmap.c
>> >> >>  create mode 100644 include/blkmap.h
>> >> >>
>> >
>> > [..]
>> >
>> >> > This needs to be created as part of DM.  See how host_create_device()
>> >> > works. It attaches something to the uclass and then creates child
>> >> > devices from there. It also operations (struct host_ops) but you don't
>> >> > need to do that.
>> >> >
>> >> > Note that the host commands support either an label or a devnum, which
>> >> > I think is useful, so you might copy that?
>> >> >
>> >>
>> >> I took a look at the hostfs implementation. I agree that labels are much
>> >> nicer than bare integers. However, for block maps the idea is to fit in
>> >> to the existing filesystem infrastructure. Addressing block devices
>> >> using the "<interface> <dev>[:<part>]" pattern seems very well
>> >> established...
>> >
>> > You can still do that, so long as the labels are "0" and "1", etc. But
>> > it lets us move to a more flexible system in future.
>> >
>> >>
>> >> >> +{
>> >> >> +       static struct udevice *dev;
>> >> >> +       int err;
>> >> >> +
>> >> >> +       if (dev)
>> >> >> +               return dev;
>> >> >> +
>> >> >> +       err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
>> >> >> +       if (err)
>> >> >> +               return NULL;
>> >> >> +
>> >> >> +       err = device_probe(dev);
>> >> >> +       if (err) {
>> >> >> +               device_unbind(dev);
>> >> >> +               return NULL;
>> >> >> +       }
>> >> >
>> >> > Should not be needed as probing children will cause this to be probed.
>> >> >
>> >> > So this function just becomes
>> >> >
>> >> > uclass_first_device(UCLASS_BLKDEV, &
>> >> >
>> >> >> +
>> >> >> +       return dev;
>> >> >> +}
>> >> >> +
>> >> >> +int blkmap_create(int devnum)
>> >> >
>> >> > Again, please drop the use of devnum and use devices. Here you could
>> >> > use a label, perhaps?
>> >>
>> >> ...which is why I don't think a label is going to fly here. Let's say I
>> >> create a new ramdisk with a label instead, e.g.:
>> >>
>> >>     blkmap create rd
>> >>     blkmap map rd 0 0x100 mem ${loadaddr}
>> >>
>> >> How do I know which <dev> to supply to, e.g.:
>> >>
>> >>     ls blkmap <dev> /boot
>> >>
>> >> It seems like labels are a hostfs-specific feature, or am I missing
>> >> something?
>> >
>> > We have the same problem with hostfs, since we have not implemented
>> > labels in block devices. For now you must use integer labels. But we
>> > will get there.
>>
>> But there is no connection to the devnum that is allocated internally by
>> U-Boot. Here's an experiment I just ran:
>>
>> I created two squashfs images containing a single directory:
>>
>>     zero.squashfs:
>>      i_am_zero
>>
>>     one.squashfs:
>>      i_am_one
>>
>> Then I added a binding to them:
>>
>>     => host bind 1 one.squashfs
>>     => host bind 0 zero.squashfs
>>
>> When accessing them, we see that the existing filesystem utilities work
>> on the internally generated devnums, ignoring the labels:
>>
>>     => ls host 1
>>                 i_am_zero/
>>
>>     0 file(s), 1 dir(s)
>>
>>     => ls host 0
>>                 i_am_one/
>>
>>     0 file(s), 1 dir(s)
>>
>>     =>
>>
>> Doesn't it therefore make more sense to stick with the established
>> abstraction?
>
> It is pretty clear that this is a bit silly though :-)

As in "this specific example will never be used in practice"? Sure :)

My point was just that the approach to stick to integer labels is
brittle, since there is no connection between the label and the devnum
used by existing commands.

Here's an even simpler example that might actually trip up a user:

    => host bind 1 one.squashfs
    => ls host 1
    ** Bad device specification host 1 **
    Couldn't find partition host 1
    => ls host 0
                i_am_one/

    0 file(s), 1 dir(s)

    =>

> I mean, right now, it would be easier to stick with numbered devices.
> But we want to be able to support named devices (e.g. using struct
> udevice->name). A good way to be forward compatible is to support a
> label today.
>
> When we do get to it, the less code that has piled up and needs
> converting, the more likely it is to happen.

I completely understand, and agree with, the direction you want to take
this.

> Sure, you have the problem as above, but mostly people are only going
> to use one of them, so it doesn't matter.
>
> We could also have a way of obtaining a number from a label, if you
> want to go that far. But I suggest just telling people to use labels
> like "1" and "0" which should work.

Alright, well, if that is acceptable then I'll do it that way. For my
own piece of mind, I think I'll also add some way of safely doing the
reverse mapping for any label. Does the following look ok?

    blkmap get <label> dev <var>

This way you could extend it with other attributes in the future
(e.g. size).
Simon Glass Feb. 7, 2023, 1:38 p.m. UTC | #7
Hi Tobias,

On Tue, 7 Feb 2023 at 01:31, Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
> On mån, feb 06, 2023 at 21:02, Simon Glass <sjg@chromium.org> wrote:
> > Hi Tobias,
> >
> > On Mon, 6 Feb 2023 at 01:30, Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >>
> >> On fre, feb 03, 2023 at 17:20, Simon Glass <sjg@chromium.org> wrote:
> >> > Hi Tobias,
> >> >
> >> > On Fri, 3 Feb 2023 at 02:38, Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >> >>
> >> >> On ons, feb 01, 2023 at 13:20, Simon Glass <sjg@chromium.org> wrote:
> >> >> > Hi Tobias,
> >> >>
> >> >> Hi Simon,
> >> >>
> >> >> Thanks for the review!
> >> >>
> >> >> > On Wed, 1 Feb 2023 at 11:10, Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >> >> >>
> >> >> >> blkmaps are loosely modeled on Linux's device mapper subsystem. The
> >> >> >> basic idea is that you can create virtual block devices whose blocks
> >> >> >> can be backed by a plethora of sources that are user configurable.
> >> >> >>
> >> >> >> This change just adds the basic infrastructure for creating and
> >> >> >> removing blkmap devices. Subsequent changes will extend this to add
> >> >> >> support for actual mappings.
> >> >> >>
> >> >> >> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> >> >> >> ---
> >> >> >>  MAINTAINERS                      |   6 +
> >> >> >>  disk/part.c                      |   1 +
> >> >> >>  drivers/block/Kconfig            |  18 ++
> >> >> >>  drivers/block/Makefile           |   1 +
> >> >> >>  drivers/block/blk-uclass.c       |   1 +
> >> >> >>  drivers/block/blkmap.c           | 275 +++++++++++++++++++++++++++++++
> >> >> >>  include/blkmap.h                 |  15 ++
> >> >> >>  include/dm/uclass-id.h           |   1 +
> >> >> >>  include/efi_loader.h             |   4 +
> >> >> >>  lib/efi_loader/efi_device_path.c |  30 ++++
> >> >> >>  10 files changed, 352 insertions(+)
> >> >> >>  create mode 100644 drivers/block/blkmap.c
> >> >> >>  create mode 100644 include/blkmap.h
> >> >> >>
> >> >
> >> > [..]
> >> >
> >> >> > This needs to be created as part of DM.  See how host_create_device()
> >> >> > works. It attaches something to the uclass and then creates child
> >> >> > devices from there. It also operations (struct host_ops) but you don't
> >> >> > need to do that.
> >> >> >
> >> >> > Note that the host commands support either an label or a devnum, which
> >> >> > I think is useful, so you might copy that?
> >> >> >
> >> >>
> >> >> I took a look at the hostfs implementation. I agree that labels are much
> >> >> nicer than bare integers. However, for block maps the idea is to fit in
> >> >> to the existing filesystem infrastructure. Addressing block devices
> >> >> using the "<interface> <dev>[:<part>]" pattern seems very well
> >> >> established...
> >> >
> >> > You can still do that, so long as the labels are "0" and "1", etc. But
> >> > it lets us move to a more flexible system in future.
> >> >
> >> >>
> >> >> >> +{
> >> >> >> +       static struct udevice *dev;
> >> >> >> +       int err;
> >> >> >> +
> >> >> >> +       if (dev)
> >> >> >> +               return dev;
> >> >> >> +
> >> >> >> +       err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
> >> >> >> +       if (err)
> >> >> >> +               return NULL;
> >> >> >> +
> >> >> >> +       err = device_probe(dev);
> >> >> >> +       if (err) {
> >> >> >> +               device_unbind(dev);
> >> >> >> +               return NULL;
> >> >> >> +       }
> >> >> >
> >> >> > Should not be needed as probing children will cause this to be probed.
> >> >> >
> >> >> > So this function just becomes
> >> >> >
> >> >> > uclass_first_device(UCLASS_BLKDEV, &
> >> >> >
> >> >> >> +
> >> >> >> +       return dev;
> >> >> >> +}
> >> >> >> +
> >> >> >> +int blkmap_create(int devnum)
> >> >> >
> >> >> > Again, please drop the use of devnum and use devices. Here you could
> >> >> > use a label, perhaps?
> >> >>
> >> >> ...which is why I don't think a label is going to fly here. Let's say I
> >> >> create a new ramdisk with a label instead, e.g.:
> >> >>
> >> >>     blkmap create rd
> >> >>     blkmap map rd 0 0x100 mem ${loadaddr}
> >> >>
> >> >> How do I know which <dev> to supply to, e.g.:
> >> >>
> >> >>     ls blkmap <dev> /boot
> >> >>
> >> >> It seems like labels are a hostfs-specific feature, or am I missing
> >> >> something?
> >> >
> >> > We have the same problem with hostfs, since we have not implemented
> >> > labels in block devices. For now you must use integer labels. But we
> >> > will get there.
> >>
> >> But there is no connection to the devnum that is allocated internally by
> >> U-Boot. Here's an experiment I just ran:
> >>
> >> I created two squashfs images containing a single directory:
> >>
> >>     zero.squashfs:
> >>      i_am_zero
> >>
> >>     one.squashfs:
> >>      i_am_one
> >>
> >> Then I added a binding to them:
> >>
> >>     => host bind 1 one.squashfs
> >>     => host bind 0 zero.squashfs
> >>
> >> When accessing them, we see that the existing filesystem utilities work
> >> on the internally generated devnums, ignoring the labels:
> >>
> >>     => ls host 1
> >>                 i_am_zero/
> >>
> >>     0 file(s), 1 dir(s)
> >>
> >>     => ls host 0
> >>                 i_am_one/
> >>
> >>     0 file(s), 1 dir(s)
> >>
> >>     =>
> >>
> >> Doesn't it therefore make more sense to stick with the established
> >> abstraction?
> >
> > It is pretty clear that this is a bit silly though :-)
>
> As in "this specific example will never be used in practice"? Sure :)
>
> My point was just that the approach to stick to integer labels is
> brittle, since there is no connection between the label and the devnum
> used by existing commands.
>
> Here's an even simpler example that might actually trip up a user:
>
>     => host bind 1 one.squashfs
>     => ls host 1
>     ** Bad device specification host 1 **
>     Couldn't find partition host 1
>     => ls host 0
>                 i_am_one/
>
>     0 file(s), 1 dir(s)
>
>     =>

Yes, I get it. Perhaps this will spur us to look at device
naming...one of the impediments has been that we don't use CONFIG_BLK
in SPL on quite a few boards, so there are effectively two
implementations, one of which does not use driver model. So naming
doesn't exist in that case. It is hard to require driver model in SPL,
but perhaps at some point we can require it if block devices are
needed.

>
> > I mean, right now, it would be easier to stick with numbered devices.
> > But we want to be able to support named devices (e.g. using struct
> > udevice->name). A good way to be forward compatible is to support a
> > label today.
> >
> > When we do get to it, the less code that has piled up and needs
> > converting, the more likely it is to happen.
>
> I completely understand, and agree with, the direction you want to take
> this.
>
> > Sure, you have the problem as above, but mostly people are only going
> > to use one of them, so it doesn't matter.
> >
> > We could also have a way of obtaining a number from a label, if you
> > want to go that far. But I suggest just telling people to use labels
> > like "1" and "0" which should work.
>
> Alright, well, if that is acceptable then I'll do it that way. For my
> own piece of mind, I think I'll also add some way of safely doing the
> reverse mapping for any label. Does the following look ok?
>
>     blkmap get <label> dev <var>
>
> This way you could extend it with other attributes in the future
> (e.g. size).

Yes that sounds great. Perhaps we can (at some point) extend that sort
of thing to block devices in general.

Regards,
SImon
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 3e8e193ecc..28a34231bf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -786,6 +786,12 @@  M:	Alper Nebi Yasak <alpernebiyasak@gmail.com>
 S:	Maintained
 F:	tools/binman/
 
+BLKMAP
+M:	Tobias Waldekranz <tobias@waldekranz.com>
+S:	Maintained
+F:	drivers/block/blkmap.c
+F:	include/blkmap.h
+
 BOOTDEVICE
 M:	Simon Glass <sjg@chromium.org>
 S:	Maintained
diff --git a/disk/part.c b/disk/part.c
index d449635254..35300df590 100644
--- a/disk/part.c
+++ b/disk/part.c
@@ -140,6 +140,7 @@  void dev_print(struct blk_desc *dev_desc)
 	case UCLASS_NVME:
 	case UCLASS_PVBLOCK:
 	case UCLASS_HOST:
+	case UCLASS_BLKMAP:
 		printf ("Vendor: %s Rev: %s Prod: %s\n",
 			dev_desc->vendor,
 			dev_desc->revision,
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index e95da48bdc..5a1aeb3d2b 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -67,6 +67,24 @@  config BLOCK_CACHE
 	  it will prevent repeated reads from directory structures and other
 	  filesystem data structures.
 
+config BLKMAP
+	bool "Composable virtual block devices (blkmap)"
+	depends on BLK
+	help
+ 	  Create virtual block devices that are backed by various sources,
+ 	  e.g. RAM, or parts of an existing block device. Though much more
+ 	  rudimentary, it borrows a lot of ideas from Linux's device mapper
+ 	  subsystem.
+
+	  Example use-cases:
+	  - Treat a region of RAM as a block device, i.e. a RAM disk. This let's
+            you extract files from filesystem images stored in RAM (perhaps as a
+            result of a TFTP transfer).
+	  - Create a virtual partition on an existing device. This let's you
+            access filesystems that aren't stored at an exact partition
+            boundary. A common example is a filesystem image embedded in an FIT
+            image.
+
 config SPL_BLOCK_CACHE
 	bool "Use block device cache in SPL"
 	depends on SPL_BLK
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index f12447d78d..a161d145fd 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -14,6 +14,7 @@  obj-$(CONFIG_IDE) += ide.o
 endif
 obj-$(CONFIG_SANDBOX) += sandbox.o host-uclass.o host_dev.o
 obj-$(CONFIG_$(SPL_TPL_)BLOCK_CACHE) += blkcache.o
+obj-$(CONFIG_BLKMAP) += blkmap.o
 
 obj-$(CONFIG_EFI_MEDIA) += efi-media-uclass.o
 obj-$(CONFIG_EFI_MEDIA_SANDBOX) += sb_efi_media.o
diff --git a/drivers/block/blk-uclass.c b/drivers/block/blk-uclass.c
index c69fc4d518..cb73faaeda 100644
--- a/drivers/block/blk-uclass.c
+++ b/drivers/block/blk-uclass.c
@@ -32,6 +32,7 @@  static struct {
 	{ UCLASS_EFI_LOADER, "efiloader" },
 	{ UCLASS_VIRTIO, "virtio" },
 	{ UCLASS_PVBLOCK, "pvblock" },
+	{ UCLASS_BLKMAP, "blkmap" },
 };
 
 static enum uclass_id uclass_name_to_iftype(const char *uclass_idname)
diff --git a/drivers/block/blkmap.c b/drivers/block/blkmap.c
new file mode 100644
index 0000000000..a6ba07404c
--- /dev/null
+++ b/drivers/block/blkmap.c
@@ -0,0 +1,275 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2023 Addiva Elektronik
+ * Author: Tobias Waldekranz <tobias@waldekranz.com>
+ */
+
+#include <common.h>
+#include <blk.h>
+#include <blkmap.h>
+#include <dm.h>
+#include <dm/device-internal.h>
+#include <dm/lists.h>
+#include <dm/root.h>
+#include <malloc.h>
+#include <part.h>
+
+struct blkmap;
+
+struct blkmap_slice {
+	struct list_head node;
+
+	lbaint_t blknr;
+	lbaint_t blkcnt;
+
+	ulong (*read)(struct blkmap *bm, struct blkmap_slice *bms,
+		      lbaint_t blknr, lbaint_t blkcnt, void *buffer);
+	ulong (*write)(struct blkmap *bm, struct blkmap_slice *bms,
+		       lbaint_t blknr, lbaint_t blkcnt, const void *buffer);
+	void (*destroy)(struct blkmap *bm, struct blkmap_slice *bms);
+};
+
+struct blkmap {
+	struct udevice *dev;
+	struct list_head slices;
+};
+
+static bool blkmap_slice_contains(struct blkmap_slice *bms, lbaint_t blknr)
+{
+	return (blknr >= bms->blknr) && (blknr < (bms->blknr + bms->blkcnt));
+}
+
+static bool blkmap_slice_available(struct blkmap *bm, struct blkmap_slice *new)
+{
+	struct blkmap_slice *bms;
+	lbaint_t first, last;
+
+	first = new->blknr;
+	last = new->blknr + new->blkcnt - 1;
+
+	list_for_each_entry(bms, &bm->slices, node) {
+		if (blkmap_slice_contains(bms, first) ||
+		    blkmap_slice_contains(bms, last) ||
+		    blkmap_slice_contains(new, bms->blknr) ||
+		    blkmap_slice_contains(new, bms->blknr + bms->blkcnt - 1))
+			return false;
+	}
+
+	return true;
+}
+
+static struct blkmap *blkmap_from_devnum(int devnum)
+{
+	struct udevice *dev;
+	int err;
+
+	err = blk_find_device(UCLASS_BLKMAP, devnum, &dev);
+
+	return err ? NULL : dev_get_priv(dev);
+}
+
+static int blkmap_add(struct blkmap *bm, struct blkmap_slice *new)
+{
+	struct blk_desc *bd = dev_get_uclass_plat(bm->dev);
+	struct list_head *insert = &bm->slices;
+	struct blkmap_slice *bms;
+
+	if (!blkmap_slice_available(bm, new))
+		return -EBUSY;
+
+	list_for_each_entry(bms, &bm->slices, node) {
+		if (bms->blknr < new->blknr)
+			continue;
+
+		insert = &bms->node;
+		break;
+	}
+
+	list_add_tail(&new->node, insert);
+
+	/* Disk might have grown, update the size */
+	bms = list_last_entry(&bm->slices, struct blkmap_slice, node);
+	bd->lba = bms->blknr + bms->blkcnt;
+	return 0;
+}
+
+static struct udevice *blkmap_root(void)
+{
+	static struct udevice *dev;
+	int err;
+
+	if (dev)
+		return dev;
+
+	err = device_bind_driver(dm_root(), "blkmap_root", "blkmap", &dev);
+	if (err)
+		return NULL;
+
+	err = device_probe(dev);
+	if (err) {
+		device_unbind(dev);
+		return NULL;
+	}
+
+	return dev;
+}
+
+int blkmap_create(int devnum)
+{
+	struct udevice *root;
+	struct blk_desc *bd;
+	struct blkmap *bm;
+	int err;
+
+	if (devnum >= 0 && blkmap_from_devnum(devnum))
+		return -EBUSY;
+
+	root = blkmap_root();
+	if (!root)
+		return -ENODEV;
+
+	bm = calloc(1, sizeof(*bm));
+	if (!bm)
+		return -ENOMEM;
+
+	err = blk_create_devicef(root, "blkmap_blk", "blk", UCLASS_BLKMAP,
+				 devnum, 512, 0, &bm->dev);
+	if (err)
+		goto err_free;
+
+	bd = dev_get_uclass_plat(bm->dev);
+
+	/* EFI core isn't keen on zero-sized disks, so we lie. This is
+	 * updated with the correct size once the user adds a
+	 * mapping.
+	 */
+	bd->lba = 1;
+
+	dev_set_priv(bm->dev, bm);
+	INIT_LIST_HEAD(&bm->slices);
+
+	err = blk_probe_or_unbind(bm->dev);
+	if (err)
+		goto err_remove;
+
+	return bd->devnum;
+
+err_remove:
+	device_remove(bm->dev, DM_REMOVE_NORMAL);
+err_free:
+	free(bm);
+	return err;
+}
+
+int blkmap_destroy(int devnum)
+{
+	struct blkmap_slice *bms, *tmp;
+	struct blkmap *bm;
+	int err;
+
+	bm = blkmap_from_devnum(devnum);
+	if (!bm)
+		return -ENODEV;
+
+	err = device_remove(bm->dev, DM_REMOVE_NORMAL);
+	if (err)
+		return err;
+
+	err = device_unbind(bm->dev);
+	if (err)
+		return err;
+
+	list_for_each_entry_safe(bms, tmp, &bm->slices, node) {
+		list_del(&bms->node);
+		free(bms);
+	}
+
+	free(bm);
+	return 0;
+}
+
+static ulong blkmap_read_slice(struct blkmap *bm, struct blkmap_slice *bms,
+			       lbaint_t blknr, lbaint_t blkcnt, void *buffer)
+{
+	lbaint_t nr, cnt;
+
+	nr = blknr - bms->blknr;
+	cnt = (blkcnt < bms->blkcnt) ? blkcnt : bms->blkcnt;
+	return bms->read(bm, bms, nr, cnt, buffer);
+}
+
+static ulong blkmap_read(struct udevice *dev, lbaint_t blknr, lbaint_t blkcnt,
+			 void *buffer)
+{
+	struct blk_desc *bd = dev_get_uclass_plat(dev);
+	struct blkmap *bm = dev_get_priv(dev);
+	struct blkmap_slice *bms;
+	lbaint_t cnt, total = 0;
+
+	list_for_each_entry(bms, &bm->slices, node) {
+		if (!blkmap_slice_contains(bms, blknr))
+			continue;
+
+		cnt = blkmap_read_slice(bm, bms, blknr, blkcnt, buffer);
+		blknr += cnt;
+		blkcnt -= cnt;
+		buffer += cnt << bd->log2blksz;
+		total += cnt;
+	}
+
+	return total;
+}
+
+static ulong blkmap_write_slice(struct blkmap *bm, struct blkmap_slice *bms,
+				lbaint_t blknr, lbaint_t blkcnt,
+				const void *buffer)
+{
+	lbaint_t nr, cnt;
+
+	nr = blknr - bms->blknr;
+	cnt = (blkcnt < bms->blkcnt) ? blkcnt : bms->blkcnt;
+	return bms->write(bm, bms, nr, cnt, buffer);
+}
+
+static ulong blkmap_write(struct udevice *dev, lbaint_t blknr, lbaint_t blkcnt,
+			  const void *buffer)
+{
+	struct blk_desc *bd = dev_get_uclass_plat(dev);
+	struct blkmap *bm = dev_get_priv(dev);
+	struct blkmap_slice *bms;
+	lbaint_t cnt, total = 0;
+
+	list_for_each_entry(bms, &bm->slices, node) {
+		if (!blkmap_slice_contains(bms, blknr))
+			continue;
+
+		cnt = blkmap_write_slice(bm, bms, blknr, blkcnt, buffer);
+		blknr += cnt;
+		blkcnt -= cnt;
+		buffer += cnt << bd->log2blksz;
+		total += cnt;
+	}
+
+	return total;
+}
+
+static const struct blk_ops blkmap_ops = {
+	.read	= blkmap_read,
+	.write	= blkmap_write,
+};
+
+U_BOOT_DRIVER(blkmap_blk) = {
+	.name		= "blkmap_blk",
+	.id		= UCLASS_BLK,
+	.ops		= &blkmap_ops,
+};
+
+U_BOOT_DRIVER(blkmap_root) = {
+	.name		= "blkmap_root",
+	.id		= UCLASS_BLKMAP,
+};
+
+UCLASS_DRIVER(blkmap) = {
+	.id		= UCLASS_BLKMAP,
+	.name		= "blkmap",
+};
diff --git a/include/blkmap.h b/include/blkmap.h
new file mode 100644
index 0000000000..37c0c31c3f
--- /dev/null
+++ b/include/blkmap.h
@@ -0,0 +1,15 @@ 
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2023 Addiva Elektronik
+ * Author: Tobias Waldekranz <tobias@waldekranz.com>
+ */
+
+#ifndef _BLKMAP_H
+#define _BLKMAP_H
+
+#include <stdbool.h>
+
+int blkmap_create(int devnum);
+int blkmap_destroy(int devnum);
+
+#endif	/* _BLKMAP_H */
diff --git a/include/dm/uclass-id.h b/include/dm/uclass-id.h
index 33e43c20db..576237b954 100644
--- a/include/dm/uclass-id.h
+++ b/include/dm/uclass-id.h
@@ -37,6 +37,7 @@  enum uclass_id {
 	UCLASS_AUDIO_CODEC,	/* Audio codec with control and data path */
 	UCLASS_AXI,		/* AXI bus */
 	UCLASS_BLK,		/* Block device */
+	UCLASS_BLKMAP,		/* Composable virtual block device */
 	UCLASS_BOOTCOUNT,       /* Bootcount backing store */
 	UCLASS_BOOTDEV,		/* Boot device for locating an OS to boot */
 	UCLASS_BOOTMETH,	/* Bootmethod for booting an OS */
diff --git a/include/efi_loader.h b/include/efi_loader.h
index 4560b0d04c..59687f44de 100644
--- a/include/efi_loader.h
+++ b/include/efi_loader.h
@@ -134,6 +134,10 @@  static inline efi_status_t efi_launch_capsules(void)
 #define U_BOOT_GUID \
 	EFI_GUID(0xe61d73b9, 0xa384, 0x4acc, \
 		 0xae, 0xab, 0x82, 0xe8, 0x28, 0xf3, 0x62, 0x8b)
+/* GUID used as root for blkmap devices */
+#define U_BOOT_BLKMAP_DEV_GUID \
+	EFI_GUID(0x4cad859d, 0xd644, 0x42ff,	\
+		 0x87, 0x0b, 0xc0, 0x2e, 0xac, 0x05, 0x58, 0x63)
 /* GUID used as host device on sandbox */
 #define U_BOOT_HOST_DEV_GUID \
 	EFI_GUID(0xbbe4e671, 0x5773, 0x4ea1, \
diff --git a/lib/efi_loader/efi_device_path.c b/lib/efi_loader/efi_device_path.c
index 3b267b713e..4b4c96bc2e 100644
--- a/lib/efi_loader/efi_device_path.c
+++ b/lib/efi_loader/efi_device_path.c
@@ -21,6 +21,9 @@ 
 #include <asm-generic/unaligned.h>
 #include <linux/compat.h> /* U16_MAX */
 
+#ifdef CONFIG_BLKMAP
+const efi_guid_t efi_guid_blkmap_dev = U_BOOT_BLKMAP_DEV_GUID;
+#endif
 #ifdef CONFIG_SANDBOX
 const efi_guid_t efi_guid_host_dev = U_BOOT_HOST_DEV_GUID;
 #endif
@@ -573,6 +576,16 @@  __maybe_unused static unsigned int dp_size(struct udevice *dev)
 			  */
 			return dp_size(dev->parent)
 				+ sizeof(struct efi_device_path_vendor) + 1;
+#endif
+#ifdef CONFIG_BLKMAP
+		case UCLASS_BLKMAP:
+			 /*
+			  * blkmap devices will be represented as a vendor
+			  * device node with an extra byte for the device
+			  * number.
+			  */
+			return dp_size(dev->parent)
+				+ sizeof(struct efi_device_path_vendor) + 1;
 #endif
 		default:
 			return dp_size(dev->parent);
@@ -631,6 +644,23 @@  __maybe_unused static void *dp_fill(void *buf, struct udevice *dev)
 #endif
 	case UCLASS_BLK:
 		switch (dev->parent->uclass->uc_drv->id) {
+#ifdef CONFIG_BLKMAP
+		case UCLASS_BLKMAP: {
+			struct efi_device_path_vendor *dp;
+			struct blk_desc *desc = dev_get_uclass_plat(dev);
+
+			dp_fill(buf, dev->parent);
+			dp = buf;
+			++dp;
+			dp->dp.type = DEVICE_PATH_TYPE_HARDWARE_DEVICE;
+			dp->dp.sub_type = DEVICE_PATH_SUB_TYPE_VENDOR;
+			dp->dp.length = sizeof(*dp) + 1;
+			memcpy(&dp->guid, &efi_guid_blkmap_dev,
+			       sizeof(efi_guid_t));
+			dp->vendor_data[0] = desc->devnum;
+			return &dp->vendor_data[1];
+			}
+#endif
 #ifdef CONFIG_SANDBOX
 		case UCLASS_HOST: {
 			/* stop traversing parents at this point: */