diff mbox series

[1/2] handlers: diskpart_handler.c: add handler ifor script gptswap

Message ID 20220405131715.79763-1-philippe.reynes@softathome.com
State Changes Requested
Headers show
Series [1/2] handlers: diskpart_handler.c: add handler ifor script gptswap | expand

Commit Message

Philippe REYNES April 5, 2022, 1:17 p.m. UTC
Add a handler gptswap to swap gpt partition.
This handler is a script that can be used to swap gpt partition
names after all the image were flashed (only partition names are
swapped). This script may be usefull for a dual bank strategy.

For example:
scripts: (
	{
		type = "gptswap";
		device = "/dev/vdb";
		properties =
		{
			swap-0 = [ "u-boot-0" , "u-boot-1" ];
			swap-1 = [ "kernel-0" , "kernel-1" ];
		};
	},
);

Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
---
 doc/source/handlers.rst     |  24 ++++
 handlers/diskpart_handler.c | 244 ++++++++++++++++++++++++++++++++++++
 2 files changed, 268 insertions(+)

Comments

Stefano Babic April 5, 2022, 2:17 p.m. UTC | #1
Hi Philippe,

On 05.04.22 15:17, Philippe Reynes wrote:
> Add a handler gptswap to swap gpt partition.
> This handler is a script that can be used to swap gpt partition
> names after all the image were flashed (only partition names are
> swapped). This script may be usefull for a dual bank strategy.
> 
> For example:
> scripts: (
> 	{
> 		type = "gptswap";
> 		device = "/dev/vdb";
> 		properties =
> 		{
> 			swap-0 = [ "u-boot-0" , "u-boot-1" ];
> 			swap-1 = [ "kernel-0" , "kernel-1" ];
> 		};
> 	},
> );
> 

I would like first to discuss about the way to do, not the patch (I will 
review later). My concerns are related if this can be atomic.

The procedure you implement is something like:

- you read the partitions from disk
- you exchange the names of partitions according to the rule in 
sw-description. This happens in memory, no problem up now.

- call diskpart_write_table() and rewrite the whole table, as it should be.

At the end, the whole partition table is rewritten. Because of that, and 
also because the data should then written by the low level driver (for 
example a SDHC driver), I have the feeling that it is not atomic. We can 
also have two partitions with the same name after a power cut, or even a 
corrupted table. Or do you have some other thoughts ?

I have thought to implement this in a less invasive way, just setting a 
"bootflag" - we have not on GPT, but we can use an attribute 
(http://ftp.ntu.edu.tw/linux/utils/util-linux/v2.34/libfdisk-docs/libfdisk-UEFI-GPT.html#fdisk-gpt-get-partition-attrs), 
maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() with 
one "user" flag.

Can we rely on the mechanism you implemented ? Quite scared that the 
tables are rewritten at any toggle.

Regards,
Stefano

> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
> ---
>   doc/source/handlers.rst     |  24 ++++
>   handlers/diskpart_handler.c | 244 ++++++++++++++++++++++++++++++++++++
>   2 files changed, 268 insertions(+)
> 
> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
> index 8d2aeea..025d100 100644
> --- a/doc/source/handlers.rst
> +++ b/doc/source/handlers.rst
> @@ -943,6 +943,30 @@ MBR Example:
>   	   }
>   	}
>   
> +gpt partition swap
> +..................
> +
> +There is a handler gptswap that allow to swap gpt partitions after all the images were flashed.
> +This handler only swap the name of the partition. It coud be usefull for a dual bank strategy.
> +This handler is a script for the point of view of swupdate, so the node that provide it should
> +be added in the section scripts.
> +
> +Simple example:
> +
> +::
> +
> +        scripts: (
> +                {
> +                        type = "gptswap";
> +                        device = "/dev/vdb";
> +                        properties =
> +                        {
> +                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
> +                                swap-1 = [ "kernel-0" , "kernel-1" ];
> +                        };
> +                },
> +         );
> +
>   Diskformat Handler
>   ------------------
>   
> diff --git a/handlers/diskpart_handler.c b/handlers/diskpart_handler.c
> index 0af4e60..0ff5968 100644
> --- a/handlers/diskpart_handler.c
> +++ b/handlers/diskpart_handler.c
> @@ -14,8 +14,10 @@
>   #include <errno.h>
>   #include <ctype.h>
>   #include <sys/file.h>
> +#include <sys/ioctl.h>
>   #include <sys/types.h>
>   #include <libfdisk/libfdisk.h>
> +#include <linux/fs.h>
>   #include <fs_interface.h>
>   #include <uuid/uuid.h>
>   #include "swupdate.h"
> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct fdisk_context *cxt, struct diskpart_ta
>   	return ret;
>   }
>   
> +static struct fdisk_partition *
> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, char *name)
> +{
> +	struct fdisk_partition *pa = NULL;
> +	struct fdisk_partition *ipa = NULL;
> +	struct fdisk_iter *itr;
> +	const char *iname;
> +
> +	if (!tb || !name)
> +		goto out;
> +
> +	itr = fdisk_new_iter(FDISK_ITER_FORWARD);
> +
> +	while (!fdisk_table_next_partition(tb, itr, &ipa)) {
> +		iname = fdisk_partition_get_name(ipa);
> +		if (iname && !strcmp(iname, name)) {
> +			pa = ipa;
> +			break;
> +		}
> +	}
> +
> +	fdisk_free_iter(itr);
> +
> + out:
> +	return pa;
> +}
> +
> +static struct fdisk_partition *
> +diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
> +{
> +	struct fdisk_partition *pa = NULL;
> +
> +	if (!tb || !name)
> +		goto out;
> +
> +	if (tb->parent)
> +		pa = diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
> +
> + out:
> +	return pa;
> +}
> +
> +static int diskpart_swap_partition(struct diskpart_table *tb,
> +				   struct create_table *createtable,
> +				   char *name1, char *name2)
> +{
> +	struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
> +	int ret = -1;
> +
> +	pa1 = diskpart_get_partition_by_name(tb, name1);
> +	if (!pa1) {
> +		ERROR("Can't find partition %s", name1);
> +		goto out;
> +	}
> +
> +	pa2 = diskpart_get_partition_by_name(tb, name2);
> +	if (!pa2) {
> +		ERROR("Can't find partition %s", name2);
> +		goto out;
> +	}
> +
> +	ret = fdisk_partition_set_name(pa1, name2);
> +	if (ret)
> +		goto out;
> +	ret = fdisk_partition_set_name(pa2, name1);
> +	if (ret)
> +		goto out;
> +
> +	createtable->parent = true;
> +
> + out:
> +	return ret;
> +}
> +
>   static int diskpart_set_partition(struct fdisk_partition *pa,
>   				  struct partition_data *part,
>   				  unsigned long sector_size,
> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct fdisk_context *cxt)
>   	fdisk_unref_context(cxt);
>   }
>   
> +static int diskpart_reread_partition(char *device)
> +{
> +	int fd, ret = -1;
> +
> +	fd = open(device, O_RDONLY);
> +	if (fd < 0) {
> +		ERROR("Device %s can't be opened", device);
> +		goto out;
> +	}
> +
> +	ret = ioctl(fd, BLKRRPART, NULL);
> +	if (ret < 0) {
> +		ERROR("Scan cannot be done on device %s", device);
> +		goto out_close;
> +	}
> +
> + out_close:
> +	close(fd);
> +
> + out:
> +	return ret;
> +}
> +
>   static int diskpart(struct img_type *img,
>   	void __attribute__ ((__unused__)) *data)
>   {
> @@ -1066,9 +1165,154 @@ handler_release:
>   	return ret;
>   }
>   
> +static int gpt_swap_partition(struct img_type *img, void *data)
> +{
> +	struct fdisk_context *cxt = NULL;
> +	struct diskpart_table *tb = NULL;
> +	int ret = 0;
> +	unsigned long hybrid = 0;
> +	struct hnd_priv priv =  {
> +		.labeltype = FDISK_DISKLABEL_DOS,
> +	};
> +	struct create_table *createtable = NULL;
> +	struct script_handler_data *script_data = data;
> +	char prop[SWUPDATE_GENERAL_STRING_SIZE];
> +	struct dict_list *partitions;
> +	struct dict_list_elem *partition;
> +	int num, count = 0;
> +	char *name[2];
> +
> +	if (!script_data)
> +		return -EINVAL;
> +
> +	/*
> +	 * Call only in case of postinstall
> +	 */
> +	if (script_data->scriptfn != POSTINSTALL)
> +		return 0;
> +
> +	LIST_INIT(&priv.listparts);
> +	if (!strlen(img->device)) {
> +		ERROR("Partition handler without setting the device");
> +		return -EINVAL;
> +	}
> +
> +	createtable = calloc(1, sizeof(*createtable));
> +	if (!createtable) {
> +		ERROR("OOM allocating createtable !");
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Reads flags: nolock and noinuse
> +	 */
> +	priv.nolock = strtobool(dict_get_value(&img->properties, "nolock"));
> +	priv.noinuse = strtobool(dict_get_value(&img->properties, "noinuse"));
> +
> +        /*
> +         * Create context
> +         */
> +	ret = diskpart_assign_context(&cxt, img, priv, hybrid, createtable);
> +	if (ret == -EACCES)
> +		goto handler_release;
> +	else if (ret)
> +		goto handler_exit;
> +
> +	tb = calloc(1, sizeof(*tb));
> +	if (!tb) {
> +		ERROR("OOM loading partitions !");
> +		ret = -ENOMEM;
> +		goto handler_exit;
> +	}
> +
> +	/*
> +	 * Fill the in-memory partition table from the disk.
> +	 */
> +	ret = diskpart_get_partitions(cxt, tb, createtable);
> +	if (ret)
> +		goto handler_exit;
> +
> +	while (1) {
> +		snprintf(prop, sizeof(prop), "swap-%d", count);
> +		partitions = dict_get_list(&img->properties, prop);
> +		if (!partitions)
> +			break;
> +
> +		num = 0;
> +		LIST_FOREACH(partition, partitions, next) {
> +			if (num >= 2) {
> +				ERROR("Too many partition (%s)", prop);
> +				goto handler_exit;
> +			}
> +
> +			name[num] = partition->value;
> +			num++;
> +		}
> +
> +		if (num != 2) {
> +			ERROR("Invalid number (%d) of partition (%s)", num, prop);
> +			goto handler_exit;
> +		}
> +
> +		TRACE("swap partition %s <-> %s", name[0], name[1]);
> +
> +		ret = diskpart_swap_partition(tb, createtable, name[0], name[1]);
> +		if (ret) {
> +			ERROR("Can't swap %s and %s", name[0], name[1]);
> +			break;
> +		}
> +
> +		count++;
> +	}
> +
> +	/* Reload table for parent */
> +	ret = diskpart_reload_table(PARENT(cxt), tb->parent);
> +	if (ret) {
> +		ERROR("Can't reload table for parent (err = %d)", ret);
> +		goto handler_exit;
> +	}
> +
> +	/* Reload table for child */
> +	if (IS_HYBRID(cxt)) {
> +		ret = diskpart_reload_table(cxt, tb->child);
> +		if (ret) {
> +			ERROR("Can't reload table for child (err = %d)", ret);
> +			goto handler_exit;
> +		}
> +	}
> +
> +	/* Write table */
> +	ret = diskpart_write_table(cxt, createtable, priv.nolock, priv.noinuse);
> +	if (ret) {
> +		ERROR("Can't write table (err = %d)", ret);
> +		goto handler_exit;
> +	}
> +
> +handler_exit:
> +	if (tb)
> +		diskpart_unref_table(tb);
> +	if (cxt && fdisk_get_devfd(cxt) >= 0)
> +		if (fdisk_deassign_device(cxt, 0))
> +			WARN("Error deassign device %s", img->device);
> +
> +handler_release:
> +	if (cxt)
> +		diskpart_unref_context(cxt);
> +
> +	/*
> +	 * Re-read the partition table to be sure that SWupdate does not
> +	 * try to acces the partitions before the kernel is ready
> +	 */
> +	diskpart_reread_partition(img->device);
> +
> +	return ret;
> +}
> +
>   __attribute__((constructor))
>   void diskpart_handler(void)
>   {
>   	register_handler("diskpart", diskpart,
>   				PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
> +        register_handler("gptswap", gpt_swap_partition,
> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>   }
Philippe REYNES April 6, 2022, 3:30 p.m. UTC | #2
Hi Stefano,


Le 05/04/2022 à 16:17, Stefano Babic a écrit :
> Hi Philippe,
>
> On 05.04.22 15:17, Philippe Reynes wrote:
>> Add a handler gptswap to swap gpt partition.
>> This handler is a script that can be used to swap gpt partition
>> names after all the image were flashed (only partition names are
>> swapped). This script may be usefull for a dual bank strategy.
>>
>> For example:
>> scripts: (
>>     {
>>         type = "gptswap";
>>         device = "/dev/vdb";
>>         properties =
>>         {
>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>         };
>>     },
>> );
>>
>
> I would like first to discuss about the way to do, not the patch (I 
> will review later). My concerns are related if this can be atomic.
>
> The procedure you implement is something like:
>
> - you read the partitions from disk
> - you exchange the names of partitions according to the rule in 
> sw-description. This happens in memory, no problem up now.
>
> - call diskpart_write_table() and rewrite the whole table, as it 
> should be.
>
> At the end, the whole partition table is rewritten. Because of that, 
> and also because the data should then written by the low level driver 
> (for example a SDHC driver), I have the feeling that it is not atomic. 
> We can also have two partitions with the same name after a power cut, 
> or even a corrupted table. Or do you have some other thoughts ?
>
> I have thought to implement this in a less invasive way, just setting 
> a "bootflag" - we have not on GPT, but we can use an attribute 
> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Ced0439efd7c94fde4dd708da170ef58d%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637847650262696847%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=5Mboz3yvvY9ZQJ0VWb2O3HNsvplvjyP5kBrqgWksbPM%3D&amp;reserved=0), 
> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() with 
> one "user" flag.
>

In my understanding, partitions flags are set in memory, and then the 
whole GPT partition is written to the disk.
So the behavior is the same if we swap the partition name or a partition 
flag.


I have looked the function gpt_write_disklabel (in util-linux) that is 
used to write the full GPT partition.

This comment describes how it happens:

     /*

      * UEFI requires writing in this specific order:

      *   1) backup partition tables

      *   2) backup GPT header

      *   3) primary partition tables

      *   4) primary GPT header

      *   5) protective MBR

      *

      * If any write fails, we abort the rest.

      */

When GPT is used on a disk, there are 5 elements:
- protective MBR (LBA 0)
- primary GPT table header (LBA 1)
- primary GPT table partition (LBA 2 -> 33)
- secondary GPT table partition  (LBA -2 -> -33)
- secondary GPT table header (LBA - 1)
And there are two CRCs in the GPT table header, one that protect the GPT 
table header, and one that protect the GPT table partition.

if step 1 or 2 fail, the secondary GPT is invalid and it can be 
recovered using the primary GPT (and the swap is not applied).
If step 3 or 4 fail, the primary GPT is invalid and it can be recovered 
using the secondary GPT (and the swap is applied).
If step 5 fails, I am afraid that protective MBR could be corrupted, so 
the GPT table may be invalid.

But, I think that we could avoid to write the protective MBR if there is 
already a protective MBR on the disk.
And as we talk about a script to swap GPT partition, a protective MBR 
should be already on the disk.


> Can we rely on the mechanism you implemented ? Quite scared that the 
> tables are rewritten at any toggle.
>

If we modify libfdisk to only write the MBR when it is usefull.
Before writting the MBR, libfdisk could read the MBR on the disk, and if 
the MBR on the disk is the same
that the MBR in memory, then the write is avoided.
I think that with this change, the swap could be quite safe.

> Regards,
> Stefano


Regards,

Philippe



>
>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>> ---
>>   doc/source/handlers.rst     |  24 ++++
>>   handlers/diskpart_handler.c | 244 ++++++++++++++++++++++++++++++++++++
>>   2 files changed, 268 insertions(+)
>>
>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>> index 8d2aeea..025d100 100644
>> --- a/doc/source/handlers.rst
>> +++ b/doc/source/handlers.rst
>> @@ -943,6 +943,30 @@ MBR Example:
>>          }
>>       }
>>   +gpt partition swap
>> +..................
>> +
>> +There is a handler gptswap that allow to swap gpt partitions after 
>> all the images were flashed.
>> +This handler only swap the name of the partition. It coud be usefull 
>> for a dual bank strategy.
>> +This handler is a script for the point of view of swupdate, so the 
>> node that provide it should
>> +be added in the section scripts.
>> +
>> +Simple example:
>> +
>> +::
>> +
>> +        scripts: (
>> +                {
>> +                        type = "gptswap";
>> +                        device = "/dev/vdb";
>> +                        properties =
>> +                        {
>> +                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
>> +                                swap-1 = [ "kernel-0" , "kernel-1" ];
>> +                        };
>> +                },
>> +         );
>> +
>>   Diskformat Handler
>>   ------------------
>>   diff --git a/handlers/diskpart_handler.c b/handlers/diskpart_handler.c
>> index 0af4e60..0ff5968 100644
>> --- a/handlers/diskpart_handler.c
>> +++ b/handlers/diskpart_handler.c
>> @@ -14,8 +14,10 @@
>>   #include <errno.h>
>>   #include <ctype.h>
>>   #include <sys/file.h>
>> +#include <sys/ioctl.h>
>>   #include <sys/types.h>
>>   #include <libfdisk/libfdisk.h>
>> +#include <linux/fs.h>
>>   #include <fs_interface.h>
>>   #include <uuid/uuid.h>
>>   #include "swupdate.h"
>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>> fdisk_context *cxt, struct diskpart_ta
>>       return ret;
>>   }
>>   +static struct fdisk_partition *
>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, 
>> char *name)
>> +{
>> +    struct fdisk_partition *pa = NULL;
>> +    struct fdisk_partition *ipa = NULL;
>> +    struct fdisk_iter *itr;
>> +    const char *iname;
>> +
>> +    if (!tb || !name)
>> +        goto out;
>> +
>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>> +
>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>> +        iname = fdisk_partition_get_name(ipa);
>> +        if (iname && !strcmp(iname, name)) {
>> +            pa = ipa;
>> +            break;
>> +        }
>> +    }
>> +
>> +    fdisk_free_iter(itr);
>> +
>> + out:
>> +    return pa;
>> +}
>> +
>> +static struct fdisk_partition *
>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
>> +{
>> +    struct fdisk_partition *pa = NULL;
>> +
>> +    if (!tb || !name)
>> +        goto out;
>> +
>> +    if (tb->parent)
>> +        pa = diskpart_fdisk_table_get_partition_by_name(tb->parent, 
>> name);
>> +
>> + out:
>> +    return pa;
>> +}
>> +
>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>> +                   struct create_table *createtable,
>> +                   char *name1, char *name2)
>> +{
>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>> +    int ret = -1;
>> +
>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>> +    if (!pa1) {
>> +        ERROR("Can't find partition %s", name1);
>> +        goto out;
>> +    }
>> +
>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>> +    if (!pa2) {
>> +        ERROR("Can't find partition %s", name2);
>> +        goto out;
>> +    }
>> +
>> +    ret = fdisk_partition_set_name(pa1, name2);
>> +    if (ret)
>> +        goto out;
>> +    ret = fdisk_partition_set_name(pa2, name1);
>> +    if (ret)
>> +        goto out;
>> +
>> +    createtable->parent = true;
>> +
>> + out:
>> +    return ret;
>> +}
>> +
>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>                     struct partition_data *part,
>>                     unsigned long sector_size,
>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>> fdisk_context *cxt)
>>       fdisk_unref_context(cxt);
>>   }
>>   +static int diskpart_reread_partition(char *device)
>> +{
>> +    int fd, ret = -1;
>> +
>> +    fd = open(device, O_RDONLY);
>> +    if (fd < 0) {
>> +        ERROR("Device %s can't be opened", device);
>> +        goto out;
>> +    }
>> +
>> +    ret = ioctl(fd, BLKRRPART, NULL);
>> +    if (ret < 0) {
>> +        ERROR("Scan cannot be done on device %s", device);
>> +        goto out_close;
>> +    }
>> +
>> + out_close:
>> +    close(fd);
>> +
>> + out:
>> +    return ret;
>> +}
>> +
>>   static int diskpart(struct img_type *img,
>>       void __attribute__ ((__unused__)) *data)
>>   {
>> @@ -1066,9 +1165,154 @@ handler_release:
>>       return ret;
>>   }
>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>> +{
>> +    struct fdisk_context *cxt = NULL;
>> +    struct diskpart_table *tb = NULL;
>> +    int ret = 0;
>> +    unsigned long hybrid = 0;
>> +    struct hnd_priv priv =  {
>> +        .labeltype = FDISK_DISKLABEL_DOS,
>> +    };
>> +    struct create_table *createtable = NULL;
>> +    struct script_handler_data *script_data = data;
>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>> +    struct dict_list *partitions;
>> +    struct dict_list_elem *partition;
>> +    int num, count = 0;
>> +    char *name[2];
>> +
>> +    if (!script_data)
>> +        return -EINVAL;
>> +
>> +    /*
>> +     * Call only in case of postinstall
>> +     */
>> +    if (script_data->scriptfn != POSTINSTALL)
>> +        return 0;
>> +
>> +    LIST_INIT(&priv.listparts);
>> +    if (!strlen(img->device)) {
>> +        ERROR("Partition handler without setting the device");
>> +        return -EINVAL;
>> +    }
>> +
>> +    createtable = calloc(1, sizeof(*createtable));
>> +    if (!createtable) {
>> +        ERROR("OOM allocating createtable !");
>> +        return -ENOMEM;
>> +    }
>> +
>> +    /*
>> +     * Reads flags: nolock and noinuse
>> +     */
>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>> "nolock"));
>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>> "noinuse"));
>> +
>> +        /*
>> +         * Create context
>> +         */
>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>> createtable);
>> +    if (ret == -EACCES)
>> +        goto handler_release;
>> +    else if (ret)
>> +        goto handler_exit;
>> +
>> +    tb = calloc(1, sizeof(*tb));
>> +    if (!tb) {
>> +        ERROR("OOM loading partitions !");
>> +        ret = -ENOMEM;
>> +        goto handler_exit;
>> +    }
>> +
>> +    /*
>> +     * Fill the in-memory partition table from the disk.
>> +     */
>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>> +    if (ret)
>> +        goto handler_exit;
>> +
>> +    while (1) {
>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>> +        partitions = dict_get_list(&img->properties, prop);
>> +        if (!partitions)
>> +            break;
>> +
>> +        num = 0;
>> +        LIST_FOREACH(partition, partitions, next) {
>> +            if (num >= 2) {
>> +                ERROR("Too many partition (%s)", prop);
>> +                goto handler_exit;
>> +            }
>> +
>> +            name[num] = partition->value;
>> +            num++;
>> +        }
>> +
>> +        if (num != 2) {
>> +            ERROR("Invalid number (%d) of partition (%s)", num, prop);
>> +            goto handler_exit;
>> +        }
>> +
>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>> +
>> +        ret = diskpart_swap_partition(tb, createtable, name[0], 
>> name[1]);
>> +        if (ret) {
>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>> +            break;
>> +        }
>> +
>> +        count++;
>> +    }
>> +
>> +    /* Reload table for parent */
>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>> +    if (ret) {
>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>> +        goto handler_exit;
>> +    }
>> +
>> +    /* Reload table for child */
>> +    if (IS_HYBRID(cxt)) {
>> +        ret = diskpart_reload_table(cxt, tb->child);
>> +        if (ret) {
>> +            ERROR("Can't reload table for child (err = %d)", ret);
>> +            goto handler_exit;
>> +        }
>> +    }
>> +
>> +    /* Write table */
>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>> priv.noinuse);
>> +    if (ret) {
>> +        ERROR("Can't write table (err = %d)", ret);
>> +        goto handler_exit;
>> +    }
>> +
>> +handler_exit:
>> +    if (tb)
>> +        diskpart_unref_table(tb);
>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>> +        if (fdisk_deassign_device(cxt, 0))
>> +            WARN("Error deassign device %s", img->device);
>> +
>> +handler_release:
>> +    if (cxt)
>> +        diskpart_unref_context(cxt);
>> +
>> +    /*
>> +     * Re-read the partition table to be sure that SWupdate does not
>> +     * try to acces the partitions before the kernel is ready
>> +     */
>> +    diskpart_reread_partition(img->device);
>> +
>> +    return ret;
>> +}
>> +
>>   __attribute__((constructor))
>>   void diskpart_handler(void)
>>   {
>>       register_handler("diskpart", diskpart,
>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>> +        register_handler("gptswap", gpt_swap_partition,
>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>   }
>
>
Stefano Babic April 6, 2022, 4:27 p.m. UTC | #3
Hi Philippe,

On 06.04.22 17:30, Philippe REYNES wrote:
> Hi Stefano,
> 
> 
> Le 05/04/2022 à 16:17, Stefano Babic a écrit :
>> Hi Philippe,
>>
>> On 05.04.22 15:17, Philippe Reynes wrote:
>>> Add a handler gptswap to swap gpt partition.
>>> This handler is a script that can be used to swap gpt partition
>>> names after all the image were flashed (only partition names are
>>> swapped). This script may be usefull for a dual bank strategy.
>>>
>>> For example:
>>> scripts: (
>>>     {
>>>         type = "gptswap";
>>>         device = "/dev/vdb";
>>>         properties =
>>>         {
>>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>>         };
>>>     },
>>> );
>>>
>>
>> I would like first to discuss about the way to do, not the patch (I 
>> will review later). My concerns are related if this can be atomic.
>>
>> The procedure you implement is something like:
>>
>> - you read the partitions from disk
>> - you exchange the names of partitions according to the rule in 
>> sw-description. This happens in memory, no problem up now.
>>
>> - call diskpart_write_table() and rewrite the whole table, as it 
>> should be.
>>
>> At the end, the whole partition table is rewritten. Because of that, 
>> and also because the data should then written by the low level driver 
>> (for example a SDHC driver), I have the feeling that it is not atomic. 
>> We can also have two partitions with the same name after a power cut, 
>> or even a corrupted table. Or do you have some other thoughts ?
>>
>> I have thought to implement this in a less invasive way, just setting 
>> a "bootflag" - we have not on GPT, but we can use an attribute 
>> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Ced0439efd7c94fde4dd708da170ef58d%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637847650262696847%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=5Mboz3yvvY9ZQJ0VWb2O3HNsvplvjyP5kBrqgWksbPM%3D&amp;reserved=0), 
>> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() with 
>> one "user" flag.
>>
> 
> In my understanding, partitions flags are set in memory, and then the 
> whole GPT partition is written to the disk.

Right.

> So the behavior is the same if we swap the partition name or a partition 
> flag.

Ok, got it. So because we rewrite the whole GPT header, we recompute the 
CRC, and then it is not important if we change a string (=name) or a 
single bit (=flag). I think this is fine, so let's proceed.

> 
> 
> I have looked the function gpt_write_disklabel (in util-linux)

...yes, util-linux is the source of inspiration for most code around 
disks in SWUpdate....

> that is 
> used to write the full GPT partition.
> 
> This comment describes how it happens:
> 
>      /*
> 
>       * UEFI requires writing in this specific order:
> 
>       *   1) backup partition tables
> 
>       *   2) backup GPT header
> 
>       *   3) primary partition tables
> 
>       *   4) primary GPT header
> 
>       *   5) protective MBR
> 
>       *
> 
>       * If any write fails, we abort the rest.
> 
>       */
> 
> When GPT is used on a disk, there are 5 elements:
> - protective MBR (LBA 0)
> - primary GPT table header (LBA 1)
> - primary GPT table partition (LBA 2 -> 33)
> - secondary GPT table partition  (LBA -2 -> -33)
> - secondary GPT table header (LBA - 1)
> And there are two CRCs in the GPT table header, one that protect the GPT 
> table header, and one that protect the GPT table partition.

Ok

> 
> if step 1 or 2 fail, the secondary GPT is invalid and it can be 
> recovered using the primary GPT (and the swap is not applied).

More as one step fails, I am bothering what is happening in case of 
power-cut and if some of these steps are not atomic, and system does not 
boot or boot with the wrong partition.

My first concern is who is then in charge to repair : we have a working 
system with full GPT protection (both header are valid), we update and 
we have a power-cut. Good, backup header is corrupted, but we can boot 
again. But as far as I know, the kernel just mounts the partition and 
the GPT is not explicitely set or repaired, decreasing our reliability.

What do you think ?

> If step 3 or 4 fail, the primary GPT is invalid and it can be recovered 
> using the secondary GPT (and the swap is applied).

Ok - fine, with the exception that I do not know who will repair the 
primary GPT header.

> If step 5 fails, I am afraid that protective MBR could be corrupted, so 
> the GPT table may be invalid.

I just tested with a not very recent U-Boot, in any case a device with 
GPT table. Dropping protective MBR, U-Boot does not find any partition 
and device does not boot. So it looks like a no way here.

And rerunning SWUpdate as recovery, the diskpartitioner (at the end, 
libfdisk) reports that partitions are damaged and must be created.

> 
> But, I think that we could avoid to write the protective MBR if there is 
> already a protective MBR on the disk.

According to gpt_write_disklabel, this is done by forcing a hybrid MBR, 
but sure it is not what we want.

> And as we talk about a script to swap GPT partition, a protective MBR 
> should be already on the disk.

Sure, agree.

> 
> 
>> Can we rely on the mechanism you implemented ? Quite scared that the 
>> tables are rewritten at any toggle.
>>
> 
> If we modify libfdisk to only write the MBR when it is usefull.

Ok - the question is how to push this change mainline, and if this 
change can be mainlined. Surely a hack just for SWUpdate (in a meta 
layer) is not praticable, and not appliable for other systems (Debian 
for example).

This is not so simple, because struct fdisk_context is opaque. And 
context is the only parameter for gpt_write_disklabel().

> Before writting the MBR, libfdisk could read the MBR on the disk, and if 
> the MBR on the disk is the same
> that the MBR in memory, then the write is avoided.
> I think that with this change, the swap could be quite safe.

Point is how to push such as changes, that are useful probably just in 
our use cases....

Regards,
Stefano

> 
>> Regards,
>> Stefano
> 
> 
> Regards,
> 
> Philippe
> 
> 
> 
>>
>>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>>> ---
>>>   doc/source/handlers.rst     |  24 ++++
>>>   handlers/diskpart_handler.c | 244 ++++++++++++++++++++++++++++++++++++
>>>   2 files changed, 268 insertions(+)
>>>
>>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>>> index 8d2aeea..025d100 100644
>>> --- a/doc/source/handlers.rst
>>> +++ b/doc/source/handlers.rst
>>> @@ -943,6 +943,30 @@ MBR Example:
>>>          }
>>>       }
>>>   +gpt partition swap
>>> +..................
>>> +
>>> +There is a handler gptswap that allow to swap gpt partitions after 
>>> all the images were flashed.
>>> +This handler only swap the name of the partition. It coud be usefull 
>>> for a dual bank strategy.
>>> +This handler is a script for the point of view of swupdate, so the 
>>> node that provide it should
>>> +be added in the section scripts.
>>> +
>>> +Simple example:
>>> +
>>> +::
>>> +
>>> +        scripts: (
>>> +                {
>>> +                        type = "gptswap";
>>> +                        device = "/dev/vdb";
>>> +                        properties =
>>> +                        {
>>> +                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>> +                                swap-1 = [ "kernel-0" , "kernel-1" ];
>>> +                        };
>>> +                },
>>> +         );
>>> +
>>>   Diskformat Handler
>>>   ------------------
>>>   diff --git a/handlers/diskpart_handler.c b/handlers/diskpart_handler.c
>>> index 0af4e60..0ff5968 100644
>>> --- a/handlers/diskpart_handler.c
>>> +++ b/handlers/diskpart_handler.c
>>> @@ -14,8 +14,10 @@
>>>   #include <errno.h>
>>>   #include <ctype.h>
>>>   #include <sys/file.h>
>>> +#include <sys/ioctl.h>
>>>   #include <sys/types.h>
>>>   #include <libfdisk/libfdisk.h>
>>> +#include <linux/fs.h>
>>>   #include <fs_interface.h>
>>>   #include <uuid/uuid.h>
>>>   #include "swupdate.h"
>>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>>> fdisk_context *cxt, struct diskpart_ta
>>>       return ret;
>>>   }
>>>   +static struct fdisk_partition *
>>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, 
>>> char *name)
>>> +{
>>> +    struct fdisk_partition *pa = NULL;
>>> +    struct fdisk_partition *ipa = NULL;
>>> +    struct fdisk_iter *itr;
>>> +    const char *iname;
>>> +
>>> +    if (!tb || !name)
>>> +        goto out;
>>> +
>>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>>> +
>>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>>> +        iname = fdisk_partition_get_name(ipa);
>>> +        if (iname && !strcmp(iname, name)) {
>>> +            pa = ipa;
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    fdisk_free_iter(itr);
>>> +
>>> + out:
>>> +    return pa;
>>> +}
>>> +
>>> +static struct fdisk_partition *
>>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
>>> +{
>>> +    struct fdisk_partition *pa = NULL;
>>> +
>>> +    if (!tb || !name)
>>> +        goto out;
>>> +
>>> +    if (tb->parent)
>>> +        pa = diskpart_fdisk_table_get_partition_by_name(tb->parent, 
>>> name);
>>> +
>>> + out:
>>> +    return pa;
>>> +}
>>> +
>>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>>> +                   struct create_table *createtable,
>>> +                   char *name1, char *name2)
>>> +{
>>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>>> +    int ret = -1;
>>> +
>>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>>> +    if (!pa1) {
>>> +        ERROR("Can't find partition %s", name1);
>>> +        goto out;
>>> +    }
>>> +
>>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>>> +    if (!pa2) {
>>> +        ERROR("Can't find partition %s", name2);
>>> +        goto out;
>>> +    }
>>> +
>>> +    ret = fdisk_partition_set_name(pa1, name2);
>>> +    if (ret)
>>> +        goto out;
>>> +    ret = fdisk_partition_set_name(pa2, name1);
>>> +    if (ret)
>>> +        goto out;
>>> +
>>> +    createtable->parent = true;
>>> +
>>> + out:
>>> +    return ret;
>>> +}
>>> +
>>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>>                     struct partition_data *part,
>>>                     unsigned long sector_size,
>>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>>> fdisk_context *cxt)
>>>       fdisk_unref_context(cxt);
>>>   }
>>>   +static int diskpart_reread_partition(char *device)
>>> +{
>>> +    int fd, ret = -1;
>>> +
>>> +    fd = open(device, O_RDONLY);
>>> +    if (fd < 0) {
>>> +        ERROR("Device %s can't be opened", device);
>>> +        goto out;
>>> +    }
>>> +
>>> +    ret = ioctl(fd, BLKRRPART, NULL);
>>> +    if (ret < 0) {
>>> +        ERROR("Scan cannot be done on device %s", device);
>>> +        goto out_close;
>>> +    }
>>> +
>>> + out_close:
>>> +    close(fd);
>>> +
>>> + out:
>>> +    return ret;
>>> +}
>>> +
>>>   static int diskpart(struct img_type *img,
>>>       void __attribute__ ((__unused__)) *data)
>>>   {
>>> @@ -1066,9 +1165,154 @@ handler_release:
>>>       return ret;
>>>   }
>>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>>> +{
>>> +    struct fdisk_context *cxt = NULL;
>>> +    struct diskpart_table *tb = NULL;
>>> +    int ret = 0;
>>> +    unsigned long hybrid = 0;
>>> +    struct hnd_priv priv =  {
>>> +        .labeltype = FDISK_DISKLABEL_DOS,
>>> +    };
>>> +    struct create_table *createtable = NULL;
>>> +    struct script_handler_data *script_data = data;
>>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>>> +    struct dict_list *partitions;
>>> +    struct dict_list_elem *partition;
>>> +    int num, count = 0;
>>> +    char *name[2];
>>> +
>>> +    if (!script_data)
>>> +        return -EINVAL;
>>> +
>>> +    /*
>>> +     * Call only in case of postinstall
>>> +     */
>>> +    if (script_data->scriptfn != POSTINSTALL)
>>> +        return 0;
>>> +
>>> +    LIST_INIT(&priv.listparts);
>>> +    if (!strlen(img->device)) {
>>> +        ERROR("Partition handler without setting the device");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    createtable = calloc(1, sizeof(*createtable));
>>> +    if (!createtable) {
>>> +        ERROR("OOM allocating createtable !");
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    /*
>>> +     * Reads flags: nolock and noinuse
>>> +     */
>>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>>> "nolock"));
>>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>>> "noinuse"));
>>> +
>>> +        /*
>>> +         * Create context
>>> +         */
>>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>>> createtable);
>>> +    if (ret == -EACCES)
>>> +        goto handler_release;
>>> +    else if (ret)
>>> +        goto handler_exit;
>>> +
>>> +    tb = calloc(1, sizeof(*tb));
>>> +    if (!tb) {
>>> +        ERROR("OOM loading partitions !");
>>> +        ret = -ENOMEM;
>>> +        goto handler_exit;
>>> +    }
>>> +
>>> +    /*
>>> +     * Fill the in-memory partition table from the disk.
>>> +     */
>>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>>> +    if (ret)
>>> +        goto handler_exit;
>>> +
>>> +    while (1) {
>>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>>> +        partitions = dict_get_list(&img->properties, prop);
>>> +        if (!partitions)
>>> +            break;
>>> +
>>> +        num = 0;
>>> +        LIST_FOREACH(partition, partitions, next) {
>>> +            if (num >= 2) {
>>> +                ERROR("Too many partition (%s)", prop);
>>> +                goto handler_exit;
>>> +            }
>>> +
>>> +            name[num] = partition->value;
>>> +            num++;
>>> +        }
>>> +
>>> +        if (num != 2) {
>>> +            ERROR("Invalid number (%d) of partition (%s)", num, prop);
>>> +            goto handler_exit;
>>> +        }
>>> +
>>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>>> +
>>> +        ret = diskpart_swap_partition(tb, createtable, name[0], 
>>> name[1]);
>>> +        if (ret) {
>>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>>> +            break;
>>> +        }
>>> +
>>> +        count++;
>>> +    }
>>> +
>>> +    /* Reload table for parent */
>>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>>> +    if (ret) {
>>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>>> +        goto handler_exit;
>>> +    }
>>> +
>>> +    /* Reload table for child */
>>> +    if (IS_HYBRID(cxt)) {
>>> +        ret = diskpart_reload_table(cxt, tb->child);
>>> +        if (ret) {
>>> +            ERROR("Can't reload table for child (err = %d)", ret);
>>> +            goto handler_exit;
>>> +        }
>>> +    }
>>> +
>>> +    /* Write table */
>>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>>> priv.noinuse);
>>> +    if (ret) {
>>> +        ERROR("Can't write table (err = %d)", ret);
>>> +        goto handler_exit;
>>> +    }
>>> +
>>> +handler_exit:
>>> +    if (tb)
>>> +        diskpart_unref_table(tb);
>>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>>> +        if (fdisk_deassign_device(cxt, 0))
>>> +            WARN("Error deassign device %s", img->device);
>>> +
>>> +handler_release:
>>> +    if (cxt)
>>> +        diskpart_unref_context(cxt);
>>> +
>>> +    /*
>>> +     * Re-read the partition table to be sure that SWupdate does not
>>> +     * try to acces the partitions before the kernel is ready
>>> +     */
>>> +    diskpart_reread_partition(img->device);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>   __attribute__((constructor))
>>>   void diskpart_handler(void)
>>>   {
>>>       register_handler("diskpart", diskpart,
>>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>>> +        register_handler("gptswap", gpt_swap_partition,
>>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>>   }
>>
>>
>
Philippe REYNES April 7, 2022, 1:15 p.m. UTC | #4
Hi Stefano,


Le 06/04/2022 à 18:27, Stefano Babic a écrit :
> Hi Philippe,
>
> On 06.04.22 17:30, Philippe REYNES wrote:
>> Hi Stefano,
>>
>>
>> Le 05/04/2022 à 16:17, Stefano Babic a écrit :
>>> Hi Philippe,
>>>
>>> On 05.04.22 15:17, Philippe Reynes wrote:
>>>> Add a handler gptswap to swap gpt partition.
>>>> This handler is a script that can be used to swap gpt partition
>>>> names after all the image were flashed (only partition names are
>>>> swapped). This script may be usefull for a dual bank strategy.
>>>>
>>>> For example:
>>>> scripts: (
>>>>     {
>>>>         type = "gptswap";
>>>>         device = "/dev/vdb";
>>>>         properties =
>>>>         {
>>>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>>>         };
>>>>     },
>>>> );
>>>>
>>>
>>> I would like first to discuss about the way to do, not the patch (I 
>>> will review later). My concerns are related if this can be atomic.
>>>
>>> The procedure you implement is something like:
>>>
>>> - you read the partitions from disk
>>> - you exchange the names of partitions according to the rule in 
>>> sw-description. This happens in memory, no problem up now.
>>>
>>> - call diskpart_write_table() and rewrite the whole table, as it 
>>> should be.
>>>
>>> At the end, the whole partition table is rewritten. Because of that, 
>>> and also because the data should then written by the low level 
>>> driver (for example a SDHC driver), I have the feeling that it is 
>>> not atomic. We can also have two partitions with the same name after 
>>> a power cut, or even a corrupted table. Or do you have some other 
>>> thoughts ?
>>>
>>> I have thought to implement this in a less invasive way, just 
>>> setting a "bootflag" - we have not on GPT, but we can use an 
>>> attribute 
>>> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7C988784a0faec4c7f892208da17ea5966%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637848592538972432%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=FtkUO4BZcvmWiXMYTZfeLfr9S%2Bc57Rc%2F6nIYJ2SvQFQ%3D&amp;reserved=0), 
>>> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() 
>>> with one "user" flag.
>>>
>>
>> In my understanding, partitions flags are set in memory, and then the 
>> whole GPT partition is written to the disk.
>
> Right.
>
>> So the behavior is the same if we swap the partition name or a 
>> partition flag.
>
> Ok, got it. So because we rewrite the whole GPT header, we recompute 
> the CRC, and then it is not important if we change a string (=name) or 
> a single bit (=flag). I think this is fine, so let's proceed.
>
>>
>>
>> I have looked the function gpt_write_disklabel (in util-linux)
>
> ...yes, util-linux is the source of inspiration for most code around 
> disks in SWUpdate....
>
>> that is used to write the full GPT partition.
>>
>> This comment describes how it happens:
>>
>>      /*
>>
>>       * UEFI requires writing in this specific order:
>>
>>       *   1) backup partition tables
>>
>>       *   2) backup GPT header
>>
>>       *   3) primary partition tables
>>
>>       *   4) primary GPT header
>>
>>       *   5) protective MBR
>>
>>       *
>>
>>       * If any write fails, we abort the rest.
>>
>>       */
>>
>> When GPT is used on a disk, there are 5 elements:
>> - protective MBR (LBA 0)
>> - primary GPT table header (LBA 1)
>> - primary GPT table partition (LBA 2 -> 33)
>> - secondary GPT table partition  (LBA -2 -> -33)
>> - secondary GPT table header (LBA - 1)
>> And there are two CRCs in the GPT table header, one that protect the 
>> GPT table header, and one that protect the GPT table partition.
>
> Ok
>
>>
>> if step 1 or 2 fail, the secondary GPT is invalid and it can be 
>> recovered using the primary GPT (and the swap is not applied).
>
> More as one step fails, I am bothering what is happening in case of 
> power-cut and if some of these steps are not atomic, and system does 
> not boot or boot with the wrong partition.
>
> My first concern is who is then in charge to repair : we have a 
> working system with full GPT protection (both header are valid), we 
> update and we have a power-cut. Good, backup header is corrupted, but 
> we can boot again. But as far as I know, the kernel just mounts the 
> partition and the GPT is not explicitely set or repaired, decreasing 
> our reliability.
>
> What do you think ?

I think that we should look for a way to automatically check and repair 
GPT partition at boot.
I don't found a way to do that. So may be we will need to provide a 
tool/patch to do this check and repair.

>
>> If step 3 or 4 fail, the primary GPT is invalid and it can be 
>> recovered using the secondary GPT (and the swap is applied).
>
> Ok - fine, with the exception that I do not know who will repair the 
> primary GPT header.
>
>> If step 5 fails, I am afraid that protective MBR could be corrupted, 
>> so the GPT table may be invalid.
>
> I just tested with a not very recent U-Boot, in any case a device with 
> GPT table. Dropping protective MBR, U-Boot does not find any partition 
> and device does not boot. So it looks like a no way here.
>
> And rerunning SWUpdate as recovery, the diskpartitioner (at the end, 
> libfdisk) reports that partitions are damaged and must be created.
>
>>
>> But, I think that we could avoid to write the protective MBR if there 
>> is already a protective MBR on the disk.
>
> According to gpt_write_disklabel, this is done by forcing a hybrid 
> MBR, but sure it is not what we want.

Yes, the MBR is not written if the mode if hybrib.
My idea is not to set the hybrid mode, but before writting a new MBR, 
read the MBR on the disk, and compare it with
the MRB that should be written. If they are the same, then the new MBR 
is not written on the disk.

>
>> And as we talk about a script to swap GPT partition, a protective MBR 
>> should be already on the disk.
>
> Sure, agree.
>
>>
>>
>>> Can we rely on the mechanism you implemented ? Quite scared that the 
>>> tables are rewritten at any toggle.
>>>
>>
>> If we modify libfdisk to only write the MBR when it is usefull.
>
> Ok - the question is how to push this change mainline, and if this 
> change can be mainlined. Surely a hack just for SWUpdate (in a meta 
> layer) is not praticable, and not appliable for other systems (Debian 
> for example).
>
> This is not so simple, because struct fdisk_context is opaque. And 
> context is the only parameter for gpt_write_disklabel().
>
>> Before writting the MBR, libfdisk could read the MBR on the disk, and 
>> if the MBR on the disk is the same
>> that the MBR in memory, then the write is avoided.
>> I think that with this change, the swap could be quite safe.
>
> Point is how to push such as changes, that are useful probably just in 
> our use cases....
>

I think that we should push a patch to the libfdisk community. If they 
accept the patch, we could continue in this
way, if they refuse the patch, we may need to consider another solution.
I prepare this patch and I send it to the libfdisk community ASAP.

If this patch is accepted, I think that we could add a feature that 
automatically check and repair GPT table partition.
I think it could be done in u-boot, in the kernel or in an userspace daemon.

I am interested to add this feature (of course, enabled by a config) in 
u-boot. The idea is quite simple,
when u-boot starts, before parsing the GPT table to find the partition 
for the kernel, u-boot checks and
if necessary, repairs the GPT table. If the primary GPT table is broken, 
the secondary GPT table is used
to repair the primary GPT table. And same, in the other way, primary GPT 
table is used to repair the
secondary GPT table. A question would be about MBR, if the MBR is 
invalid, but a GPT table is valid
(primary or secondary), should we write a protective MBR ?.


> Regards,
> Stefano
>
>>
>>> Regards,
>>> Stefano
>>
>>
>> Regards,
>>
>> Philippe
>>
>>

Regards,
Philippe



>>
>>>
>>>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>>>> ---
>>>>   doc/source/handlers.rst     |  24 ++++
>>>>   handlers/diskpart_handler.c | 244 
>>>> ++++++++++++++++++++++++++++++++++++
>>>>   2 files changed, 268 insertions(+)
>>>>
>>>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>>>> index 8d2aeea..025d100 100644
>>>> --- a/doc/source/handlers.rst
>>>> +++ b/doc/source/handlers.rst
>>>> @@ -943,6 +943,30 @@ MBR Example:
>>>>          }
>>>>       }
>>>>   +gpt partition swap
>>>> +..................
>>>> +
>>>> +There is a handler gptswap that allow to swap gpt partitions after 
>>>> all the images were flashed.
>>>> +This handler only swap the name of the partition. It coud be 
>>>> usefull for a dual bank strategy.
>>>> +This handler is a script for the point of view of swupdate, so the 
>>>> node that provide it should
>>>> +be added in the section scripts.
>>>> +
>>>> +Simple example:
>>>> +
>>>> +::
>>>> +
>>>> +        scripts: (
>>>> +                {
>>>> +                        type = "gptswap";
>>>> +                        device = "/dev/vdb";
>>>> +                        properties =
>>>> +                        {
>>>> +                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>> +                                swap-1 = [ "kernel-0" , "kernel-1" ];
>>>> +                        };
>>>> +                },
>>>> +         );
>>>> +
>>>>   Diskformat Handler
>>>>   ------------------
>>>>   diff --git a/handlers/diskpart_handler.c 
>>>> b/handlers/diskpart_handler.c
>>>> index 0af4e60..0ff5968 100644
>>>> --- a/handlers/diskpart_handler.c
>>>> +++ b/handlers/diskpart_handler.c
>>>> @@ -14,8 +14,10 @@
>>>>   #include <errno.h>
>>>>   #include <ctype.h>
>>>>   #include <sys/file.h>
>>>> +#include <sys/ioctl.h>
>>>>   #include <sys/types.h>
>>>>   #include <libfdisk/libfdisk.h>
>>>> +#include <linux/fs.h>
>>>>   #include <fs_interface.h>
>>>>   #include <uuid/uuid.h>
>>>>   #include "swupdate.h"
>>>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>>>> fdisk_context *cxt, struct diskpart_ta
>>>>       return ret;
>>>>   }
>>>>   +static struct fdisk_partition *
>>>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, 
>>>> char *name)
>>>> +{
>>>> +    struct fdisk_partition *pa = NULL;
>>>> +    struct fdisk_partition *ipa = NULL;
>>>> +    struct fdisk_iter *itr;
>>>> +    const char *iname;
>>>> +
>>>> +    if (!tb || !name)
>>>> +        goto out;
>>>> +
>>>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>>>> +
>>>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>>>> +        iname = fdisk_partition_get_name(ipa);
>>>> +        if (iname && !strcmp(iname, name)) {
>>>> +            pa = ipa;
>>>> +            break;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    fdisk_free_iter(itr);
>>>> +
>>>> + out:
>>>> +    return pa;
>>>> +}
>>>> +
>>>> +static struct fdisk_partition *
>>>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
>>>> +{
>>>> +    struct fdisk_partition *pa = NULL;
>>>> +
>>>> +    if (!tb || !name)
>>>> +        goto out;
>>>> +
>>>> +    if (tb->parent)
>>>> +        pa = 
>>>> diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
>>>> +
>>>> + out:
>>>> +    return pa;
>>>> +}
>>>> +
>>>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>>>> +                   struct create_table *createtable,
>>>> +                   char *name1, char *name2)
>>>> +{
>>>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>>>> +    int ret = -1;
>>>> +
>>>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>>>> +    if (!pa1) {
>>>> +        ERROR("Can't find partition %s", name1);
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>>>> +    if (!pa2) {
>>>> +        ERROR("Can't find partition %s", name2);
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    ret = fdisk_partition_set_name(pa1, name2);
>>>> +    if (ret)
>>>> +        goto out;
>>>> +    ret = fdisk_partition_set_name(pa2, name1);
>>>> +    if (ret)
>>>> +        goto out;
>>>> +
>>>> +    createtable->parent = true;
>>>> +
>>>> + out:
>>>> +    return ret;
>>>> +}
>>>> +
>>>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>>>                     struct partition_data *part,
>>>>                     unsigned long sector_size,
>>>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>>>> fdisk_context *cxt)
>>>>       fdisk_unref_context(cxt);
>>>>   }
>>>>   +static int diskpart_reread_partition(char *device)
>>>> +{
>>>> +    int fd, ret = -1;
>>>> +
>>>> +    fd = open(device, O_RDONLY);
>>>> +    if (fd < 0) {
>>>> +        ERROR("Device %s can't be opened", device);
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    ret = ioctl(fd, BLKRRPART, NULL);
>>>> +    if (ret < 0) {
>>>> +        ERROR("Scan cannot be done on device %s", device);
>>>> +        goto out_close;
>>>> +    }
>>>> +
>>>> + out_close:
>>>> +    close(fd);
>>>> +
>>>> + out:
>>>> +    return ret;
>>>> +}
>>>> +
>>>>   static int diskpart(struct img_type *img,
>>>>       void __attribute__ ((__unused__)) *data)
>>>>   {
>>>> @@ -1066,9 +1165,154 @@ handler_release:
>>>>       return ret;
>>>>   }
>>>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>>>> +{
>>>> +    struct fdisk_context *cxt = NULL;
>>>> +    struct diskpart_table *tb = NULL;
>>>> +    int ret = 0;
>>>> +    unsigned long hybrid = 0;
>>>> +    struct hnd_priv priv =  {
>>>> +        .labeltype = FDISK_DISKLABEL_DOS,
>>>> +    };
>>>> +    struct create_table *createtable = NULL;
>>>> +    struct script_handler_data *script_data = data;
>>>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>>>> +    struct dict_list *partitions;
>>>> +    struct dict_list_elem *partition;
>>>> +    int num, count = 0;
>>>> +    char *name[2];
>>>> +
>>>> +    if (!script_data)
>>>> +        return -EINVAL;
>>>> +
>>>> +    /*
>>>> +     * Call only in case of postinstall
>>>> +     */
>>>> +    if (script_data->scriptfn != POSTINSTALL)
>>>> +        return 0;
>>>> +
>>>> +    LIST_INIT(&priv.listparts);
>>>> +    if (!strlen(img->device)) {
>>>> +        ERROR("Partition handler without setting the device");
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    createtable = calloc(1, sizeof(*createtable));
>>>> +    if (!createtable) {
>>>> +        ERROR("OOM allocating createtable !");
>>>> +        return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * Reads flags: nolock and noinuse
>>>> +     */
>>>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>>>> "nolock"));
>>>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>>>> "noinuse"));
>>>> +
>>>> +        /*
>>>> +         * Create context
>>>> +         */
>>>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>>>> createtable);
>>>> +    if (ret == -EACCES)
>>>> +        goto handler_release;
>>>> +    else if (ret)
>>>> +        goto handler_exit;
>>>> +
>>>> +    tb = calloc(1, sizeof(*tb));
>>>> +    if (!tb) {
>>>> +        ERROR("OOM loading partitions !");
>>>> +        ret = -ENOMEM;
>>>> +        goto handler_exit;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * Fill the in-memory partition table from the disk.
>>>> +     */
>>>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>>>> +    if (ret)
>>>> +        goto handler_exit;
>>>> +
>>>> +    while (1) {
>>>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>>>> +        partitions = dict_get_list(&img->properties, prop);
>>>> +        if (!partitions)
>>>> +            break;
>>>> +
>>>> +        num = 0;
>>>> +        LIST_FOREACH(partition, partitions, next) {
>>>> +            if (num >= 2) {
>>>> +                ERROR("Too many partition (%s)", prop);
>>>> +                goto handler_exit;
>>>> +            }
>>>> +
>>>> +            name[num] = partition->value;
>>>> +            num++;
>>>> +        }
>>>> +
>>>> +        if (num != 2) {
>>>> +            ERROR("Invalid number (%d) of partition (%s)", num, 
>>>> prop);
>>>> +            goto handler_exit;
>>>> +        }
>>>> +
>>>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>>>> +
>>>> +        ret = diskpart_swap_partition(tb, createtable, name[0], 
>>>> name[1]);
>>>> +        if (ret) {
>>>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        count++;
>>>> +    }
>>>> +
>>>> +    /* Reload table for parent */
>>>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>>>> +    if (ret) {
>>>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>>>> +        goto handler_exit;
>>>> +    }
>>>> +
>>>> +    /* Reload table for child */
>>>> +    if (IS_HYBRID(cxt)) {
>>>> +        ret = diskpart_reload_table(cxt, tb->child);
>>>> +        if (ret) {
>>>> +            ERROR("Can't reload table for child (err = %d)", ret);
>>>> +            goto handler_exit;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    /* Write table */
>>>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>>>> priv.noinuse);
>>>> +    if (ret) {
>>>> +        ERROR("Can't write table (err = %d)", ret);
>>>> +        goto handler_exit;
>>>> +    }
>>>> +
>>>> +handler_exit:
>>>> +    if (tb)
>>>> +        diskpart_unref_table(tb);
>>>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>>>> +        if (fdisk_deassign_device(cxt, 0))
>>>> +            WARN("Error deassign device %s", img->device);
>>>> +
>>>> +handler_release:
>>>> +    if (cxt)
>>>> +        diskpart_unref_context(cxt);
>>>> +
>>>> +    /*
>>>> +     * Re-read the partition table to be sure that SWupdate does not
>>>> +     * try to acces the partitions before the kernel is ready
>>>> +     */
>>>> +    diskpart_reread_partition(img->device);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>>   __attribute__((constructor))
>>>>   void diskpart_handler(void)
>>>>   {
>>>>       register_handler("diskpart", diskpart,
>>>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>>>> +        register_handler("gptswap", gpt_swap_partition,
>>>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>>>   }
>>>
>>>
>>
>
>
Stefano Babic April 7, 2022, 1:28 p.m. UTC | #5
Hi Philippe,

On 07.04.22 15:15, Philippe REYNES wrote:
> Hi Stefano,
> 
> 
> Le 06/04/2022 à 18:27, Stefano Babic a écrit :
>> Hi Philippe,
>>
>> On 06.04.22 17:30, Philippe REYNES wrote:
>>> Hi Stefano,
>>>
>>>
>>> Le 05/04/2022 à 16:17, Stefano Babic a écrit :
>>>> Hi Philippe,
>>>>
>>>> On 05.04.22 15:17, Philippe Reynes wrote:
>>>>> Add a handler gptswap to swap gpt partition.
>>>>> This handler is a script that can be used to swap gpt partition
>>>>> names after all the image were flashed (only partition names are
>>>>> swapped). This script may be usefull for a dual bank strategy.
>>>>>
>>>>> For example:
>>>>> scripts: (
>>>>>     {
>>>>>         type = "gptswap";
>>>>>         device = "/dev/vdb";
>>>>>         properties =
>>>>>         {
>>>>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>>>>         };
>>>>>     },
>>>>> );
>>>>>
>>>>
>>>> I would like first to discuss about the way to do, not the patch (I 
>>>> will review later). My concerns are related if this can be atomic.
>>>>
>>>> The procedure you implement is something like:
>>>>
>>>> - you read the partitions from disk
>>>> - you exchange the names of partitions according to the rule in 
>>>> sw-description. This happens in memory, no problem up now.
>>>>
>>>> - call diskpart_write_table() and rewrite the whole table, as it 
>>>> should be.
>>>>
>>>> At the end, the whole partition table is rewritten. Because of that, 
>>>> and also because the data should then written by the low level 
>>>> driver (for example a SDHC driver), I have the feeling that it is 
>>>> not atomic. We can also have two partitions with the same name after 
>>>> a power cut, or even a corrupted table. Or do you have some other 
>>>> thoughts ?
>>>>
>>>> I have thought to implement this in a less invasive way, just 
>>>> setting a "bootflag" - we have not on GPT, but we can use an 
>>>> attribute 
>>>> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7C988784a0faec4c7f892208da17ea5966%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637848592538972432%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=FtkUO4BZcvmWiXMYTZfeLfr9S%2Bc57Rc%2F6nIYJ2SvQFQ%3D&amp;reserved=0), 
>>>> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() 
>>>> with one "user" flag.
>>>>
>>>
>>> In my understanding, partitions flags are set in memory, and then the 
>>> whole GPT partition is written to the disk.
>>
>> Right.
>>
>>> So the behavior is the same if we swap the partition name or a 
>>> partition flag.
>>
>> Ok, got it. So because we rewrite the whole GPT header, we recompute 
>> the CRC, and then it is not important if we change a string (=name) or 
>> a single bit (=flag). I think this is fine, so let's proceed.
>>
>>>
>>>
>>> I have looked the function gpt_write_disklabel (in util-linux)
>>
>> ...yes, util-linux is the source of inspiration for most code around 
>> disks in SWUpdate....
>>
>>> that is used to write the full GPT partition.
>>>
>>> This comment describes how it happens:
>>>
>>>      /*
>>>
>>>       * UEFI requires writing in this specific order:
>>>
>>>       *   1) backup partition tables
>>>
>>>       *   2) backup GPT header
>>>
>>>       *   3) primary partition tables
>>>
>>>       *   4) primary GPT header
>>>
>>>       *   5) protective MBR
>>>
>>>       *
>>>
>>>       * If any write fails, we abort the rest.
>>>
>>>       */
>>>
>>> When GPT is used on a disk, there are 5 elements:
>>> - protective MBR (LBA 0)
>>> - primary GPT table header (LBA 1)
>>> - primary GPT table partition (LBA 2 -> 33)
>>> - secondary GPT table partition  (LBA -2 -> -33)
>>> - secondary GPT table header (LBA - 1)
>>> And there are two CRCs in the GPT table header, one that protect the 
>>> GPT table header, and one that protect the GPT table partition.
>>
>> Ok
>>
>>>
>>> if step 1 or 2 fail, the secondary GPT is invalid and it can be 
>>> recovered using the primary GPT (and the swap is not applied).
>>
>> More as one step fails, I am bothering what is happening in case of 
>> power-cut and if some of these steps are not atomic, and system does 
>> not boot or boot with the wrong partition.
>>
>> My first concern is who is then in charge to repair : we have a 
>> working system with full GPT protection (both header are valid), we 
>> update and we have a power-cut. Good, backup header is corrupted, but 
>> we can boot again. But as far as I know, the kernel just mounts the 
>> partition and the GPT is not explicitely set or repaired, decreasing 
>> our reliability.
>>
>> What do you think ?
> 
> I think that we should look for a way to automatically check and repair 
> GPT partition at boot.
> I don't found a way to do that. So may be we will need to provide a 
> tool/patch to do this check and repair.

You can fix it with fdisk / gdisk (I do npot know with sfdisk), or a 
small code doing this. Anyway, fixable.

> 
>>
>>> If step 3 or 4 fail, the primary GPT is invalid and it can be 
>>> recovered using the secondary GPT (and the swap is applied).
>>
>> Ok - fine, with the exception that I do not know who will repair the 
>> primary GPT header.
>>
>>> If step 5 fails, I am afraid that protective MBR could be corrupted, 
>>> so the GPT table may be invalid.
>>
>> I just tested with a not very recent U-Boot, in any case a device with 
>> GPT table. Dropping protective MBR, U-Boot does not find any partition 
>> and device does not boot. So it looks like a no way here.
>>
>> And rerunning SWUpdate as recovery, the diskpartitioner (at the end, 
>> libfdisk) reports that partitions are damaged and must be created.
>>
>>>
>>> But, I think that we could avoid to write the protective MBR if there 
>>> is already a protective MBR on the disk.
>>
>> According to gpt_write_disklabel, this is done by forcing a hybrid 
>> MBR, but sure it is not what we want.
> 
> Yes, the MBR is not written if the mode if hybrib.
> My idea is not to set the hybrid mode, but before writting a new MBR, 
> read the MBR on the disk, and compare it with
> the MRB that should be written. If they are the same, then the new MBR 
> is not written on the disk.
> 
>>
>>> And as we talk about a script to swap GPT partition, a protective MBR 
>>> should be already on the disk.
>>
>> Sure, agree.
>>
>>>
>>>
>>>> Can we rely on the mechanism you implemented ? Quite scared that the 
>>>> tables are rewritten at any toggle.
>>>>
>>>
>>> If we modify libfdisk to only write the MBR when it is usefull.
>>
>> Ok - the question is how to push this change mainline, and if this 
>> change can be mainlined. Surely a hack just for SWUpdate (in a meta 
>> layer) is not praticable, and not appliable for other systems (Debian 
>> for example).
>>
>> This is not so simple, because struct fdisk_context is opaque. And 
>> context is the only parameter for gpt_write_disklabel().
>>
>>> Before writting the MBR, libfdisk could read the MBR on the disk, and 
>>> if the MBR on the disk is the same
>>> that the MBR in memory, then the write is avoided.
>>> I think that with this change, the swap could be quite safe.
>>
>> Point is how to push such as changes, that are useful probably just in 
>> our use cases....
>>
> 
> I think that we should push a patch to the libfdisk community. If they 
> accept the patch, we could continue in this
> way, if they refuse the patch, we may need to consider another solution.
> I prepare this patch and I send it to the libfdisk community ASAP.
> 

Fully agree, go on !

> If this patch is accepted, I think that we could add a feature that 
> automatically check and repair GPT table partition.

Yes: if we can safe switch even in case of power-cut, it is ok for me.

> I think it could be done in u-boot, in the kernel or in an userspace 
> daemon.

The easiest way is in user space.

> 
> I am interested to add this feature (of course, enabled by a config) in 
> u-boot. The idea is quite simple,
> when u-boot starts, before parsing the GPT table to find the partition 
> for the kernel, u-boot checks and
> if necessary, repairs the GPT table. If the primary GPT table is broken, 
> the secondary GPT table is used
> to repair the primary GPT table. And same, in the other way, primary GPT 
> table is used to repair the
> secondary GPT table. A question would be about MBR, if the MBR is 
> invalid, but a GPT table is valid
> (primary or secondary), should we write a protective MBR ?.

If MBR is invalid, it does not boot - tested with U-Boot 2022.01 and 
Beaglebone. So I think we should.

Regards,
Stefano

> 
> 
>> Regards,
>> Stefano
>>
>>>
>>>> Regards,
>>>> Stefano
>>>
>>>
>>> Regards,
>>>
>>> Philippe
>>>
>>>
> 
> Regards,
> Philippe
> 
> 
> 
>>>
>>>>
>>>>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>>>>> ---
>>>>>   doc/source/handlers.rst     |  24 ++++
>>>>>   handlers/diskpart_handler.c | 244 
>>>>> ++++++++++++++++++++++++++++++++++++
>>>>>   2 files changed, 268 insertions(+)
>>>>>
>>>>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>>>>> index 8d2aeea..025d100 100644
>>>>> --- a/doc/source/handlers.rst
>>>>> +++ b/doc/source/handlers.rst
>>>>> @@ -943,6 +943,30 @@ MBR Example:
>>>>>          }
>>>>>       }
>>>>>   +gpt partition swap
>>>>> +..................
>>>>> +
>>>>> +There is a handler gptswap that allow to swap gpt partitions after 
>>>>> all the images were flashed.
>>>>> +This handler only swap the name of the partition. It coud be 
>>>>> usefull for a dual bank strategy.
>>>>> +This handler is a script for the point of view of swupdate, so the 
>>>>> node that provide it should
>>>>> +be added in the section scripts.
>>>>> +
>>>>> +Simple example:
>>>>> +
>>>>> +::
>>>>> +
>>>>> +        scripts: (
>>>>> +                {
>>>>> +                        type = "gptswap";
>>>>> +                        device = "/dev/vdb";
>>>>> +                        properties =
>>>>> +                        {
>>>>> +                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>>> +                                swap-1 = [ "kernel-0" , "kernel-1" ];
>>>>> +                        };
>>>>> +                },
>>>>> +         );
>>>>> +
>>>>>   Diskformat Handler
>>>>>   ------------------
>>>>>   diff --git a/handlers/diskpart_handler.c 
>>>>> b/handlers/diskpart_handler.c
>>>>> index 0af4e60..0ff5968 100644
>>>>> --- a/handlers/diskpart_handler.c
>>>>> +++ b/handlers/diskpart_handler.c
>>>>> @@ -14,8 +14,10 @@
>>>>>   #include <errno.h>
>>>>>   #include <ctype.h>
>>>>>   #include <sys/file.h>
>>>>> +#include <sys/ioctl.h>
>>>>>   #include <sys/types.h>
>>>>>   #include <libfdisk/libfdisk.h>
>>>>> +#include <linux/fs.h>
>>>>>   #include <fs_interface.h>
>>>>>   #include <uuid/uuid.h>
>>>>>   #include "swupdate.h"
>>>>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>>>>> fdisk_context *cxt, struct diskpart_ta
>>>>>       return ret;
>>>>>   }
>>>>>   +static struct fdisk_partition *
>>>>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, 
>>>>> char *name)
>>>>> +{
>>>>> +    struct fdisk_partition *pa = NULL;
>>>>> +    struct fdisk_partition *ipa = NULL;
>>>>> +    struct fdisk_iter *itr;
>>>>> +    const char *iname;
>>>>> +
>>>>> +    if (!tb || !name)
>>>>> +        goto out;
>>>>> +
>>>>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>>>>> +
>>>>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>>>>> +        iname = fdisk_partition_get_name(ipa);
>>>>> +        if (iname && !strcmp(iname, name)) {
>>>>> +            pa = ipa;
>>>>> +            break;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    fdisk_free_iter(itr);
>>>>> +
>>>>> + out:
>>>>> +    return pa;
>>>>> +}
>>>>> +
>>>>> +static struct fdisk_partition *
>>>>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
>>>>> +{
>>>>> +    struct fdisk_partition *pa = NULL;
>>>>> +
>>>>> +    if (!tb || !name)
>>>>> +        goto out;
>>>>> +
>>>>> +    if (tb->parent)
>>>>> +        pa = 
>>>>> diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
>>>>> +
>>>>> + out:
>>>>> +    return pa;
>>>>> +}
>>>>> +
>>>>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>>>>> +                   struct create_table *createtable,
>>>>> +                   char *name1, char *name2)
>>>>> +{
>>>>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>>>>> +    int ret = -1;
>>>>> +
>>>>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>>>>> +    if (!pa1) {
>>>>> +        ERROR("Can't find partition %s", name1);
>>>>> +        goto out;
>>>>> +    }
>>>>> +
>>>>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>>>>> +    if (!pa2) {
>>>>> +        ERROR("Can't find partition %s", name2);
>>>>> +        goto out;
>>>>> +    }
>>>>> +
>>>>> +    ret = fdisk_partition_set_name(pa1, name2);
>>>>> +    if (ret)
>>>>> +        goto out;
>>>>> +    ret = fdisk_partition_set_name(pa2, name1);
>>>>> +    if (ret)
>>>>> +        goto out;
>>>>> +
>>>>> +    createtable->parent = true;
>>>>> +
>>>>> + out:
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>>>>                     struct partition_data *part,
>>>>>                     unsigned long sector_size,
>>>>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>>>>> fdisk_context *cxt)
>>>>>       fdisk_unref_context(cxt);
>>>>>   }
>>>>>   +static int diskpart_reread_partition(char *device)
>>>>> +{
>>>>> +    int fd, ret = -1;
>>>>> +
>>>>> +    fd = open(device, O_RDONLY);
>>>>> +    if (fd < 0) {
>>>>> +        ERROR("Device %s can't be opened", device);
>>>>> +        goto out;
>>>>> +    }
>>>>> +
>>>>> +    ret = ioctl(fd, BLKRRPART, NULL);
>>>>> +    if (ret < 0) {
>>>>> +        ERROR("Scan cannot be done on device %s", device);
>>>>> +        goto out_close;
>>>>> +    }
>>>>> +
>>>>> + out_close:
>>>>> +    close(fd);
>>>>> +
>>>>> + out:
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>>   static int diskpart(struct img_type *img,
>>>>>       void __attribute__ ((__unused__)) *data)
>>>>>   {
>>>>> @@ -1066,9 +1165,154 @@ handler_release:
>>>>>       return ret;
>>>>>   }
>>>>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>>>>> +{
>>>>> +    struct fdisk_context *cxt = NULL;
>>>>> +    struct diskpart_table *tb = NULL;
>>>>> +    int ret = 0;
>>>>> +    unsigned long hybrid = 0;
>>>>> +    struct hnd_priv priv =  {
>>>>> +        .labeltype = FDISK_DISKLABEL_DOS,
>>>>> +    };
>>>>> +    struct create_table *createtable = NULL;
>>>>> +    struct script_handler_data *script_data = data;
>>>>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>>>>> +    struct dict_list *partitions;
>>>>> +    struct dict_list_elem *partition;
>>>>> +    int num, count = 0;
>>>>> +    char *name[2];
>>>>> +
>>>>> +    if (!script_data)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    /*
>>>>> +     * Call only in case of postinstall
>>>>> +     */
>>>>> +    if (script_data->scriptfn != POSTINSTALL)
>>>>> +        return 0;
>>>>> +
>>>>> +    LIST_INIT(&priv.listparts);
>>>>> +    if (!strlen(img->device)) {
>>>>> +        ERROR("Partition handler without setting the device");
>>>>> +        return -EINVAL;
>>>>> +    }
>>>>> +
>>>>> +    createtable = calloc(1, sizeof(*createtable));
>>>>> +    if (!createtable) {
>>>>> +        ERROR("OOM allocating createtable !");
>>>>> +        return -ENOMEM;
>>>>> +    }
>>>>> +
>>>>> +    /*
>>>>> +     * Reads flags: nolock and noinuse
>>>>> +     */
>>>>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>>>>> "nolock"));
>>>>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>>>>> "noinuse"));
>>>>> +
>>>>> +        /*
>>>>> +         * Create context
>>>>> +         */
>>>>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>>>>> createtable);
>>>>> +    if (ret == -EACCES)
>>>>> +        goto handler_release;
>>>>> +    else if (ret)
>>>>> +        goto handler_exit;
>>>>> +
>>>>> +    tb = calloc(1, sizeof(*tb));
>>>>> +    if (!tb) {
>>>>> +        ERROR("OOM loading partitions !");
>>>>> +        ret = -ENOMEM;
>>>>> +        goto handler_exit;
>>>>> +    }
>>>>> +
>>>>> +    /*
>>>>> +     * Fill the in-memory partition table from the disk.
>>>>> +     */
>>>>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>>>>> +    if (ret)
>>>>> +        goto handler_exit;
>>>>> +
>>>>> +    while (1) {
>>>>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>>>>> +        partitions = dict_get_list(&img->properties, prop);
>>>>> +        if (!partitions)
>>>>> +            break;
>>>>> +
>>>>> +        num = 0;
>>>>> +        LIST_FOREACH(partition, partitions, next) {
>>>>> +            if (num >= 2) {
>>>>> +                ERROR("Too many partition (%s)", prop);
>>>>> +                goto handler_exit;
>>>>> +            }
>>>>> +
>>>>> +            name[num] = partition->value;
>>>>> +            num++;
>>>>> +        }
>>>>> +
>>>>> +        if (num != 2) {
>>>>> +            ERROR("Invalid number (%d) of partition (%s)", num, 
>>>>> prop);
>>>>> +            goto handler_exit;
>>>>> +        }
>>>>> +
>>>>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>>>>> +
>>>>> +        ret = diskpart_swap_partition(tb, createtable, name[0], 
>>>>> name[1]);
>>>>> +        if (ret) {
>>>>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>>>>> +            break;
>>>>> +        }
>>>>> +
>>>>> +        count++;
>>>>> +    }
>>>>> +
>>>>> +    /* Reload table for parent */
>>>>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>>>>> +    if (ret) {
>>>>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>>>>> +        goto handler_exit;
>>>>> +    }
>>>>> +
>>>>> +    /* Reload table for child */
>>>>> +    if (IS_HYBRID(cxt)) {
>>>>> +        ret = diskpart_reload_table(cxt, tb->child);
>>>>> +        if (ret) {
>>>>> +            ERROR("Can't reload table for child (err = %d)", ret);
>>>>> +            goto handler_exit;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    /* Write table */
>>>>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>>>>> priv.noinuse);
>>>>> +    if (ret) {
>>>>> +        ERROR("Can't write table (err = %d)", ret);
>>>>> +        goto handler_exit;
>>>>> +    }
>>>>> +
>>>>> +handler_exit:
>>>>> +    if (tb)
>>>>> +        diskpart_unref_table(tb);
>>>>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>>>>> +        if (fdisk_deassign_device(cxt, 0))
>>>>> +            WARN("Error deassign device %s", img->device);
>>>>> +
>>>>> +handler_release:
>>>>> +    if (cxt)
>>>>> +        diskpart_unref_context(cxt);
>>>>> +
>>>>> +    /*
>>>>> +     * Re-read the partition table to be sure that SWupdate does not
>>>>> +     * try to acces the partitions before the kernel is ready
>>>>> +     */
>>>>> +    diskpart_reread_partition(img->device);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>>   __attribute__((constructor))
>>>>>   void diskpart_handler(void)
>>>>>   {
>>>>>       register_handler("diskpart", diskpart,
>>>>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>>>>> +        register_handler("gptswap", gpt_swap_partition,
>>>>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>   }
>>>>
>>>>
>>>
>>
>>
>
Philippe REYNES April 13, 2022, 5:25 p.m. UTC | #6
Hi Stefano,

Le 07/04/2022 à 15:28, Stefano Babic a écrit :
> Hi Philippe,
>
> On 07.04.22 15:15, Philippe REYNES wrote:
>> Hi Stefano,
>>
>>
>> Le 06/04/2022 à 18:27, Stefano Babic a écrit :
>>> Hi Philippe,
>>>
>>> On 06.04.22 17:30, Philippe REYNES wrote:
>>>> Hi Stefano,
>>>>
>>>>
>>>> Le 05/04/2022 à 16:17, Stefano Babic a écrit :
>>>>> Hi Philippe,
>>>>>
>>>>> On 05.04.22 15:17, Philippe Reynes wrote:
>>>>>> Add a handler gptswap to swap gpt partition.
>>>>>> This handler is a script that can be used to swap gpt partition
>>>>>> names after all the image were flashed (only partition names are
>>>>>> swapped). This script may be usefull for a dual bank strategy.
>>>>>>
>>>>>> For example:
>>>>>> scripts: (
>>>>>>     {
>>>>>>         type = "gptswap";
>>>>>>         device = "/dev/vdb";
>>>>>>         properties =
>>>>>>         {
>>>>>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>>>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>>>>>         };
>>>>>>     },
>>>>>> );
>>>>>>
>>>>>
>>>>> I would like first to discuss about the way to do, not the patch 
>>>>> (I will review later). My concerns are related if this can be atomic.
>>>>>
>>>>> The procedure you implement is something like:
>>>>>
>>>>> - you read the partitions from disk
>>>>> - you exchange the names of partitions according to the rule in 
>>>>> sw-description. This happens in memory, no problem up now.
>>>>>
>>>>> - call diskpart_write_table() and rewrite the whole table, as it 
>>>>> should be.
>>>>>
>>>>> At the end, the whole partition table is rewritten. Because of 
>>>>> that, and also because the data should then written by the low 
>>>>> level driver (for example a SDHC driver), I have the feeling that 
>>>>> it is not atomic. We can also have two partitions with the same 
>>>>> name after a power cut, or even a corrupted table. Or do you have 
>>>>> some other thoughts ?
>>>>>
>>>>> I have thought to implement this in a less invasive way, just 
>>>>> setting a "bootflag" - we have not on GPT, but we can use an 
>>>>> attribute 
>>>>> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Cca7c8682b7ab4cd9006608da189a7bc9%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637849349466801569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C2000&amp;sdata=mSo3ZNeEfqnCKQbdkBirPP4tfqZSrCT8D1%2BGxLdXRNg%3D&amp;reserved=0), 
>>>>> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() 
>>>>> with one "user" flag.
>>>>>
>>>>
>>>> In my understanding, partitions flags are set in memory, and then 
>>>> the whole GPT partition is written to the disk.
>>>
>>> Right.
>>>
>>>> So the behavior is the same if we swap the partition name or a 
>>>> partition flag.
>>>
>>> Ok, got it. So because we rewrite the whole GPT header, we recompute 
>>> the CRC, and then it is not important if we change a string (=name) 
>>> or a single bit (=flag). I think this is fine, so let's proceed.
>>>
>>>>
>>>>
>>>> I have looked the function gpt_write_disklabel (in util-linux)
>>>
>>> ...yes, util-linux is the source of inspiration for most code around 
>>> disks in SWUpdate....
>>>
>>>> that is used to write the full GPT partition.
>>>>
>>>> This comment describes how it happens:
>>>>
>>>>      /*
>>>>
>>>>       * UEFI requires writing in this specific order:
>>>>
>>>>       *   1) backup partition tables
>>>>
>>>>       *   2) backup GPT header
>>>>
>>>>       *   3) primary partition tables
>>>>
>>>>       *   4) primary GPT header
>>>>
>>>>       *   5) protective MBR
>>>>
>>>>       *
>>>>
>>>>       * If any write fails, we abort the rest.
>>>>
>>>>       */
>>>>
>>>> When GPT is used on a disk, there are 5 elements:
>>>> - protective MBR (LBA 0)
>>>> - primary GPT table header (LBA 1)
>>>> - primary GPT table partition (LBA 2 -> 33)
>>>> - secondary GPT table partition  (LBA -2 -> -33)
>>>> - secondary GPT table header (LBA - 1)
>>>> And there are two CRCs in the GPT table header, one that protect 
>>>> the GPT table header, and one that protect the GPT table partition.
>>>
>>> Ok
>>>
>>>>
>>>> if step 1 or 2 fail, the secondary GPT is invalid and it can be 
>>>> recovered using the primary GPT (and the swap is not applied).
>>>
>>> More as one step fails, I am bothering what is happening in case of 
>>> power-cut and if some of these steps are not atomic, and system does 
>>> not boot or boot with the wrong partition.
>>>
>>> My first concern is who is then in charge to repair : we have a 
>>> working system with full GPT protection (both header are valid), we 
>>> update and we have a power-cut. Good, backup header is corrupted, 
>>> but we can boot again. But as far as I know, the kernel just mounts 
>>> the partition and the GPT is not explicitely set or repaired, 
>>> decreasing our reliability.
>>>
>>> What do you think ?
>>
>> I think that we should look for a way to automatically check and 
>> repair GPT partition at boot.
>> I don't found a way to do that. So may be we will need to provide a 
>> tool/patch to do this check and repair.
>
> You can fix it with fdisk / gdisk (I do npot know with sfdisk), or a 
> small code doing this. Anyway, fixable.
>
>>
>>>
>>>> If step 3 or 4 fail, the primary GPT is invalid and it can be 
>>>> recovered using the secondary GPT (and the swap is applied).
>>>
>>> Ok - fine, with the exception that I do not know who will repair the 
>>> primary GPT header.
>>>
>>>> If step 5 fails, I am afraid that protective MBR could be 
>>>> corrupted, so the GPT table may be invalid.
>>>
>>> I just tested with a not very recent U-Boot, in any case a device 
>>> with GPT table. Dropping protective MBR, U-Boot does not find any 
>>> partition and device does not boot. So it looks like a no way here.
>>>
>>> And rerunning SWUpdate as recovery, the diskpartitioner (at the end, 
>>> libfdisk) reports that partitions are damaged and must be created.
>>>
>>>>
>>>> But, I think that we could avoid to write the protective MBR if 
>>>> there is already a protective MBR on the disk.
>>>
>>> According to gpt_write_disklabel, this is done by forcing a hybrid 
>>> MBR, but sure it is not what we want.
>>
>> Yes, the MBR is not written if the mode if hybrib.
>> My idea is not to set the hybrid mode, but before writting a new MBR, 
>> read the MBR on the disk, and compare it with
>> the MRB that should be written. If they are the same, then the new 
>> MBR is not written on the disk.
>>
>>>
>>>> And as we talk about a script to swap GPT partition, a protective 
>>>> MBR should be already on the disk.
>>>
>>> Sure, agree.
>>>
>>>>
>>>>
>>>>> Can we rely on the mechanism you implemented ? Quite scared that 
>>>>> the tables are rewritten at any toggle.
>>>>>
>>>>
>>>> If we modify libfdisk to only write the MBR when it is usefull.
>>>
>>> Ok - the question is how to push this change mainline, and if this 
>>> change can be mainlined. Surely a hack just for SWUpdate (in a meta 
>>> layer) is not praticable, and not appliable for other systems 
>>> (Debian for example).
>>>
>>> This is not so simple, because struct fdisk_context is opaque. And 
>>> context is the only parameter for gpt_write_disklabel().
>>>
>>>> Before writting the MBR, libfdisk could read the MBR on the disk, 
>>>> and if the MBR on the disk is the same
>>>> that the MBR in memory, then the write is avoided.
>>>> I think that with this change, the swap could be quite safe.
>>>
>>> Point is how to push such as changes, that are useful probably just 
>>> in our use cases....
>>>
>>
>> I think that we should push a patch to the libfdisk community. If 
>> they accept the patch, we could continue in this
>> way, if they refuse the patch, we may need to consider another solution.
>> I prepare this patch and I send it to the libfdisk community ASAP.
>>
>
> Fully agree, go on !

I  have sent a patch and it has been accepted :
https://git.kernel.org/pub/scm/utils/util-linux/util-linux.git/commit/?id=17b4496a1819b8bf1b78683e49e3ade1f8941b3f
So, if you agree,  I think we can continue to work on gpt partition swap.


>
>> If this patch is accepted, I think that we could add a feature that 
>> automatically check and repair GPT table partition.
>
> Yes: if we can safe switch even in case of power-cut, it is ok for me.
>
>> I think it could be done in u-boot, in the kernel or in an userspace 
>> daemon.
>
> The easiest way is in user space.

I agree, but u-boot use the gpt before.
I strongly thought about adding two sub functions "check" and "repair" 
in the command gpt.

>
>>
>> I am interested to add this feature (of course, enabled by a config) 
>> in u-boot. The idea is quite simple,
>> when u-boot starts, before parsing the GPT table to find the 
>> partition for the kernel, u-boot checks and
>> if necessary, repairs the GPT table. If the primary GPT table is 
>> broken, the secondary GPT table is used
>> to repair the primary GPT table. And same, in the other way, primary 
>> GPT table is used to repair the
>> secondary GPT table. A question would be about MBR, if the MBR is 
>> invalid, but a GPT table is valid
>> (primary or secondary), should we write a protective MBR ?.
>
> If MBR is invalid, it does not boot - tested with U-Boot 2022.01 and 
> Beaglebone. So I think we should.

If the MBR is invalid, I think that there is two solutions, either it 
was a protective MBR and it could
be restaured or it was an hybrib MBR, and in this case, the dos 
partition are lost.
In both case, it is possible to save the gpt partition (if at least 
primary or secondary are valid).


>
> Regards,
> Stefano


Regards,
Philippe



>
>>
>>
>>> Regards,
>>> Stefano
>>>
>>>>
>>>>> Regards,
>>>>> Stefano
>>>>
>>>>
>>>> Regards,
>>>>
>>>> Philippe
>>>>
>>>>
>>
>> Regards,
>> Philippe
>>
>>
>>
>>>>
>>>>>
>>>>>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>>>>>> ---
>>>>>>   doc/source/handlers.rst     |  24 ++++
>>>>>>   handlers/diskpart_handler.c | 244 
>>>>>> ++++++++++++++++++++++++++++++++++++
>>>>>>   2 files changed, 268 insertions(+)
>>>>>>
>>>>>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>>>>>> index 8d2aeea..025d100 100644
>>>>>> --- a/doc/source/handlers.rst
>>>>>> +++ b/doc/source/handlers.rst
>>>>>> @@ -943,6 +943,30 @@ MBR Example:
>>>>>>          }
>>>>>>       }
>>>>>>   +gpt partition swap
>>>>>> +..................
>>>>>> +
>>>>>> +There is a handler gptswap that allow to swap gpt partitions 
>>>>>> after all the images were flashed.
>>>>>> +This handler only swap the name of the partition. It coud be 
>>>>>> usefull for a dual bank strategy.
>>>>>> +This handler is a script for the point of view of swupdate, so 
>>>>>> the node that provide it should
>>>>>> +be added in the section scripts.
>>>>>> +
>>>>>> +Simple example:
>>>>>> +
>>>>>> +::
>>>>>> +
>>>>>> +        scripts: (
>>>>>> +                {
>>>>>> +                        type = "gptswap";
>>>>>> +                        device = "/dev/vdb";
>>>>>> +                        properties =
>>>>>> +                        {
>>>>>> +                                swap-0 = [ "u-boot-0" , 
>>>>>> "u-boot-1" ];
>>>>>> +                                swap-1 = [ "kernel-0" , 
>>>>>> "kernel-1" ];
>>>>>> +                        };
>>>>>> +                },
>>>>>> +         );
>>>>>> +
>>>>>>   Diskformat Handler
>>>>>>   ------------------
>>>>>>   diff --git a/handlers/diskpart_handler.c 
>>>>>> b/handlers/diskpart_handler.c
>>>>>> index 0af4e60..0ff5968 100644
>>>>>> --- a/handlers/diskpart_handler.c
>>>>>> +++ b/handlers/diskpart_handler.c
>>>>>> @@ -14,8 +14,10 @@
>>>>>>   #include <errno.h>
>>>>>>   #include <ctype.h>
>>>>>>   #include <sys/file.h>
>>>>>> +#include <sys/ioctl.h>
>>>>>>   #include <sys/types.h>
>>>>>>   #include <libfdisk/libfdisk.h>
>>>>>> +#include <linux/fs.h>
>>>>>>   #include <fs_interface.h>
>>>>>>   #include <uuid/uuid.h>
>>>>>>   #include "swupdate.h"
>>>>>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>>>>>> fdisk_context *cxt, struct diskpart_ta
>>>>>>       return ret;
>>>>>>   }
>>>>>>   +static struct fdisk_partition *
>>>>>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table 
>>>>>> *tb, char *name)
>>>>>> +{
>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>> +    struct fdisk_partition *ipa = NULL;
>>>>>> +    struct fdisk_iter *itr;
>>>>>> +    const char *iname;
>>>>>> +
>>>>>> +    if (!tb || !name)
>>>>>> +        goto out;
>>>>>> +
>>>>>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>>>>>> +
>>>>>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>>>>>> +        iname = fdisk_partition_get_name(ipa);
>>>>>> +        if (iname && !strcmp(iname, name)) {
>>>>>> +            pa = ipa;
>>>>>> +            break;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    fdisk_free_iter(itr);
>>>>>> +
>>>>>> + out:
>>>>>> +    return pa;
>>>>>> +}
>>>>>> +
>>>>>> +static struct fdisk_partition *
>>>>>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char 
>>>>>> *name)
>>>>>> +{
>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>> +
>>>>>> +    if (!tb || !name)
>>>>>> +        goto out;
>>>>>> +
>>>>>> +    if (tb->parent)
>>>>>> +        pa = 
>>>>>> diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
>>>>>> +
>>>>>> + out:
>>>>>> +    return pa;
>>>>>> +}
>>>>>> +
>>>>>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>>>>>> +                   struct create_table *createtable,
>>>>>> +                   char *name1, char *name2)
>>>>>> +{
>>>>>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>>>>>> +    int ret = -1;
>>>>>> +
>>>>>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>>>>>> +    if (!pa1) {
>>>>>> +        ERROR("Can't find partition %s", name1);
>>>>>> +        goto out;
>>>>>> +    }
>>>>>> +
>>>>>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>>>>>> +    if (!pa2) {
>>>>>> +        ERROR("Can't find partition %s", name2);
>>>>>> +        goto out;
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = fdisk_partition_set_name(pa1, name2);
>>>>>> +    if (ret)
>>>>>> +        goto out;
>>>>>> +    ret = fdisk_partition_set_name(pa2, name1);
>>>>>> +    if (ret)
>>>>>> +        goto out;
>>>>>> +
>>>>>> +    createtable->parent = true;
>>>>>> +
>>>>>> + out:
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>>>>>                     struct partition_data *part,
>>>>>>                     unsigned long sector_size,
>>>>>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>>>>>> fdisk_context *cxt)
>>>>>>       fdisk_unref_context(cxt);
>>>>>>   }
>>>>>>   +static int diskpart_reread_partition(char *device)
>>>>>> +{
>>>>>> +    int fd, ret = -1;
>>>>>> +
>>>>>> +    fd = open(device, O_RDONLY);
>>>>>> +    if (fd < 0) {
>>>>>> +        ERROR("Device %s can't be opened", device);
>>>>>> +        goto out;
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = ioctl(fd, BLKRRPART, NULL);
>>>>>> +    if (ret < 0) {
>>>>>> +        ERROR("Scan cannot be done on device %s", device);
>>>>>> +        goto out_close;
>>>>>> +    }
>>>>>> +
>>>>>> + out_close:
>>>>>> +    close(fd);
>>>>>> +
>>>>>> + out:
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>>   static int diskpart(struct img_type *img,
>>>>>>       void __attribute__ ((__unused__)) *data)
>>>>>>   {
>>>>>> @@ -1066,9 +1165,154 @@ handler_release:
>>>>>>       return ret;
>>>>>>   }
>>>>>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>>>>>> +{
>>>>>> +    struct fdisk_context *cxt = NULL;
>>>>>> +    struct diskpart_table *tb = NULL;
>>>>>> +    int ret = 0;
>>>>>> +    unsigned long hybrid = 0;
>>>>>> +    struct hnd_priv priv =  {
>>>>>> +        .labeltype = FDISK_DISKLABEL_DOS,
>>>>>> +    };
>>>>>> +    struct create_table *createtable = NULL;
>>>>>> +    struct script_handler_data *script_data = data;
>>>>>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>>>>>> +    struct dict_list *partitions;
>>>>>> +    struct dict_list_elem *partition;
>>>>>> +    int num, count = 0;
>>>>>> +    char *name[2];
>>>>>> +
>>>>>> +    if (!script_data)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Call only in case of postinstall
>>>>>> +     */
>>>>>> +    if (script_data->scriptfn != POSTINSTALL)
>>>>>> +        return 0;
>>>>>> +
>>>>>> +    LIST_INIT(&priv.listparts);
>>>>>> +    if (!strlen(img->device)) {
>>>>>> +        ERROR("Partition handler without setting the device");
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +
>>>>>> +    createtable = calloc(1, sizeof(*createtable));
>>>>>> +    if (!createtable) {
>>>>>> +        ERROR("OOM allocating createtable !");
>>>>>> +        return -ENOMEM;
>>>>>> +    }
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Reads flags: nolock and noinuse
>>>>>> +     */
>>>>>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>>>>>> "nolock"));
>>>>>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>>>>>> "noinuse"));
>>>>>> +
>>>>>> +        /*
>>>>>> +         * Create context
>>>>>> +         */
>>>>>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>>>>>> createtable);
>>>>>> +    if (ret == -EACCES)
>>>>>> +        goto handler_release;
>>>>>> +    else if (ret)
>>>>>> +        goto handler_exit;
>>>>>> +
>>>>>> +    tb = calloc(1, sizeof(*tb));
>>>>>> +    if (!tb) {
>>>>>> +        ERROR("OOM loading partitions !");
>>>>>> +        ret = -ENOMEM;
>>>>>> +        goto handler_exit;
>>>>>> +    }
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Fill the in-memory partition table from the disk.
>>>>>> +     */
>>>>>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>>>>>> +    if (ret)
>>>>>> +        goto handler_exit;
>>>>>> +
>>>>>> +    while (1) {
>>>>>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>>>>>> +        partitions = dict_get_list(&img->properties, prop);
>>>>>> +        if (!partitions)
>>>>>> +            break;
>>>>>> +
>>>>>> +        num = 0;
>>>>>> +        LIST_FOREACH(partition, partitions, next) {
>>>>>> +            if (num >= 2) {
>>>>>> +                ERROR("Too many partition (%s)", prop);
>>>>>> +                goto handler_exit;
>>>>>> +            }
>>>>>> +
>>>>>> +            name[num] = partition->value;
>>>>>> +            num++;
>>>>>> +        }
>>>>>> +
>>>>>> +        if (num != 2) {
>>>>>> +            ERROR("Invalid number (%d) of partition (%s)", num, 
>>>>>> prop);
>>>>>> +            goto handler_exit;
>>>>>> +        }
>>>>>> +
>>>>>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>>>>>> +
>>>>>> +        ret = diskpart_swap_partition(tb, createtable, name[0], 
>>>>>> name[1]);
>>>>>> +        if (ret) {
>>>>>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>>>>>> +            break;
>>>>>> +        }
>>>>>> +
>>>>>> +        count++;
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Reload table for parent */
>>>>>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>>>>>> +    if (ret) {
>>>>>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>>>>>> +        goto handler_exit;
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Reload table for child */
>>>>>> +    if (IS_HYBRID(cxt)) {
>>>>>> +        ret = diskpart_reload_table(cxt, tb->child);
>>>>>> +        if (ret) {
>>>>>> +            ERROR("Can't reload table for child (err = %d)", ret);
>>>>>> +            goto handler_exit;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Write table */
>>>>>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>>>>>> priv.noinuse);
>>>>>> +    if (ret) {
>>>>>> +        ERROR("Can't write table (err = %d)", ret);
>>>>>> +        goto handler_exit;
>>>>>> +    }
>>>>>> +
>>>>>> +handler_exit:
>>>>>> +    if (tb)
>>>>>> +        diskpart_unref_table(tb);
>>>>>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>>>>>> +        if (fdisk_deassign_device(cxt, 0))
>>>>>> +            WARN("Error deassign device %s", img->device);
>>>>>> +
>>>>>> +handler_release:
>>>>>> +    if (cxt)
>>>>>> +        diskpart_unref_context(cxt);
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Re-read the partition table to be sure that SWupdate does 
>>>>>> not
>>>>>> +     * try to acces the partitions before the kernel is ready
>>>>>> +     */
>>>>>> +    diskpart_reread_partition(img->device);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>>   __attribute__((constructor))
>>>>>>   void diskpart_handler(void)
>>>>>>   {
>>>>>>       register_handler("diskpart", diskpart,
>>>>>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>> +        register_handler("gptswap", gpt_swap_partition,
>>>>>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>>   }
>>>>>
>>>>>
>>>>
>>>
>>>
>>
>
>
Stefano Babic April 13, 2022, 8:27 p.m. UTC | #7
Hi Philippe,

On 13.04.22 19:25, Philippe REYNES wrote:
> Hi Stefano,
> 
> Le 07/04/2022 à 15:28, Stefano Babic a écrit :
>> Hi Philippe,
>>
>> On 07.04.22 15:15, Philippe REYNES wrote:
>>> Hi Stefano,
>>>
>>>
>>> Le 06/04/2022 à 18:27, Stefano Babic a écrit :
>>>> Hi Philippe,
>>>>
>>>> On 06.04.22 17:30, Philippe REYNES wrote:
>>>>> Hi Stefano,
>>>>>
>>>>>
>>>>> Le 05/04/2022 à 16:17, Stefano Babic a écrit :
>>>>>> Hi Philippe,
>>>>>>
>>>>>> On 05.04.22 15:17, Philippe Reynes wrote:
>>>>>>> Add a handler gptswap to swap gpt partition.
>>>>>>> This handler is a script that can be used to swap gpt partition
>>>>>>> names after all the image were flashed (only partition names are
>>>>>>> swapped). This script may be usefull for a dual bank strategy.
>>>>>>>
>>>>>>> For example:
>>>>>>> scripts: (
>>>>>>>     {
>>>>>>>         type = "gptswap";
>>>>>>>         device = "/dev/vdb";
>>>>>>>         properties =
>>>>>>>         {
>>>>>>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>>>>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>>>>>>         };
>>>>>>>     },
>>>>>>> );
>>>>>>>
>>>>>>
>>>>>> I would like first to discuss about the way to do, not the patch 
>>>>>> (I will review later). My concerns are related if this can be atomic.
>>>>>>
>>>>>> The procedure you implement is something like:
>>>>>>
>>>>>> - you read the partitions from disk
>>>>>> - you exchange the names of partitions according to the rule in 
>>>>>> sw-description. This happens in memory, no problem up now.
>>>>>>
>>>>>> - call diskpart_write_table() and rewrite the whole table, as it 
>>>>>> should be.
>>>>>>
>>>>>> At the end, the whole partition table is rewritten. Because of 
>>>>>> that, and also because the data should then written by the low 
>>>>>> level driver (for example a SDHC driver), I have the feeling that 
>>>>>> it is not atomic. We can also have two partitions with the same 
>>>>>> name after a power cut, or even a corrupted table. Or do you have 
>>>>>> some other thoughts ?
>>>>>>
>>>>>> I have thought to implement this in a less invasive way, just 
>>>>>> setting a "bootflag" - we have not on GPT, but we can use an 
>>>>>> attribute 
>>>>>> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Cca7c8682b7ab4cd9006608da189a7bc9%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637849349466801569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C2000&amp;sdata=mSo3ZNeEfqnCKQbdkBirPP4tfqZSrCT8D1%2BGxLdXRNg%3D&amp;reserved=0), 
>>>>>> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() 
>>>>>> with one "user" flag.
>>>>>>
>>>>>
>>>>> In my understanding, partitions flags are set in memory, and then 
>>>>> the whole GPT partition is written to the disk.
>>>>
>>>> Right.
>>>>
>>>>> So the behavior is the same if we swap the partition name or a 
>>>>> partition flag.
>>>>
>>>> Ok, got it. So because we rewrite the whole GPT header, we recompute 
>>>> the CRC, and then it is not important if we change a string (=name) 
>>>> or a single bit (=flag). I think this is fine, so let's proceed.
>>>>
>>>>>
>>>>>
>>>>> I have looked the function gpt_write_disklabel (in util-linux)
>>>>
>>>> ...yes, util-linux is the source of inspiration for most code around 
>>>> disks in SWUpdate....
>>>>
>>>>> that is used to write the full GPT partition.
>>>>>
>>>>> This comment describes how it happens:
>>>>>
>>>>>      /*
>>>>>
>>>>>       * UEFI requires writing in this specific order:
>>>>>
>>>>>       *   1) backup partition tables
>>>>>
>>>>>       *   2) backup GPT header
>>>>>
>>>>>       *   3) primary partition tables
>>>>>
>>>>>       *   4) primary GPT header
>>>>>
>>>>>       *   5) protective MBR
>>>>>
>>>>>       *
>>>>>
>>>>>       * If any write fails, we abort the rest.
>>>>>
>>>>>       */
>>>>>
>>>>> When GPT is used on a disk, there are 5 elements:
>>>>> - protective MBR (LBA 0)
>>>>> - primary GPT table header (LBA 1)
>>>>> - primary GPT table partition (LBA 2 -> 33)
>>>>> - secondary GPT table partition  (LBA -2 -> -33)
>>>>> - secondary GPT table header (LBA - 1)
>>>>> And there are two CRCs in the GPT table header, one that protect 
>>>>> the GPT table header, and one that protect the GPT table partition.
>>>>
>>>> Ok
>>>>
>>>>>
>>>>> if step 1 or 2 fail, the secondary GPT is invalid and it can be 
>>>>> recovered using the primary GPT (and the swap is not applied).
>>>>
>>>> More as one step fails, I am bothering what is happening in case of 
>>>> power-cut and if some of these steps are not atomic, and system does 
>>>> not boot or boot with the wrong partition.
>>>>
>>>> My first concern is who is then in charge to repair : we have a 
>>>> working system with full GPT protection (both header are valid), we 
>>>> update and we have a power-cut. Good, backup header is corrupted, 
>>>> but we can boot again. But as far as I know, the kernel just mounts 
>>>> the partition and the GPT is not explicitely set or repaired, 
>>>> decreasing our reliability.
>>>>
>>>> What do you think ?
>>>
>>> I think that we should look for a way to automatically check and 
>>> repair GPT partition at boot.
>>> I don't found a way to do that. So may be we will need to provide a 
>>> tool/patch to do this check and repair.
>>
>> You can fix it with fdisk / gdisk (I do npot know with sfdisk), or a 
>> small code doing this. Anyway, fixable.
>>
>>>
>>>>
>>>>> If step 3 or 4 fail, the primary GPT is invalid and it can be 
>>>>> recovered using the secondary GPT (and the swap is applied).
>>>>
>>>> Ok - fine, with the exception that I do not know who will repair the 
>>>> primary GPT header.
>>>>
>>>>> If step 5 fails, I am afraid that protective MBR could be 
>>>>> corrupted, so the GPT table may be invalid.
>>>>
>>>> I just tested with a not very recent U-Boot, in any case a device 
>>>> with GPT table. Dropping protective MBR, U-Boot does not find any 
>>>> partition and device does not boot. So it looks like a no way here.
>>>>
>>>> And rerunning SWUpdate as recovery, the diskpartitioner (at the end, 
>>>> libfdisk) reports that partitions are damaged and must be created.
>>>>
>>>>>
>>>>> But, I think that we could avoid to write the protective MBR if 
>>>>> there is already a protective MBR on the disk.
>>>>
>>>> According to gpt_write_disklabel, this is done by forcing a hybrid 
>>>> MBR, but sure it is not what we want.
>>>
>>> Yes, the MBR is not written if the mode if hybrib.
>>> My idea is not to set the hybrid mode, but before writting a new MBR, 
>>> read the MBR on the disk, and compare it with
>>> the MRB that should be written. If they are the same, then the new 
>>> MBR is not written on the disk.
>>>
>>>>
>>>>> And as we talk about a script to swap GPT partition, a protective 
>>>>> MBR should be already on the disk.
>>>>
>>>> Sure, agree.
>>>>
>>>>>
>>>>>
>>>>>> Can we rely on the mechanism you implemented ? Quite scared that 
>>>>>> the tables are rewritten at any toggle.
>>>>>>
>>>>>
>>>>> If we modify libfdisk to only write the MBR when it is usefull.
>>>>
>>>> Ok - the question is how to push this change mainline, and if this 
>>>> change can be mainlined. Surely a hack just for SWUpdate (in a meta 
>>>> layer) is not praticable, and not appliable for other systems 
>>>> (Debian for example).
>>>>
>>>> This is not so simple, because struct fdisk_context is opaque. And 
>>>> context is the only parameter for gpt_write_disklabel().
>>>>
>>>>> Before writting the MBR, libfdisk could read the MBR on the disk, 
>>>>> and if the MBR on the disk is the same
>>>>> that the MBR in memory, then the write is avoided.
>>>>> I think that with this change, the swap could be quite safe.
>>>>
>>>> Point is how to push such as changes, that are useful probably just 
>>>> in our use cases....
>>>>
>>>
>>> I think that we should push a patch to the libfdisk community. If 
>>> they accept the patch, we could continue in this
>>> way, if they refuse the patch, we may need to consider another solution.
>>> I prepare this patch and I send it to the libfdisk community ASAP.
>>>
>>
>> Fully agree, go on !
> 
> I  have sent a patch and it has been accepted :
> https://git.kernel.org/pub/scm/utils/util-linux/util-linux.git/commit/?id=17b4496a1819b8bf1b78683e49e3ade1f8941b3f 
> 

Well done !

> So, if you agree,  I think we can continue to work on gpt partition swap.

Fully agree.

> 
> 
>>
>>> If this patch is accepted, I think that we could add a feature that 
>>> automatically check and repair GPT table partition.
>>
>> Yes: if we can safe switch even in case of power-cut, it is ok for me.
>>
>>> I think it could be done in u-boot, in the kernel or in an userspace 
>>> daemon.
>>
>> The easiest way is in user space.
> 
> I agree, but u-boot use the gpt before.
> I strongly thought about adding two sub functions "check" and "repair" 
> in the command gpt.

Ok good plan.
> 
>>
>>>
>>> I am interested to add this feature (of course, enabled by a config) 
>>> in u-boot. The idea is quite simple,
>>> when u-boot starts, before parsing the GPT table to find the 
>>> partition for the kernel, u-boot checks and
>>> if necessary, repairs the GPT table. If the primary GPT table is 
>>> broken, the secondary GPT table is used
>>> to repair the primary GPT table. And same, in the other way, primary 
>>> GPT table is used to repair the
>>> secondary GPT table. A question would be about MBR, if the MBR is 
>>> invalid, but a GPT table is valid
>>> (primary or secondary), should we write a protective MBR ?.
>>
>> If MBR is invalid, it does not boot - tested with U-Boot 2022.01 and 
>> Beaglebone. So I think we should.
> 
> If the MBR is invalid, I think that there is two solutions, either it 
> was a protective MBR and it could
> be restaured

Exactly.

> or it was an hybrib MBR, and in this case, the dos 
> partition are lost.
> In both case, it is possible to save the gpt partition (if at least 
> primary or secondary are valid).

Right - as far as I see, U-Boot (in part_efi.c) checks to see a valid 
PMBR, and discard the correct GPT entries in case of failure. This is 
not needed and drops the redundancy we have with the dual GPT headers.

Regards,
Stefano

> 
> 
>>
>> Regards,
>> Stefano
> 
> 
> Regards,
> Philippe
> 
> 
> 
>>
>>>
>>>
>>>> Regards,
>>>> Stefano
>>>>
>>>>>
>>>>>> Regards,
>>>>>> Stefano
>>>>>
>>>>>
>>>>> Regards,
>>>>>
>>>>> Philippe
>>>>>
>>>>>
>>>
>>> Regards,
>>> Philippe
>>>
>>>
>>>
>>>>>
>>>>>>
>>>>>>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>>>>>>> ---
>>>>>>>   doc/source/handlers.rst     |  24 ++++
>>>>>>>   handlers/diskpart_handler.c | 244 
>>>>>>> ++++++++++++++++++++++++++++++++++++
>>>>>>>   2 files changed, 268 insertions(+)
>>>>>>>
>>>>>>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>>>>>>> index 8d2aeea..025d100 100644
>>>>>>> --- a/doc/source/handlers.rst
>>>>>>> +++ b/doc/source/handlers.rst
>>>>>>> @@ -943,6 +943,30 @@ MBR Example:
>>>>>>>          }
>>>>>>>       }
>>>>>>>   +gpt partition swap
>>>>>>> +..................
>>>>>>> +
>>>>>>> +There is a handler gptswap that allow to swap gpt partitions 
>>>>>>> after all the images were flashed.
>>>>>>> +This handler only swap the name of the partition. It coud be 
>>>>>>> usefull for a dual bank strategy.
>>>>>>> +This handler is a script for the point of view of swupdate, so 
>>>>>>> the node that provide it should
>>>>>>> +be added in the section scripts.
>>>>>>> +
>>>>>>> +Simple example:
>>>>>>> +
>>>>>>> +::
>>>>>>> +
>>>>>>> +        scripts: (
>>>>>>> +                {
>>>>>>> +                        type = "gptswap";
>>>>>>> +                        device = "/dev/vdb";
>>>>>>> +                        properties =
>>>>>>> +                        {
>>>>>>> +                                swap-0 = [ "u-boot-0" , 
>>>>>>> "u-boot-1" ];
>>>>>>> +                                swap-1 = [ "kernel-0" , 
>>>>>>> "kernel-1" ];
>>>>>>> +                        };
>>>>>>> +                },
>>>>>>> +         );
>>>>>>> +
>>>>>>>   Diskformat Handler
>>>>>>>   ------------------
>>>>>>>   diff --git a/handlers/diskpart_handler.c 
>>>>>>> b/handlers/diskpart_handler.c
>>>>>>> index 0af4e60..0ff5968 100644
>>>>>>> --- a/handlers/diskpart_handler.c
>>>>>>> +++ b/handlers/diskpart_handler.c
>>>>>>> @@ -14,8 +14,10 @@
>>>>>>>   #include <errno.h>
>>>>>>>   #include <ctype.h>
>>>>>>>   #include <sys/file.h>
>>>>>>> +#include <sys/ioctl.h>
>>>>>>>   #include <sys/types.h>
>>>>>>>   #include <libfdisk/libfdisk.h>
>>>>>>> +#include <linux/fs.h>
>>>>>>>   #include <fs_interface.h>
>>>>>>>   #include <uuid/uuid.h>
>>>>>>>   #include "swupdate.h"
>>>>>>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>>>>>>> fdisk_context *cxt, struct diskpart_ta
>>>>>>>       return ret;
>>>>>>>   }
>>>>>>>   +static struct fdisk_partition *
>>>>>>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table 
>>>>>>> *tb, char *name)
>>>>>>> +{
>>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>>> +    struct fdisk_partition *ipa = NULL;
>>>>>>> +    struct fdisk_iter *itr;
>>>>>>> +    const char *iname;
>>>>>>> +
>>>>>>> +    if (!tb || !name)
>>>>>>> +        goto out;
>>>>>>> +
>>>>>>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>>>>>>> +
>>>>>>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>>>>>>> +        iname = fdisk_partition_get_name(ipa);
>>>>>>> +        if (iname && !strcmp(iname, name)) {
>>>>>>> +            pa = ipa;
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    fdisk_free_iter(itr);
>>>>>>> +
>>>>>>> + out:
>>>>>>> +    return pa;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static struct fdisk_partition *
>>>>>>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char 
>>>>>>> *name)
>>>>>>> +{
>>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>>> +
>>>>>>> +    if (!tb || !name)
>>>>>>> +        goto out;
>>>>>>> +
>>>>>>> +    if (tb->parent)
>>>>>>> +        pa = 
>>>>>>> diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
>>>>>>> +
>>>>>>> + out:
>>>>>>> +    return pa;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>>>>>>> +                   struct create_table *createtable,
>>>>>>> +                   char *name1, char *name2)
>>>>>>> +{
>>>>>>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>>>>>>> +    int ret = -1;
>>>>>>> +
>>>>>>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>>>>>>> +    if (!pa1) {
>>>>>>> +        ERROR("Can't find partition %s", name1);
>>>>>>> +        goto out;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>>>>>>> +    if (!pa2) {
>>>>>>> +        ERROR("Can't find partition %s", name2);
>>>>>>> +        goto out;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ret = fdisk_partition_set_name(pa1, name2);
>>>>>>> +    if (ret)
>>>>>>> +        goto out;
>>>>>>> +    ret = fdisk_partition_set_name(pa2, name1);
>>>>>>> +    if (ret)
>>>>>>> +        goto out;
>>>>>>> +
>>>>>>> +    createtable->parent = true;
>>>>>>> +
>>>>>>> + out:
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>>>>>>                     struct partition_data *part,
>>>>>>>                     unsigned long sector_size,
>>>>>>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>>>>>>> fdisk_context *cxt)
>>>>>>>       fdisk_unref_context(cxt);
>>>>>>>   }
>>>>>>>   +static int diskpart_reread_partition(char *device)
>>>>>>> +{
>>>>>>> +    int fd, ret = -1;
>>>>>>> +
>>>>>>> +    fd = open(device, O_RDONLY);
>>>>>>> +    if (fd < 0) {
>>>>>>> +        ERROR("Device %s can't be opened", device);
>>>>>>> +        goto out;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    ret = ioctl(fd, BLKRRPART, NULL);
>>>>>>> +    if (ret < 0) {
>>>>>>> +        ERROR("Scan cannot be done on device %s", device);
>>>>>>> +        goto out_close;
>>>>>>> +    }
>>>>>>> +
>>>>>>> + out_close:
>>>>>>> +    close(fd);
>>>>>>> +
>>>>>>> + out:
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>>   static int diskpart(struct img_type *img,
>>>>>>>       void __attribute__ ((__unused__)) *data)
>>>>>>>   {
>>>>>>> @@ -1066,9 +1165,154 @@ handler_release:
>>>>>>>       return ret;
>>>>>>>   }
>>>>>>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>>>>>>> +{
>>>>>>> +    struct fdisk_context *cxt = NULL;
>>>>>>> +    struct diskpart_table *tb = NULL;
>>>>>>> +    int ret = 0;
>>>>>>> +    unsigned long hybrid = 0;
>>>>>>> +    struct hnd_priv priv =  {
>>>>>>> +        .labeltype = FDISK_DISKLABEL_DOS,
>>>>>>> +    };
>>>>>>> +    struct create_table *createtable = NULL;
>>>>>>> +    struct script_handler_data *script_data = data;
>>>>>>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>>>>>>> +    struct dict_list *partitions;
>>>>>>> +    struct dict_list_elem *partition;
>>>>>>> +    int num, count = 0;
>>>>>>> +    char *name[2];
>>>>>>> +
>>>>>>> +    if (!script_data)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    /*
>>>>>>> +     * Call only in case of postinstall
>>>>>>> +     */
>>>>>>> +    if (script_data->scriptfn != POSTINSTALL)
>>>>>>> +        return 0;
>>>>>>> +
>>>>>>> +    LIST_INIT(&priv.listparts);
>>>>>>> +    if (!strlen(img->device)) {
>>>>>>> +        ERROR("Partition handler without setting the device");
>>>>>>> +        return -EINVAL;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    createtable = calloc(1, sizeof(*createtable));
>>>>>>> +    if (!createtable) {
>>>>>>> +        ERROR("OOM allocating createtable !");
>>>>>>> +        return -ENOMEM;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /*
>>>>>>> +     * Reads flags: nolock and noinuse
>>>>>>> +     */
>>>>>>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>>>>>>> "nolock"));
>>>>>>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>>>>>>> "noinuse"));
>>>>>>> +
>>>>>>> +        /*
>>>>>>> +         * Create context
>>>>>>> +         */
>>>>>>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>>>>>>> createtable);
>>>>>>> +    if (ret == -EACCES)
>>>>>>> +        goto handler_release;
>>>>>>> +    else if (ret)
>>>>>>> +        goto handler_exit;
>>>>>>> +
>>>>>>> +    tb = calloc(1, sizeof(*tb));
>>>>>>> +    if (!tb) {
>>>>>>> +        ERROR("OOM loading partitions !");
>>>>>>> +        ret = -ENOMEM;
>>>>>>> +        goto handler_exit;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /*
>>>>>>> +     * Fill the in-memory partition table from the disk.
>>>>>>> +     */
>>>>>>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>>>>>>> +    if (ret)
>>>>>>> +        goto handler_exit;
>>>>>>> +
>>>>>>> +    while (1) {
>>>>>>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>>>>>>> +        partitions = dict_get_list(&img->properties, prop);
>>>>>>> +        if (!partitions)
>>>>>>> +            break;
>>>>>>> +
>>>>>>> +        num = 0;
>>>>>>> +        LIST_FOREACH(partition, partitions, next) {
>>>>>>> +            if (num >= 2) {
>>>>>>> +                ERROR("Too many partition (%s)", prop);
>>>>>>> +                goto handler_exit;
>>>>>>> +            }
>>>>>>> +
>>>>>>> +            name[num] = partition->value;
>>>>>>> +            num++;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        if (num != 2) {
>>>>>>> +            ERROR("Invalid number (%d) of partition (%s)", num, 
>>>>>>> prop);
>>>>>>> +            goto handler_exit;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>>>>>>> +
>>>>>>> +        ret = diskpart_swap_partition(tb, createtable, name[0], 
>>>>>>> name[1]);
>>>>>>> +        if (ret) {
>>>>>>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        count++;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* Reload table for parent */
>>>>>>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>>>>>>> +    if (ret) {
>>>>>>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>>>>>>> +        goto handler_exit;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* Reload table for child */
>>>>>>> +    if (IS_HYBRID(cxt)) {
>>>>>>> +        ret = diskpart_reload_table(cxt, tb->child);
>>>>>>> +        if (ret) {
>>>>>>> +            ERROR("Can't reload table for child (err = %d)", ret);
>>>>>>> +            goto handler_exit;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* Write table */
>>>>>>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>>>>>>> priv.noinuse);
>>>>>>> +    if (ret) {
>>>>>>> +        ERROR("Can't write table (err = %d)", ret);
>>>>>>> +        goto handler_exit;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +handler_exit:
>>>>>>> +    if (tb)
>>>>>>> +        diskpart_unref_table(tb);
>>>>>>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>>>>>>> +        if (fdisk_deassign_device(cxt, 0))
>>>>>>> +            WARN("Error deassign device %s", img->device);
>>>>>>> +
>>>>>>> +handler_release:
>>>>>>> +    if (cxt)
>>>>>>> +        diskpart_unref_context(cxt);
>>>>>>> +
>>>>>>> +    /*
>>>>>>> +     * Re-read the partition table to be sure that SWupdate does 
>>>>>>> not
>>>>>>> +     * try to acces the partitions before the kernel is ready
>>>>>>> +     */
>>>>>>> +    diskpart_reread_partition(img->device);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>>   __attribute__((constructor))
>>>>>>>   void diskpart_handler(void)
>>>>>>>   {
>>>>>>>       register_handler("diskpart", diskpart,
>>>>>>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>>> +        register_handler("gptswap", gpt_swap_partition,
>>>>>>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>>>   }
>>>>>>
>>>>>>
>>>>>
>>>>
>>>>
>>>
>>
>>
Philippe REYNES June 15, 2022, 4:43 p.m. UTC | #8
Hi Stefano,


Le 13/04/2022 à 22:27, Stefano Babic a écrit :
> Hi Philippe,
>
> On 13.04.22 19:25, Philippe REYNES wrote:
>> Hi Stefano,
>>
>> Le 07/04/2022 à 15:28, Stefano Babic a écrit :
>>> Hi Philippe,
>>>
>>> On 07.04.22 15:15, Philippe REYNES wrote:
>>>> Hi Stefano,
>>>>
>>>>
>>>> Le 06/04/2022 à 18:27, Stefano Babic a écrit :
>>>>> Hi Philippe,
>>>>>
>>>>> On 06.04.22 17:30, Philippe REYNES wrote:
>>>>>> Hi Stefano,
>>>>>>
>>>>>>
>>>>>> Le 05/04/2022 à 16:17, Stefano Babic a écrit :
>>>>>>> Hi Philippe,
>>>>>>>
>>>>>>> On 05.04.22 15:17, Philippe Reynes wrote:
>>>>>>>> Add a handler gptswap to swap gpt partition.
>>>>>>>> This handler is a script that can be used to swap gpt partition
>>>>>>>> names after all the image were flashed (only partition names are
>>>>>>>> swapped). This script may be usefull for a dual bank strategy.
>>>>>>>>
>>>>>>>> For example:
>>>>>>>> scripts: (
>>>>>>>>     {
>>>>>>>>         type = "gptswap";
>>>>>>>>         device = "/dev/vdb";
>>>>>>>>         properties =
>>>>>>>>         {
>>>>>>>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>>>>>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>>>>>>>         };
>>>>>>>>     },
>>>>>>>> );
>>>>>>>>
>>>>>>>
>>>>>>> I would like first to discuss about the way to do, not the patch 
>>>>>>> (I will review later). My concerns are related if this can be 
>>>>>>> atomic.
>>>>>>>
>>>>>>> The procedure you implement is something like:
>>>>>>>
>>>>>>> - you read the partitions from disk
>>>>>>> - you exchange the names of partitions according to the rule in 
>>>>>>> sw-description. This happens in memory, no problem up now.
>>>>>>>
>>>>>>> - call diskpart_write_table() and rewrite the whole table, as it 
>>>>>>> should be.
>>>>>>>
>>>>>>> At the end, the whole partition table is rewritten. Because of 
>>>>>>> that, and also because the data should then written by the low 
>>>>>>> level driver (for example a SDHC driver), I have the feeling 
>>>>>>> that it is not atomic. We can also have two partitions with the 
>>>>>>> same name after a power cut, or even a corrupted table. Or do 
>>>>>>> you have some other thoughts ?
>>>>>>>
>>>>>>> I have thought to implement this in a less invasive way, just 
>>>>>>> setting a "bootflag" - we have not on GPT, but we can use an 
>>>>>>> attribute 
>>>>>>> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Cc5109ecf837f419a497708da1d8c1822%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637854784782837904%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=0BjUr5G%2BLSn5im14zOOMtPAju81LKmUpvfJr5eGQAs4%3D&amp;reserved=0), 
>>>>>>> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() 
>>>>>>> with one "user" flag.
>>>>>>>
>>>>>>
>>>>>> In my understanding, partitions flags are set in memory, and then 
>>>>>> the whole GPT partition is written to the disk.
>>>>>
>>>>> Right.
>>>>>
>>>>>> So the behavior is the same if we swap the partition name or a 
>>>>>> partition flag.
>>>>>
>>>>> Ok, got it. So because we rewrite the whole GPT header, we 
>>>>> recompute the CRC, and then it is not important if we change a 
>>>>> string (=name) or a single bit (=flag). I think this is fine, so 
>>>>> let's proceed.
>>>>>
>>>>>>
>>>>>>
>>>>>> I have looked the function gpt_write_disklabel (in util-linux)
>>>>>
>>>>> ...yes, util-linux is the source of inspiration for most code 
>>>>> around disks in SWUpdate....
>>>>>
>>>>>> that is used to write the full GPT partition.
>>>>>>
>>>>>> This comment describes how it happens:
>>>>>>
>>>>>>      /*
>>>>>>
>>>>>>       * UEFI requires writing in this specific order:
>>>>>>
>>>>>>       *   1) backup partition tables
>>>>>>
>>>>>>       *   2) backup GPT header
>>>>>>
>>>>>>       *   3) primary partition tables
>>>>>>
>>>>>>       *   4) primary GPT header
>>>>>>
>>>>>>       *   5) protective MBR
>>>>>>
>>>>>>       *
>>>>>>
>>>>>>       * If any write fails, we abort the rest.
>>>>>>
>>>>>>       */
>>>>>>
>>>>>> When GPT is used on a disk, there are 5 elements:
>>>>>> - protective MBR (LBA 0)
>>>>>> - primary GPT table header (LBA 1)
>>>>>> - primary GPT table partition (LBA 2 -> 33)
>>>>>> - secondary GPT table partition  (LBA -2 -> -33)
>>>>>> - secondary GPT table header (LBA - 1)
>>>>>> And there are two CRCs in the GPT table header, one that protect 
>>>>>> the GPT table header, and one that protect the GPT table partition.
>>>>>
>>>>> Ok
>>>>>
>>>>>>
>>>>>> if step 1 or 2 fail, the secondary GPT is invalid and it can be 
>>>>>> recovered using the primary GPT (and the swap is not applied).
>>>>>
>>>>> More as one step fails, I am bothering what is happening in case 
>>>>> of power-cut and if some of these steps are not atomic, and system 
>>>>> does not boot or boot with the wrong partition.
>>>>>
>>>>> My first concern is who is then in charge to repair : we have a 
>>>>> working system with full GPT protection (both header are valid), 
>>>>> we update and we have a power-cut. Good, backup header is 
>>>>> corrupted, but we can boot again. But as far as I know, the kernel 
>>>>> just mounts the partition and the GPT is not explicitely set or 
>>>>> repaired, decreasing our reliability.
>>>>>
>>>>> What do you think ?
>>>>
>>>> I think that we should look for a way to automatically check and 
>>>> repair GPT partition at boot.
>>>> I don't found a way to do that. So may be we will need to provide a 
>>>> tool/patch to do this check and repair.
>>>
>>> You can fix it with fdisk / gdisk (I do npot know with sfdisk), or a 
>>> small code doing this. Anyway, fixable.
>>>
>>>>
>>>>>
>>>>>> If step 3 or 4 fail, the primary GPT is invalid and it can be 
>>>>>> recovered using the secondary GPT (and the swap is applied).
>>>>>
>>>>> Ok - fine, with the exception that I do not know who will repair 
>>>>> the primary GPT header.
>>>>>
>>>>>> If step 5 fails, I am afraid that protective MBR could be 
>>>>>> corrupted, so the GPT table may be invalid.
>>>>>
>>>>> I just tested with a not very recent U-Boot, in any case a device 
>>>>> with GPT table. Dropping protective MBR, U-Boot does not find any 
>>>>> partition and device does not boot. So it looks like a no way here.
>>>>>
>>>>> And rerunning SWUpdate as recovery, the diskpartitioner (at the 
>>>>> end, libfdisk) reports that partitions are damaged and must be 
>>>>> created.
>>>>>
>>>>>>
>>>>>> But, I think that we could avoid to write the protective MBR if 
>>>>>> there is already a protective MBR on the disk.
>>>>>
>>>>> According to gpt_write_disklabel, this is done by forcing a hybrid 
>>>>> MBR, but sure it is not what we want.
>>>>
>>>> Yes, the MBR is not written if the mode if hybrib.
>>>> My idea is not to set the hybrid mode, but before writting a new 
>>>> MBR, read the MBR on the disk, and compare it with
>>>> the MRB that should be written. If they are the same, then the new 
>>>> MBR is not written on the disk.
>>>>
>>>>>
>>>>>> And as we talk about a script to swap GPT partition, a protective 
>>>>>> MBR should be already on the disk.
>>>>>
>>>>> Sure, agree.
>>>>>
>>>>>>
>>>>>>
>>>>>>> Can we rely on the mechanism you implemented ? Quite scared that 
>>>>>>> the tables are rewritten at any toggle.
>>>>>>>
>>>>>>
>>>>>> If we modify libfdisk to only write the MBR when it is usefull.
>>>>>
>>>>> Ok - the question is how to push this change mainline, and if this 
>>>>> change can be mainlined. Surely a hack just for SWUpdate (in a 
>>>>> meta layer) is not praticable, and not appliable for other systems 
>>>>> (Debian for example).
>>>>>
>>>>> This is not so simple, because struct fdisk_context is opaque. And 
>>>>> context is the only parameter for gpt_write_disklabel().
>>>>>
>>>>>> Before writting the MBR, libfdisk could read the MBR on the disk, 
>>>>>> and if the MBR on the disk is the same
>>>>>> that the MBR in memory, then the write is avoided.
>>>>>> I think that with this change, the swap could be quite safe.
>>>>>
>>>>> Point is how to push such as changes, that are useful probably 
>>>>> just in our use cases....
>>>>>
>>>>
>>>> I think that we should push a patch to the libfdisk community. If 
>>>> they accept the patch, we could continue in this
>>>> way, if they refuse the patch, we may need to consider another 
>>>> solution.
>>>> I prepare this patch and I send it to the libfdisk community ASAP.
>>>>
>>>
>>> Fully agree, go on !
>>
>> I  have sent a patch and it has been accepted :
>> https://fra01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Futils%2Futil-linux%2Futil-linux.git%2Fcommit%2F%3Fid%3D17b4496a1819b8bf1b78683e49e3ade1f8941b3f&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Cc5109ecf837f419a497708da1d8c1822%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637854784782837904%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=muxlEv4hdZMtvj4kGwOgi%2FA1%2F3Dg3R8fF%2B2a7csp9%2BM%3D&amp;reserved=0 
>>
>
> Well done !
>
>> So, if you agree,  I think we can continue to work on gpt partition 
>> swap.
>
> Fully agree.
>
>>
>>
>>>
>>>> If this patch is accepted, I think that we could add a feature that 
>>>> automatically check and repair GPT table partition.
>>>
>>> Yes: if we can safe switch even in case of power-cut, it is ok for me.
>>>
>>>> I think it could be done in u-boot, in the kernel or in an 
>>>> userspace daemon.
>>>
>>> The easiest way is in user space.
>>
>> I agree, but u-boot use the gpt before.
>> I strongly thought about adding two sub functions "check" and 
>> "repair" in the command gpt.
>
> Ok good plan.
>>
>>>
>>>>
>>>> I am interested to add this feature (of course, enabled by a 
>>>> config) in u-boot. The idea is quite simple,
>>>> when u-boot starts, before parsing the GPT table to find the 
>>>> partition for the kernel, u-boot checks and
>>>> if necessary, repairs the GPT table. If the primary GPT table is 
>>>> broken, the secondary GPT table is used
>>>> to repair the primary GPT table. And same, in the other way, 
>>>> primary GPT table is used to repair the
>>>> secondary GPT table. A question would be about MBR, if the MBR is 
>>>> invalid, but a GPT table is valid
>>>> (primary or secondary), should we write a protective MBR ?.
>>>
>>> If MBR is invalid, it does not boot - tested with U-Boot 2022.01 and 
>>> Beaglebone. So I think we should.
>>
>> If the MBR is invalid, I think that there is two solutions, either it 
>> was a protective MBR and it could
>> be restaured
>
> Exactly.
>
>> or it was an hybrib MBR, and in this case, the dos partition are lost.
>> In both case, it is possible to save the gpt partition (if at least 
>> primary or secondary are valid).
>
> Right - as far as I see, U-Boot (in part_efi.c) checks to see a valid 
> PMBR, and discard the correct GPT entries in case of failure. This is 
> not needed and drops the redundancy we have with the dual GPT headers.
>
> Regards,
> Stefano
>

Finaly, I have upstreamed the support of the command gpt repair in u-boot :
https://source.denx.de/u-boot/u-boot/-/commit/26f404c7665265946305f2883cfad1b785d35bb2
The support of gpt verify is already present in u-boot.

There is a commit in libfdisk that avoid to write the mbr when the new 
mbr is the same as the previous mbr,
and we also have a command to repair a gpt table in u-boot.

With those new features, do you think the script gpt swap and the 
handler gptpart may be integrated in swupdate ?
If you agree to integrate those features, I may sent a v2 of my patch 
with a rebase on the branch master (and of course,
with some changes if you have some feedbacks).

Regards,
Philippe



>>
>>
>>>
>>> Regards,
>>> Stefano
>>
>>
>> Regards,
>> Philippe
>>
>>
>>
>>>
>>>>
>>>>
>>>>> Regards,
>>>>> Stefano
>>>>>
>>>>>>
>>>>>>> Regards,
>>>>>>> Stefano
>>>>>>
>>>>>>
>>>>>> Regards,
>>>>>>
>>>>>> Philippe
>>>>>>
>>>>>>
>>>>
>>>> Regards,
>>>> Philippe
>>>>
>>>>
>>>>
>>>>>>
>>>>>>>
>>>>>>>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>>>>>>>> ---
>>>>>>>>   doc/source/handlers.rst     |  24 ++++
>>>>>>>>   handlers/diskpart_handler.c | 244 
>>>>>>>> ++++++++++++++++++++++++++++++++++++
>>>>>>>>   2 files changed, 268 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>>>>>>>> index 8d2aeea..025d100 100644
>>>>>>>> --- a/doc/source/handlers.rst
>>>>>>>> +++ b/doc/source/handlers.rst
>>>>>>>> @@ -943,6 +943,30 @@ MBR Example:
>>>>>>>>          }
>>>>>>>>       }
>>>>>>>>   +gpt partition swap
>>>>>>>> +..................
>>>>>>>> +
>>>>>>>> +There is a handler gptswap that allow to swap gpt partitions 
>>>>>>>> after all the images were flashed.
>>>>>>>> +This handler only swap the name of the partition. It coud be 
>>>>>>>> usefull for a dual bank strategy.
>>>>>>>> +This handler is a script for the point of view of swupdate, so 
>>>>>>>> the node that provide it should
>>>>>>>> +be added in the section scripts.
>>>>>>>> +
>>>>>>>> +Simple example:
>>>>>>>> +
>>>>>>>> +::
>>>>>>>> +
>>>>>>>> +        scripts: (
>>>>>>>> +                {
>>>>>>>> +                        type = "gptswap";
>>>>>>>> +                        device = "/dev/vdb";
>>>>>>>> +                        properties =
>>>>>>>> +                        {
>>>>>>>> +                                swap-0 = [ "u-boot-0" , 
>>>>>>>> "u-boot-1" ];
>>>>>>>> +                                swap-1 = [ "kernel-0" , 
>>>>>>>> "kernel-1" ];
>>>>>>>> +                        };
>>>>>>>> +                },
>>>>>>>> +         );
>>>>>>>> +
>>>>>>>>   Diskformat Handler
>>>>>>>>   ------------------
>>>>>>>>   diff --git a/handlers/diskpart_handler.c 
>>>>>>>> b/handlers/diskpart_handler.c
>>>>>>>> index 0af4e60..0ff5968 100644
>>>>>>>> --- a/handlers/diskpart_handler.c
>>>>>>>> +++ b/handlers/diskpart_handler.c
>>>>>>>> @@ -14,8 +14,10 @@
>>>>>>>>   #include <errno.h>
>>>>>>>>   #include <ctype.h>
>>>>>>>>   #include <sys/file.h>
>>>>>>>> +#include <sys/ioctl.h>
>>>>>>>>   #include <sys/types.h>
>>>>>>>>   #include <libfdisk/libfdisk.h>
>>>>>>>> +#include <linux/fs.h>
>>>>>>>>   #include <fs_interface.h>
>>>>>>>>   #include <uuid/uuid.h>
>>>>>>>>   #include "swupdate.h"
>>>>>>>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>>>>>>>> fdisk_context *cxt, struct diskpart_ta
>>>>>>>>       return ret;
>>>>>>>>   }
>>>>>>>>   +static struct fdisk_partition *
>>>>>>>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table 
>>>>>>>> *tb, char *name)
>>>>>>>> +{
>>>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>>>> +    struct fdisk_partition *ipa = NULL;
>>>>>>>> +    struct fdisk_iter *itr;
>>>>>>>> +    const char *iname;
>>>>>>>> +
>>>>>>>> +    if (!tb || !name)
>>>>>>>> +        goto out;
>>>>>>>> +
>>>>>>>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>>>>>>>> +
>>>>>>>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>>>>>>>> +        iname = fdisk_partition_get_name(ipa);
>>>>>>>> +        if (iname && !strcmp(iname, name)) {
>>>>>>>> +            pa = ipa;
>>>>>>>> +            break;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    fdisk_free_iter(itr);
>>>>>>>> +
>>>>>>>> + out:
>>>>>>>> +    return pa;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static struct fdisk_partition *
>>>>>>>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char 
>>>>>>>> *name)
>>>>>>>> +{
>>>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>>>> +
>>>>>>>> +    if (!tb || !name)
>>>>>>>> +        goto out;
>>>>>>>> +
>>>>>>>> +    if (tb->parent)
>>>>>>>> +        pa = 
>>>>>>>> diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
>>>>>>>> +
>>>>>>>> + out:
>>>>>>>> +    return pa;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>>>>>>>> +                   struct create_table *createtable,
>>>>>>>> +                   char *name1, char *name2)
>>>>>>>> +{
>>>>>>>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>>>>>>>> +    int ret = -1;
>>>>>>>> +
>>>>>>>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>>>>>>>> +    if (!pa1) {
>>>>>>>> +        ERROR("Can't find partition %s", name1);
>>>>>>>> +        goto out;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>>>>>>>> +    if (!pa2) {
>>>>>>>> +        ERROR("Can't find partition %s", name2);
>>>>>>>> +        goto out;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    ret = fdisk_partition_set_name(pa1, name2);
>>>>>>>> +    if (ret)
>>>>>>>> +        goto out;
>>>>>>>> +    ret = fdisk_partition_set_name(pa2, name1);
>>>>>>>> +    if (ret)
>>>>>>>> +        goto out;
>>>>>>>> +
>>>>>>>> +    createtable->parent = true;
>>>>>>>> +
>>>>>>>> + out:
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>>>>>>>                     struct partition_data *part,
>>>>>>>>                     unsigned long sector_size,
>>>>>>>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>>>>>>>> fdisk_context *cxt)
>>>>>>>>       fdisk_unref_context(cxt);
>>>>>>>>   }
>>>>>>>>   +static int diskpart_reread_partition(char *device)
>>>>>>>> +{
>>>>>>>> +    int fd, ret = -1;
>>>>>>>> +
>>>>>>>> +    fd = open(device, O_RDONLY);
>>>>>>>> +    if (fd < 0) {
>>>>>>>> +        ERROR("Device %s can't be opened", device);
>>>>>>>> +        goto out;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    ret = ioctl(fd, BLKRRPART, NULL);
>>>>>>>> +    if (ret < 0) {
>>>>>>>> +        ERROR("Scan cannot be done on device %s", device);
>>>>>>>> +        goto out_close;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> + out_close:
>>>>>>>> +    close(fd);
>>>>>>>> +
>>>>>>>> + out:
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>   static int diskpart(struct img_type *img,
>>>>>>>>       void __attribute__ ((__unused__)) *data)
>>>>>>>>   {
>>>>>>>> @@ -1066,9 +1165,154 @@ handler_release:
>>>>>>>>       return ret;
>>>>>>>>   }
>>>>>>>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>>>>>>>> +{
>>>>>>>> +    struct fdisk_context *cxt = NULL;
>>>>>>>> +    struct diskpart_table *tb = NULL;
>>>>>>>> +    int ret = 0;
>>>>>>>> +    unsigned long hybrid = 0;
>>>>>>>> +    struct hnd_priv priv =  {
>>>>>>>> +        .labeltype = FDISK_DISKLABEL_DOS,
>>>>>>>> +    };
>>>>>>>> +    struct create_table *createtable = NULL;
>>>>>>>> +    struct script_handler_data *script_data = data;
>>>>>>>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>>>>>>>> +    struct dict_list *partitions;
>>>>>>>> +    struct dict_list_elem *partition;
>>>>>>>> +    int num, count = 0;
>>>>>>>> +    char *name[2];
>>>>>>>> +
>>>>>>>> +    if (!script_data)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    /*
>>>>>>>> +     * Call only in case of postinstall
>>>>>>>> +     */
>>>>>>>> +    if (script_data->scriptfn != POSTINSTALL)
>>>>>>>> +        return 0;
>>>>>>>> +
>>>>>>>> +    LIST_INIT(&priv.listparts);
>>>>>>>> +    if (!strlen(img->device)) {
>>>>>>>> +        ERROR("Partition handler without setting the device");
>>>>>>>> +        return -EINVAL;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    createtable = calloc(1, sizeof(*createtable));
>>>>>>>> +    if (!createtable) {
>>>>>>>> +        ERROR("OOM allocating createtable !");
>>>>>>>> +        return -ENOMEM;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    /*
>>>>>>>> +     * Reads flags: nolock and noinuse
>>>>>>>> +     */
>>>>>>>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>>>>>>>> "nolock"));
>>>>>>>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>>>>>>>> "noinuse"));
>>>>>>>> +
>>>>>>>> +        /*
>>>>>>>> +         * Create context
>>>>>>>> +         */
>>>>>>>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>>>>>>>> createtable);
>>>>>>>> +    if (ret == -EACCES)
>>>>>>>> +        goto handler_release;
>>>>>>>> +    else if (ret)
>>>>>>>> +        goto handler_exit;
>>>>>>>> +
>>>>>>>> +    tb = calloc(1, sizeof(*tb));
>>>>>>>> +    if (!tb) {
>>>>>>>> +        ERROR("OOM loading partitions !");
>>>>>>>> +        ret = -ENOMEM;
>>>>>>>> +        goto handler_exit;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    /*
>>>>>>>> +     * Fill the in-memory partition table from the disk.
>>>>>>>> +     */
>>>>>>>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>>>>>>>> +    if (ret)
>>>>>>>> +        goto handler_exit;
>>>>>>>> +
>>>>>>>> +    while (1) {
>>>>>>>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>>>>>>>> +        partitions = dict_get_list(&img->properties, prop);
>>>>>>>> +        if (!partitions)
>>>>>>>> +            break;
>>>>>>>> +
>>>>>>>> +        num = 0;
>>>>>>>> +        LIST_FOREACH(partition, partitions, next) {
>>>>>>>> +            if (num >= 2) {
>>>>>>>> +                ERROR("Too many partition (%s)", prop);
>>>>>>>> +                goto handler_exit;
>>>>>>>> +            }
>>>>>>>> +
>>>>>>>> +            name[num] = partition->value;
>>>>>>>> +            num++;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        if (num != 2) {
>>>>>>>> +            ERROR("Invalid number (%d) of partition (%s)", 
>>>>>>>> num, prop);
>>>>>>>> +            goto handler_exit;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>>>>>>>> +
>>>>>>>> +        ret = diskpart_swap_partition(tb, createtable, 
>>>>>>>> name[0], name[1]);
>>>>>>>> +        if (ret) {
>>>>>>>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>>>>>>>> +            break;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        count++;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    /* Reload table for parent */
>>>>>>>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>>>>>>>> +    if (ret) {
>>>>>>>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>>>>>>>> +        goto handler_exit;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    /* Reload table for child */
>>>>>>>> +    if (IS_HYBRID(cxt)) {
>>>>>>>> +        ret = diskpart_reload_table(cxt, tb->child);
>>>>>>>> +        if (ret) {
>>>>>>>> +            ERROR("Can't reload table for child (err = %d)", 
>>>>>>>> ret);
>>>>>>>> +            goto handler_exit;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    /* Write table */
>>>>>>>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>>>>>>>> priv.noinuse);
>>>>>>>> +    if (ret) {
>>>>>>>> +        ERROR("Can't write table (err = %d)", ret);
>>>>>>>> +        goto handler_exit;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +handler_exit:
>>>>>>>> +    if (tb)
>>>>>>>> +        diskpart_unref_table(tb);
>>>>>>>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>>>>>>>> +        if (fdisk_deassign_device(cxt, 0))
>>>>>>>> +            WARN("Error deassign device %s", img->device);
>>>>>>>> +
>>>>>>>> +handler_release:
>>>>>>>> +    if (cxt)
>>>>>>>> +        diskpart_unref_context(cxt);
>>>>>>>> +
>>>>>>>> +    /*
>>>>>>>> +     * Re-read the partition table to be sure that SWupdate 
>>>>>>>> does not
>>>>>>>> +     * try to acces the partitions before the kernel is ready
>>>>>>>> +     */
>>>>>>>> +    diskpart_reread_partition(img->device);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>   __attribute__((constructor))
>>>>>>>>   void diskpart_handler(void)
>>>>>>>>   {
>>>>>>>>       register_handler("diskpart", diskpart,
>>>>>>>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>>>> +        register_handler("gptswap", gpt_swap_partition,
>>>>>>>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>>>>   }
>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>>
>>>>
>>>
>>>
>
Stefano Babic June 16, 2022, 7:45 a.m. UTC | #9
Hi Philippe,

On 15.06.22 18:43, Philippe REYNES wrote:
> Hi Stefano,
> 
> 
> Le 13/04/2022 à 22:27, Stefano Babic a écrit :
>> Hi Philippe,
>>
>> On 13.04.22 19:25, Philippe REYNES wrote:
>>> Hi Stefano,
>>>
>>> Le 07/04/2022 à 15:28, Stefano Babic a écrit :
>>>> Hi Philippe,
>>>>
>>>> On 07.04.22 15:15, Philippe REYNES wrote:
>>>>> Hi Stefano,
>>>>>
>>>>>
>>>>> Le 06/04/2022 à 18:27, Stefano Babic a écrit :
>>>>>> Hi Philippe,
>>>>>>
>>>>>> On 06.04.22 17:30, Philippe REYNES wrote:
>>>>>>> Hi Stefano,
>>>>>>>
>>>>>>>
>>>>>>> Le 05/04/2022 à 16:17, Stefano Babic a écrit :
>>>>>>>> Hi Philippe,
>>>>>>>>
>>>>>>>> On 05.04.22 15:17, Philippe Reynes wrote:
>>>>>>>>> Add a handler gptswap to swap gpt partition.
>>>>>>>>> This handler is a script that can be used to swap gpt partition
>>>>>>>>> names after all the image were flashed (only partition names are
>>>>>>>>> swapped). This script may be usefull for a dual bank strategy.
>>>>>>>>>
>>>>>>>>> For example:
>>>>>>>>> scripts: (
>>>>>>>>>     {
>>>>>>>>>         type = "gptswap";
>>>>>>>>>         device = "/dev/vdb";
>>>>>>>>>         properties =
>>>>>>>>>         {
>>>>>>>>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>>>>>>>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>>>>>>>>         };
>>>>>>>>>     },
>>>>>>>>> );
>>>>>>>>>
>>>>>>>>
>>>>>>>> I would like first to discuss about the way to do, not the patch 
>>>>>>>> (I will review later). My concerns are related if this can be 
>>>>>>>> atomic.
>>>>>>>>
>>>>>>>> The procedure you implement is something like:
>>>>>>>>
>>>>>>>> - you read the partitions from disk
>>>>>>>> - you exchange the names of partitions according to the rule in 
>>>>>>>> sw-description. This happens in memory, no problem up now.
>>>>>>>>
>>>>>>>> - call diskpart_write_table() and rewrite the whole table, as it 
>>>>>>>> should be.
>>>>>>>>
>>>>>>>> At the end, the whole partition table is rewritten. Because of 
>>>>>>>> that, and also because the data should then written by the low 
>>>>>>>> level driver (for example a SDHC driver), I have the feeling 
>>>>>>>> that it is not atomic. We can also have two partitions with the 
>>>>>>>> same name after a power cut, or even a corrupted table. Or do 
>>>>>>>> you have some other thoughts ?
>>>>>>>>
>>>>>>>> I have thought to implement this in a less invasive way, just 
>>>>>>>> setting a "bootflag" - we have not on GPT, but we can use an 
>>>>>>>> attribute 
>>>>>>>> (https://fra01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fftp.ntu.edu.tw%2Flinux%2Futils%2Futil-linux%2Fv2.34%2Flibfdisk-docs%2Flibfdisk-UEFI-GPT.html%23fdisk-gpt-get-partition-attrs&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Cc5109ecf837f419a497708da1d8c1822%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637854784782837904%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=0BjUr5G%2BLSn5im14zOOMtPAju81LKmUpvfJr5eGQAs4%3D&amp;reserved=0), 
>>>>>>>> maybe GPT_FLAG_LEGACYBOOT or using fdisk_toggle_partition_flag() 
>>>>>>>> with one "user" flag.
>>>>>>>>
>>>>>>>
>>>>>>> In my understanding, partitions flags are set in memory, and then 
>>>>>>> the whole GPT partition is written to the disk.
>>>>>>
>>>>>> Right.
>>>>>>
>>>>>>> So the behavior is the same if we swap the partition name or a 
>>>>>>> partition flag.
>>>>>>
>>>>>> Ok, got it. So because we rewrite the whole GPT header, we 
>>>>>> recompute the CRC, and then it is not important if we change a 
>>>>>> string (=name) or a single bit (=flag). I think this is fine, so 
>>>>>> let's proceed.
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> I have looked the function gpt_write_disklabel (in util-linux)
>>>>>>
>>>>>> ...yes, util-linux is the source of inspiration for most code 
>>>>>> around disks in SWUpdate....
>>>>>>
>>>>>>> that is used to write the full GPT partition.
>>>>>>>
>>>>>>> This comment describes how it happens:
>>>>>>>
>>>>>>>      /*
>>>>>>>
>>>>>>>       * UEFI requires writing in this specific order:
>>>>>>>
>>>>>>>       *   1) backup partition tables
>>>>>>>
>>>>>>>       *   2) backup GPT header
>>>>>>>
>>>>>>>       *   3) primary partition tables
>>>>>>>
>>>>>>>       *   4) primary GPT header
>>>>>>>
>>>>>>>       *   5) protective MBR
>>>>>>>
>>>>>>>       *
>>>>>>>
>>>>>>>       * If any write fails, we abort the rest.
>>>>>>>
>>>>>>>       */
>>>>>>>
>>>>>>> When GPT is used on a disk, there are 5 elements:
>>>>>>> - protective MBR (LBA 0)
>>>>>>> - primary GPT table header (LBA 1)
>>>>>>> - primary GPT table partition (LBA 2 -> 33)
>>>>>>> - secondary GPT table partition  (LBA -2 -> -33)
>>>>>>> - secondary GPT table header (LBA - 1)
>>>>>>> And there are two CRCs in the GPT table header, one that protect 
>>>>>>> the GPT table header, and one that protect the GPT table partition.
>>>>>>
>>>>>> Ok
>>>>>>
>>>>>>>
>>>>>>> if step 1 or 2 fail, the secondary GPT is invalid and it can be 
>>>>>>> recovered using the primary GPT (and the swap is not applied).
>>>>>>
>>>>>> More as one step fails, I am bothering what is happening in case 
>>>>>> of power-cut and if some of these steps are not atomic, and system 
>>>>>> does not boot or boot with the wrong partition.
>>>>>>
>>>>>> My first concern is who is then in charge to repair : we have a 
>>>>>> working system with full GPT protection (both header are valid), 
>>>>>> we update and we have a power-cut. Good, backup header is 
>>>>>> corrupted, but we can boot again. But as far as I know, the kernel 
>>>>>> just mounts the partition and the GPT is not explicitely set or 
>>>>>> repaired, decreasing our reliability.
>>>>>>
>>>>>> What do you think ?
>>>>>
>>>>> I think that we should look for a way to automatically check and 
>>>>> repair GPT partition at boot.
>>>>> I don't found a way to do that. So may be we will need to provide a 
>>>>> tool/patch to do this check and repair.
>>>>
>>>> You can fix it with fdisk / gdisk (I do npot know with sfdisk), or a 
>>>> small code doing this. Anyway, fixable.
>>>>
>>>>>
>>>>>>
>>>>>>> If step 3 or 4 fail, the primary GPT is invalid and it can be 
>>>>>>> recovered using the secondary GPT (and the swap is applied).
>>>>>>
>>>>>> Ok - fine, with the exception that I do not know who will repair 
>>>>>> the primary GPT header.
>>>>>>
>>>>>>> If step 5 fails, I am afraid that protective MBR could be 
>>>>>>> corrupted, so the GPT table may be invalid.
>>>>>>
>>>>>> I just tested with a not very recent U-Boot, in any case a device 
>>>>>> with GPT table. Dropping protective MBR, U-Boot does not find any 
>>>>>> partition and device does not boot. So it looks like a no way here.
>>>>>>
>>>>>> And rerunning SWUpdate as recovery, the diskpartitioner (at the 
>>>>>> end, libfdisk) reports that partitions are damaged and must be 
>>>>>> created.
>>>>>>
>>>>>>>
>>>>>>> But, I think that we could avoid to write the protective MBR if 
>>>>>>> there is already a protective MBR on the disk.
>>>>>>
>>>>>> According to gpt_write_disklabel, this is done by forcing a hybrid 
>>>>>> MBR, but sure it is not what we want.
>>>>>
>>>>> Yes, the MBR is not written if the mode if hybrib.
>>>>> My idea is not to set the hybrid mode, but before writting a new 
>>>>> MBR, read the MBR on the disk, and compare it with
>>>>> the MRB that should be written. If they are the same, then the new 
>>>>> MBR is not written on the disk.
>>>>>
>>>>>>
>>>>>>> And as we talk about a script to swap GPT partition, a protective 
>>>>>>> MBR should be already on the disk.
>>>>>>
>>>>>> Sure, agree.
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>> Can we rely on the mechanism you implemented ? Quite scared that 
>>>>>>>> the tables are rewritten at any toggle.
>>>>>>>>
>>>>>>>
>>>>>>> If we modify libfdisk to only write the MBR when it is usefull.
>>>>>>
>>>>>> Ok - the question is how to push this change mainline, and if this 
>>>>>> change can be mainlined. Surely a hack just for SWUpdate (in a 
>>>>>> meta layer) is not praticable, and not appliable for other systems 
>>>>>> (Debian for example).
>>>>>>
>>>>>> This is not so simple, because struct fdisk_context is opaque. And 
>>>>>> context is the only parameter for gpt_write_disklabel().
>>>>>>
>>>>>>> Before writting the MBR, libfdisk could read the MBR on the disk, 
>>>>>>> and if the MBR on the disk is the same
>>>>>>> that the MBR in memory, then the write is avoided.
>>>>>>> I think that with this change, the swap could be quite safe.
>>>>>>
>>>>>> Point is how to push such as changes, that are useful probably 
>>>>>> just in our use cases....
>>>>>>
>>>>>
>>>>> I think that we should push a patch to the libfdisk community. If 
>>>>> they accept the patch, we could continue in this
>>>>> way, if they refuse the patch, we may need to consider another 
>>>>> solution.
>>>>> I prepare this patch and I send it to the libfdisk community ASAP.
>>>>>
>>>>
>>>> Fully agree, go on !
>>>
>>> I  have sent a patch and it has been accepted :
>>> https://fra01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Futils%2Futil-linux%2Futil-linux.git%2Fcommit%2F%3Fid%3D17b4496a1819b8bf1b78683e49e3ade1f8941b3f&amp;data=04%7C01%7Cphilippe.reynes%40softathome.com%7Cc5109ecf837f419a497708da1d8c1822%7Caa10e044e4054c10835336b4d0cce511%7C0%7C0%7C637854784782837904%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=muxlEv4hdZMtvj4kGwOgi%2FA1%2F3Dg3R8fF%2B2a7csp9%2BM%3D&amp;reserved=0 
>>>
>>
>> Well done !
>>
>>> So, if you agree,  I think we can continue to work on gpt partition 
>>> swap.
>>
>> Fully agree.
>>
>>>
>>>
>>>>
>>>>> If this patch is accepted, I think that we could add a feature that 
>>>>> automatically check and repair GPT table partition.
>>>>
>>>> Yes: if we can safe switch even in case of power-cut, it is ok for me.
>>>>
>>>>> I think it could be done in u-boot, in the kernel or in an 
>>>>> userspace daemon.
>>>>
>>>> The easiest way is in user space.
>>>
>>> I agree, but u-boot use the gpt before.
>>> I strongly thought about adding two sub functions "check" and 
>>> "repair" in the command gpt.
>>
>> Ok good plan.
>>>
>>>>
>>>>>
>>>>> I am interested to add this feature (of course, enabled by a 
>>>>> config) in u-boot. The idea is quite simple,
>>>>> when u-boot starts, before parsing the GPT table to find the 
>>>>> partition for the kernel, u-boot checks and
>>>>> if necessary, repairs the GPT table. If the primary GPT table is 
>>>>> broken, the secondary GPT table is used
>>>>> to repair the primary GPT table. And same, in the other way, 
>>>>> primary GPT table is used to repair the
>>>>> secondary GPT table. A question would be about MBR, if the MBR is 
>>>>> invalid, but a GPT table is valid
>>>>> (primary or secondary), should we write a protective MBR ?.
>>>>
>>>> If MBR is invalid, it does not boot - tested with U-Boot 2022.01 and 
>>>> Beaglebone. So I think we should.
>>>
>>> If the MBR is invalid, I think that there is two solutions, either it 
>>> was a protective MBR and it could
>>> be restaured
>>
>> Exactly.
>>
>>> or it was an hybrib MBR, and in this case, the dos partition are lost.
>>> In both case, it is possible to save the gpt partition (if at least 
>>> primary or secondary are valid).
>>
>> Right - as far as I see, U-Boot (in part_efi.c) checks to see a valid 
>> PMBR, and discard the correct GPT entries in case of failure. This is 
>> not needed and drops the redundancy we have with the dual GPT headers.
>>
>> Regards,
>> Stefano
>>
> 
> Finaly, I have upstreamed the support of the command gpt repair in u-boot :
> https://source.denx.de/u-boot/u-boot/-/commit/26f404c7665265946305f2883cfad1b785d35bb2 
> 
> The support of gpt verify is already present in u-boot.
> 
> There is a commit in libfdisk that avoid to write the mbr when the new 
> mbr is the same as the previous mbr,
> and we also have a command to repair a gpt table in u-boot.

Well done !

> 
> With those new features, do you think the script gpt swap and the 
> handler gptpart may be integrated in swupdate ?

Yes, absolutely.

> If you agree to integrate those features, I may sent a v2 of my patch 
> with a rebase on the branch master (and of course,

Fully agree.

> with some changes if you have some feedbacks).
> 

Best regards,
Stefano

> Regards,
> Philippe
> 
> 
> 
>>>
>>>
>>>>
>>>> Regards,
>>>> Stefano
>>>
>>>
>>> Regards,
>>> Philippe
>>>
>>>
>>>
>>>>
>>>>>
>>>>>
>>>>>> Regards,
>>>>>> Stefano
>>>>>>
>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Stefano
>>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>>
>>>>>>> Philippe
>>>>>>>
>>>>>>>
>>>>>
>>>>> Regards,
>>>>> Philippe
>>>>>
>>>>>
>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>>>>>>>>> ---
>>>>>>>>>   doc/source/handlers.rst     |  24 ++++
>>>>>>>>>   handlers/diskpart_handler.c | 244 
>>>>>>>>> ++++++++++++++++++++++++++++++++++++
>>>>>>>>>   2 files changed, 268 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>>>>>>>>> index 8d2aeea..025d100 100644
>>>>>>>>> --- a/doc/source/handlers.rst
>>>>>>>>> +++ b/doc/source/handlers.rst
>>>>>>>>> @@ -943,6 +943,30 @@ MBR Example:
>>>>>>>>>          }
>>>>>>>>>       }
>>>>>>>>>   +gpt partition swap
>>>>>>>>> +..................
>>>>>>>>> +
>>>>>>>>> +There is a handler gptswap that allow to swap gpt partitions 
>>>>>>>>> after all the images were flashed.
>>>>>>>>> +This handler only swap the name of the partition. It coud be 
>>>>>>>>> usefull for a dual bank strategy.
>>>>>>>>> +This handler is a script for the point of view of swupdate, so 
>>>>>>>>> the node that provide it should
>>>>>>>>> +be added in the section scripts.
>>>>>>>>> +
>>>>>>>>> +Simple example:
>>>>>>>>> +
>>>>>>>>> +::
>>>>>>>>> +
>>>>>>>>> +        scripts: (
>>>>>>>>> +                {
>>>>>>>>> +                        type = "gptswap";
>>>>>>>>> +                        device = "/dev/vdb";
>>>>>>>>> +                        properties =
>>>>>>>>> +                        {
>>>>>>>>> +                                swap-0 = [ "u-boot-0" , 
>>>>>>>>> "u-boot-1" ];
>>>>>>>>> +                                swap-1 = [ "kernel-0" , 
>>>>>>>>> "kernel-1" ];
>>>>>>>>> +                        };
>>>>>>>>> +                },
>>>>>>>>> +         );
>>>>>>>>> +
>>>>>>>>>   Diskformat Handler
>>>>>>>>>   ------------------
>>>>>>>>>   diff --git a/handlers/diskpart_handler.c 
>>>>>>>>> b/handlers/diskpart_handler.c
>>>>>>>>> index 0af4e60..0ff5968 100644
>>>>>>>>> --- a/handlers/diskpart_handler.c
>>>>>>>>> +++ b/handlers/diskpart_handler.c
>>>>>>>>> @@ -14,8 +14,10 @@
>>>>>>>>>   #include <errno.h>
>>>>>>>>>   #include <ctype.h>
>>>>>>>>>   #include <sys/file.h>
>>>>>>>>> +#include <sys/ioctl.h>
>>>>>>>>>   #include <sys/types.h>
>>>>>>>>>   #include <libfdisk/libfdisk.h>
>>>>>>>>> +#include <linux/fs.h>
>>>>>>>>>   #include <fs_interface.h>
>>>>>>>>>   #include <uuid/uuid.h>
>>>>>>>>>   #include "swupdate.h"
>>>>>>>>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>>>>>>>>> fdisk_context *cxt, struct diskpart_ta
>>>>>>>>>       return ret;
>>>>>>>>>   }
>>>>>>>>>   +static struct fdisk_partition *
>>>>>>>>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table 
>>>>>>>>> *tb, char *name)
>>>>>>>>> +{
>>>>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>>>>> +    struct fdisk_partition *ipa = NULL;
>>>>>>>>> +    struct fdisk_iter *itr;
>>>>>>>>> +    const char *iname;
>>>>>>>>> +
>>>>>>>>> +    if (!tb || !name)
>>>>>>>>> +        goto out;
>>>>>>>>> +
>>>>>>>>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>>>>>>>>> +
>>>>>>>>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>>>>>>>>> +        iname = fdisk_partition_get_name(ipa);
>>>>>>>>> +        if (iname && !strcmp(iname, name)) {
>>>>>>>>> +            pa = ipa;
>>>>>>>>> +            break;
>>>>>>>>> +        }
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    fdisk_free_iter(itr);
>>>>>>>>> +
>>>>>>>>> + out:
>>>>>>>>> +    return pa;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static struct fdisk_partition *
>>>>>>>>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char 
>>>>>>>>> *name)
>>>>>>>>> +{
>>>>>>>>> +    struct fdisk_partition *pa = NULL;
>>>>>>>>> +
>>>>>>>>> +    if (!tb || !name)
>>>>>>>>> +        goto out;
>>>>>>>>> +
>>>>>>>>> +    if (tb->parent)
>>>>>>>>> +        pa = 
>>>>>>>>> diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
>>>>>>>>> +
>>>>>>>>> + out:
>>>>>>>>> +    return pa;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>>>>>>>>> +                   struct create_table *createtable,
>>>>>>>>> +                   char *name1, char *name2)
>>>>>>>>> +{
>>>>>>>>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>>>>>>>>> +    int ret = -1;
>>>>>>>>> +
>>>>>>>>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>>>>>>>>> +    if (!pa1) {
>>>>>>>>> +        ERROR("Can't find partition %s", name1);
>>>>>>>>> +        goto out;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>>>>>>>>> +    if (!pa2) {
>>>>>>>>> +        ERROR("Can't find partition %s", name2);
>>>>>>>>> +        goto out;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    ret = fdisk_partition_set_name(pa1, name2);
>>>>>>>>> +    if (ret)
>>>>>>>>> +        goto out;
>>>>>>>>> +    ret = fdisk_partition_set_name(pa2, name1);
>>>>>>>>> +    if (ret)
>>>>>>>>> +        goto out;
>>>>>>>>> +
>>>>>>>>> +    createtable->parent = true;
>>>>>>>>> +
>>>>>>>>> + out:
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>>>>>>>>                     struct partition_data *part,
>>>>>>>>>                     unsigned long sector_size,
>>>>>>>>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>>>>>>>>> fdisk_context *cxt)
>>>>>>>>>       fdisk_unref_context(cxt);
>>>>>>>>>   }
>>>>>>>>>   +static int diskpart_reread_partition(char *device)
>>>>>>>>> +{
>>>>>>>>> +    int fd, ret = -1;
>>>>>>>>> +
>>>>>>>>> +    fd = open(device, O_RDONLY);
>>>>>>>>> +    if (fd < 0) {
>>>>>>>>> +        ERROR("Device %s can't be opened", device);
>>>>>>>>> +        goto out;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    ret = ioctl(fd, BLKRRPART, NULL);
>>>>>>>>> +    if (ret < 0) {
>>>>>>>>> +        ERROR("Scan cannot be done on device %s", device);
>>>>>>>>> +        goto out_close;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> + out_close:
>>>>>>>>> +    close(fd);
>>>>>>>>> +
>>>>>>>>> + out:
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>   static int diskpart(struct img_type *img,
>>>>>>>>>       void __attribute__ ((__unused__)) *data)
>>>>>>>>>   {
>>>>>>>>> @@ -1066,9 +1165,154 @@ handler_release:
>>>>>>>>>       return ret;
>>>>>>>>>   }
>>>>>>>>>   +static int gpt_swap_partition(struct img_type *img, void *data)
>>>>>>>>> +{
>>>>>>>>> +    struct fdisk_context *cxt = NULL;
>>>>>>>>> +    struct diskpart_table *tb = NULL;
>>>>>>>>> +    int ret = 0;
>>>>>>>>> +    unsigned long hybrid = 0;
>>>>>>>>> +    struct hnd_priv priv =  {
>>>>>>>>> +        .labeltype = FDISK_DISKLABEL_DOS,
>>>>>>>>> +    };
>>>>>>>>> +    struct create_table *createtable = NULL;
>>>>>>>>> +    struct script_handler_data *script_data = data;
>>>>>>>>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>>>>>>>>> +    struct dict_list *partitions;
>>>>>>>>> +    struct dict_list_elem *partition;
>>>>>>>>> +    int num, count = 0;
>>>>>>>>> +    char *name[2];
>>>>>>>>> +
>>>>>>>>> +    if (!script_data)
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +    /*
>>>>>>>>> +     * Call only in case of postinstall
>>>>>>>>> +     */
>>>>>>>>> +    if (script_data->scriptfn != POSTINSTALL)
>>>>>>>>> +        return 0;
>>>>>>>>> +
>>>>>>>>> +    LIST_INIT(&priv.listparts);
>>>>>>>>> +    if (!strlen(img->device)) {
>>>>>>>>> +        ERROR("Partition handler without setting the device");
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    createtable = calloc(1, sizeof(*createtable));
>>>>>>>>> +    if (!createtable) {
>>>>>>>>> +        ERROR("OOM allocating createtable !");
>>>>>>>>> +        return -ENOMEM;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    /*
>>>>>>>>> +     * Reads flags: nolock and noinuse
>>>>>>>>> +     */
>>>>>>>>> +    priv.nolock = strtobool(dict_get_value(&img->properties, 
>>>>>>>>> "nolock"));
>>>>>>>>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>>>>>>>>> "noinuse"));
>>>>>>>>> +
>>>>>>>>> +        /*
>>>>>>>>> +         * Create context
>>>>>>>>> +         */
>>>>>>>>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, 
>>>>>>>>> createtable);
>>>>>>>>> +    if (ret == -EACCES)
>>>>>>>>> +        goto handler_release;
>>>>>>>>> +    else if (ret)
>>>>>>>>> +        goto handler_exit;
>>>>>>>>> +
>>>>>>>>> +    tb = calloc(1, sizeof(*tb));
>>>>>>>>> +    if (!tb) {
>>>>>>>>> +        ERROR("OOM loading partitions !");
>>>>>>>>> +        ret = -ENOMEM;
>>>>>>>>> +        goto handler_exit;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    /*
>>>>>>>>> +     * Fill the in-memory partition table from the disk.
>>>>>>>>> +     */
>>>>>>>>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>>>>>>>>> +    if (ret)
>>>>>>>>> +        goto handler_exit;
>>>>>>>>> +
>>>>>>>>> +    while (1) {
>>>>>>>>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>>>>>>>>> +        partitions = dict_get_list(&img->properties, prop);
>>>>>>>>> +        if (!partitions)
>>>>>>>>> +            break;
>>>>>>>>> +
>>>>>>>>> +        num = 0;
>>>>>>>>> +        LIST_FOREACH(partition, partitions, next) {
>>>>>>>>> +            if (num >= 2) {
>>>>>>>>> +                ERROR("Too many partition (%s)", prop);
>>>>>>>>> +                goto handler_exit;
>>>>>>>>> +            }
>>>>>>>>> +
>>>>>>>>> +            name[num] = partition->value;
>>>>>>>>> +            num++;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        if (num != 2) {
>>>>>>>>> +            ERROR("Invalid number (%d) of partition (%s)", 
>>>>>>>>> num, prop);
>>>>>>>>> +            goto handler_exit;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>>>>>>>>> +
>>>>>>>>> +        ret = diskpart_swap_partition(tb, createtable, 
>>>>>>>>> name[0], name[1]);
>>>>>>>>> +        if (ret) {
>>>>>>>>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>>>>>>>>> +            break;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        count++;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    /* Reload table for parent */
>>>>>>>>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>>>>>>>>> +    if (ret) {
>>>>>>>>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>>>>>>>>> +        goto handler_exit;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    /* Reload table for child */
>>>>>>>>> +    if (IS_HYBRID(cxt)) {
>>>>>>>>> +        ret = diskpart_reload_table(cxt, tb->child);
>>>>>>>>> +        if (ret) {
>>>>>>>>> +            ERROR("Can't reload table for child (err = %d)", 
>>>>>>>>> ret);
>>>>>>>>> +            goto handler_exit;
>>>>>>>>> +        }
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    /* Write table */
>>>>>>>>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>>>>>>>>> priv.noinuse);
>>>>>>>>> +    if (ret) {
>>>>>>>>> +        ERROR("Can't write table (err = %d)", ret);
>>>>>>>>> +        goto handler_exit;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +handler_exit:
>>>>>>>>> +    if (tb)
>>>>>>>>> +        diskpart_unref_table(tb);
>>>>>>>>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>>>>>>>>> +        if (fdisk_deassign_device(cxt, 0))
>>>>>>>>> +            WARN("Error deassign device %s", img->device);
>>>>>>>>> +
>>>>>>>>> +handler_release:
>>>>>>>>> +    if (cxt)
>>>>>>>>> +        diskpart_unref_context(cxt);
>>>>>>>>> +
>>>>>>>>> +    /*
>>>>>>>>> +     * Re-read the partition table to be sure that SWupdate 
>>>>>>>>> does not
>>>>>>>>> +     * try to acces the partitions before the kernel is ready
>>>>>>>>> +     */
>>>>>>>>> +    diskpart_reread_partition(img->device);
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>   __attribute__((constructor))
>>>>>>>>>   void diskpart_handler(void)
>>>>>>>>>   {
>>>>>>>>>       register_handler("diskpart", diskpart,
>>>>>>>>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>>>>> +        register_handler("gptswap", gpt_swap_partition,
>>>>>>>>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>>>>>>>>   }
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>>
>>>>>
>>>>
>>>>
>>
>
Stefano Babic June 18, 2022, 8:45 a.m. UTC | #10
Hi Philippe,

On 05.04.22 15:17, Philippe Reynes wrote:
> Add a handler gptswap to swap gpt partition.
> This handler is a script that can be used to swap gpt partition
> names after all the image were flashed (only partition names are
> swapped). This script may be usefull for a dual bank strategy.
> 
> For example:
> scripts: (
> 	{
> 		type = "gptswap";
> 		device = "/dev/vdb";
> 		properties =
> 		{
> 			swap-0 = [ "u-boot-0" , "u-boot-1" ];
> 			swap-1 = [ "kernel-0" , "kernel-1" ];
> 		};
> 	},
> );
> 
> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
> ---
>   doc/source/handlers.rst     |  24 ++++
>   handlers/diskpart_handler.c | 244 ++++++++++++++++++++++++++++++++++++
>   2 files changed, 268 insertions(+)
> 
> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
> index 8d2aeea..025d100 100644
> --- a/doc/source/handlers.rst
> +++ b/doc/source/handlers.rst
> @@ -943,6 +943,30 @@ MBR Example:
>   	   }
>   	}
>   
> +gpt partition swap
> +..................
> +
> +There is a handler gptswap that allow to swap gpt partitions after all the images were flashed.
> +This handler only swap the name of the partition. It coud be usefull for a dual bank strategy.
> +This handler is a script for the point of view of swupdate, so the node that provide it should
> +be added in the section scripts.
> +
> +Simple example:
> +
> +::
> +
> +        scripts: (
> +                {
> +                        type = "gptswap";
> +                        device = "/dev/vdb";
> +                        properties =
> +                        {
> +                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
> +                                swap-1 = [ "kernel-0" , "kernel-1" ];
> +                        };
> +                },
> +         );
> +
>   Diskformat Handler
>   ------------------
>   
> diff --git a/handlers/diskpart_handler.c b/handlers/diskpart_handler.c
> index 0af4e60..0ff5968 100644
> --- a/handlers/diskpart_handler.c
> +++ b/handlers/diskpart_handler.c
> @@ -14,8 +14,10 @@
>   #include <errno.h>
>   #include <ctype.h>
>   #include <sys/file.h>
> +#include <sys/ioctl.h>
>   #include <sys/types.h>
>   #include <libfdisk/libfdisk.h>
> +#include <linux/fs.h>
>   #include <fs_interface.h>
>   #include <uuid/uuid.h>
>   #include "swupdate.h"
> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct fdisk_context *cxt, struct diskpart_ta
>   	return ret;
>   }
>   
> +static struct fdisk_partition *
> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, char *name)
> +{
> +	struct fdisk_partition *pa = NULL;
> +	struct fdisk_partition *ipa = NULL;
> +	struct fdisk_iter *itr;
> +	const char *iname;
> +
> +	if (!tb || !name)
> +		goto out;
> +
> +	itr = fdisk_new_iter(FDISK_ITER_FORWARD);
> +
> +	while (!fdisk_table_next_partition(tb, itr, &ipa)) {
> +		iname = fdisk_partition_get_name(ipa);
> +		if (iname && !strcmp(iname, name)) {
> +			pa = ipa;
> +			break;
> +		}
> +	}
> +
> +	fdisk_free_iter(itr);
> +
> + out:
> +	return pa;
> +}
> +
> +static struct fdisk_partition *
> +diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
> +{
> +	struct fdisk_partition *pa = NULL;
> +
> +	if (!tb || !name)
> +		goto out;
> +
> +	if (tb->parent)
> +		pa = diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
> +
> + out:
> +	return pa;
> +}
> +
> +static int diskpart_swap_partition(struct diskpart_table *tb,
> +				   struct create_table *createtable,
> +				   char *name1, char *name2)
> +{
> +	struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
> +	int ret = -1;
> +
> +	pa1 = diskpart_get_partition_by_name(tb, name1);
> +	if (!pa1) {
> +		ERROR("Can't find partition %s", name1);
> +		goto out;
> +	}
> +
> +	pa2 = diskpart_get_partition_by_name(tb, name2);
> +	if (!pa2) {
> +		ERROR("Can't find partition %s", name2);
> +		goto out;
> +	}
> +
> +	ret = fdisk_partition_set_name(pa1, name2);
> +	if (ret)
> +		goto out;
> +	ret = fdisk_partition_set_name(pa2, name1);
> +	if (ret)
> +		goto out;
> +
> +	createtable->parent = true;
> +
> + out:
> +	return ret;
> +}
> +
>   static int diskpart_set_partition(struct fdisk_partition *pa,
>   				  struct partition_data *part,
>   				  unsigned long sector_size,
> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct fdisk_context *cxt)
>   	fdisk_unref_context(cxt);
>   }
>   
> +static int diskpart_reread_partition(char *device)
> +{
> +	int fd, ret = -1;
> +
> +	fd = open(device, O_RDONLY);
> +	if (fd < 0) {
> +		ERROR("Device %s can't be opened", device);
> +		goto out;
> +	}
> +
> +	ret = ioctl(fd, BLKRRPART, NULL);
> +	if (ret < 0) {
> +		ERROR("Scan cannot be done on device %s", device);
> +		goto out_close;
> +	}
> +
> + out_close:
> +	close(fd);
> +
> + out:
> +	return ret;
> +}
> +
>   static int diskpart(struct img_type *img,
>   	void __attribute__ ((__unused__)) *data)
>   {
> @@ -1066,9 +1165,154 @@ handler_release:
>   	return ret;
>   }
>   
> +static int gpt_swap_partition(struct img_type *img, void *data)
> +{
> +	struct fdisk_context *cxt = NULL;
> +	struct diskpart_table *tb = NULL;
> +	int ret = 0;
> +	unsigned long hybrid = 0;
> +	struct hnd_priv priv =  {
> +		.labeltype = FDISK_DISKLABEL_DOS,
> +	};
> +	struct create_table *createtable = NULL;
> +	struct script_handler_data *script_data = data;
> +	char prop[SWUPDATE_GENERAL_STRING_SIZE];
> +	struct dict_list *partitions;
> +	struct dict_list_elem *partition;
> +	int num, count = 0;
> +	char *name[2];
> +
> +	if (!script_data)
> +		return -EINVAL;
> +
> +	/*
> +	 * Call only in case of postinstall
> +	 */
> +	if (script_data->scriptfn != POSTINSTALL)
> +		return 0;
> +
> +	LIST_INIT(&priv.listparts);
> +	if (!strlen(img->device)) {
> +		ERROR("Partition handler without setting the device");
> +		return -EINVAL;
> +	}
> +
> +	createtable = calloc(1, sizeof(*createtable));
> +	if (!createtable) {
> +		ERROR("OOM allocating createtable !");
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Reads flags: nolock and noinuse
> +	 */
> +	priv.nolock = strtobool(dict_get_value(&img->properties, "nolock"));
> +	priv.noinuse = strtobool(dict_get_value(&img->properties, "noinuse"));
> +
> +        /*
> +         * Create context
> +         */
> +	ret = diskpart_assign_context(&cxt, img, priv, hybrid, createtable);
> +	if (ret == -EACCES)
> +		goto handler_release;
> +	else if (ret)
> +		goto handler_exit;
> +
> +	tb = calloc(1, sizeof(*tb));
> +	if (!tb) {
> +		ERROR("OOM loading partitions !");
> +		ret = -ENOMEM;
> +		goto handler_exit;
> +	}
> +
> +	/*
> +	 * Fill the in-memory partition table from the disk.
> +	 */
> +	ret = diskpart_get_partitions(cxt, tb, createtable);
> +	if (ret)
> +		goto handler_exit;
> +
> +	while (1) {
> +		snprintf(prop, sizeof(prop), "swap-%d", count);
> +		partitions = dict_get_list(&img->properties, prop);
> +		if (!partitions)
> +			break;
> +
> +		num = 0;
> +		LIST_FOREACH(partition, partitions, next) {
> +			if (num >= 2) {
> +				ERROR("Too many partition (%s)", prop);
> +				goto handler_exit;
> +			}
> +
> +			name[num] = partition->value;
> +			num++;
> +		}
> +
> +		if (num != 2) {
> +			ERROR("Invalid number (%d) of partition (%s)", num, prop);
> +			goto handler_exit;
> +		}
> +
> +		TRACE("swap partition %s <-> %s", name[0], name[1]);
> +
> +		ret = diskpart_swap_partition(tb, createtable, name[0], name[1]);
> +		if (ret) {
> +			ERROR("Can't swap %s and %s", name[0], name[1]);
> +			break;
> +		}
> +
> +		count++;
> +	}
> +
> +	/* Reload table for parent */
> +	ret = diskpart_reload_table(PARENT(cxt), tb->parent);
> +	if (ret) {
> +		ERROR("Can't reload table for parent (err = %d)", ret);
> +		goto handler_exit;
> +	}
> +
> +	/* Reload table for child */
> +	if (IS_HYBRID(cxt)) {
> +		ret = diskpart_reload_table(cxt, tb->child);
> +		if (ret) {
> +			ERROR("Can't reload table for child (err = %d)", ret);
> +			goto handler_exit;
> +		}
> +	}
> +
> +	/* Write table */
> +	ret = diskpart_write_table(cxt, createtable, priv.nolock, priv.noinuse);
> +	if (ret) {
> +		ERROR("Can't write table (err = %d)", ret);
> +		goto handler_exit;
> +	}
> +
> +handler_exit:
> +	if (tb)
> +		diskpart_unref_table(tb);
> +	if (cxt && fdisk_get_devfd(cxt) >= 0)
> +		if (fdisk_deassign_device(cxt, 0))
> +			WARN("Error deassign device %s", img->device);
> +
> +handler_release:
> +	if (cxt)
> +		diskpart_unref_context(cxt);
> +
> +	/*
> +	 * Re-read the partition table to be sure that SWupdate does not
> +	 * try to acces the partitions before the kernel is ready
> +	 */
> +	diskpart_reread_partition(img->device);
> +
> +	return ret;
> +}
> +
>   __attribute__((constructor))
>   void diskpart_handler(void)
>   {
>   	register_handler("diskpart", diskpart,
>   				PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
> +        register_handler("gptswap", gpt_swap_partition,
> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>   }


Acked-by: Stefano Babic <sbabic@denx.de>

Best regards,
Stefano Babic
Stefano Babic June 18, 2022, 8:46 a.m. UTC | #11
On 18.06.22 10:45, Stefano Babic wrote:
> Hi Philippe,
> 
> On 05.04.22 15:17, Philippe Reynes wrote:
>> Add a handler gptswap to swap gpt partition.
>> This handler is a script that can be used to swap gpt partition
>> names after all the image were flashed (only partition names are
>> swapped). This script may be usefull for a dual bank strategy.
>>
>> For example:
>> scripts: (
>>     {
>>         type = "gptswap";
>>         device = "/dev/vdb";
>>         properties =
>>         {
>>             swap-0 = [ "u-boot-0" , "u-boot-1" ];
>>             swap-1 = [ "kernel-0" , "kernel-1" ];
>>         };
>>     },
>> );
>>
>> Signed-off-by: Philippe Reynes <philippe.reynes@softathome.com>
>> ---
>>   doc/source/handlers.rst     |  24 ++++
>>   handlers/diskpart_handler.c | 244 ++++++++++++++++++++++++++++++++++++
>>   2 files changed, 268 insertions(+)
>>
>> diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
>> index 8d2aeea..025d100 100644
>> --- a/doc/source/handlers.rst
>> +++ b/doc/source/handlers.rst
>> @@ -943,6 +943,30 @@ MBR Example:
>>          }
>>       }
>> +gpt partition swap
>> +..................
>> +
>> +There is a handler gptswap that allow to swap gpt partitions after 
>> all the images were flashed.
>> +This handler only swap the name of the partition. It coud be usefull 
>> for a dual bank strategy.
>> +This handler is a script for the point of view of swupdate, so the 
>> node that provide it should
>> +be added in the section scripts.
>> +
>> +Simple example:
>> +
>> +::
>> +
>> +        scripts: (
>> +                {
>> +                        type = "gptswap";
>> +                        device = "/dev/vdb";
>> +                        properties =
>> +                        {
>> +                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
>> +                                swap-1 = [ "kernel-0" , "kernel-1" ];
>> +                        };
>> +                },
>> +         );
>> +
>>   Diskformat Handler
>>   ------------------
>> diff --git a/handlers/diskpart_handler.c b/handlers/diskpart_handler.c
>> index 0af4e60..0ff5968 100644
>> --- a/handlers/diskpart_handler.c
>> +++ b/handlers/diskpart_handler.c
>> @@ -14,8 +14,10 @@
>>   #include <errno.h>
>>   #include <ctype.h>
>>   #include <sys/file.h>
>> +#include <sys/ioctl.h>
>>   #include <sys/types.h>
>>   #include <libfdisk/libfdisk.h>
>> +#include <linux/fs.h>
>>   #include <fs_interface.h>
>>   #include <uuid/uuid.h>
>>   #include "swupdate.h"
>> @@ -285,6 +287,80 @@ static int diskpart_get_partitions(struct 
>> fdisk_context *cxt, struct diskpart_ta
>>       return ret;
>>   }
>> +static struct fdisk_partition *
>> +diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, 
>> char *name)
>> +{
>> +    struct fdisk_partition *pa = NULL;
>> +    struct fdisk_partition *ipa = NULL;
>> +    struct fdisk_iter *itr;
>> +    const char *iname;
>> +
>> +    if (!tb || !name)
>> +        goto out;
>> +
>> +    itr = fdisk_new_iter(FDISK_ITER_FORWARD);
>> +
>> +    while (!fdisk_table_next_partition(tb, itr, &ipa)) {
>> +        iname = fdisk_partition_get_name(ipa);
>> +        if (iname && !strcmp(iname, name)) {
>> +            pa = ipa;
>> +            break;
>> +        }
>> +    }
>> +
>> +    fdisk_free_iter(itr);
>> +
>> + out:
>> +    return pa;
>> +}
>> +
>> +static struct fdisk_partition *
>> +diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
>> +{
>> +    struct fdisk_partition *pa = NULL;
>> +
>> +    if (!tb || !name)
>> +        goto out;
>> +
>> +    if (tb->parent)
>> +        pa = diskpart_fdisk_table_get_partition_by_name(tb->parent, 
>> name);
>> +
>> + out:
>> +    return pa;
>> +}
>> +
>> +static int diskpart_swap_partition(struct diskpart_table *tb,
>> +                   struct create_table *createtable,
>> +                   char *name1, char *name2)
>> +{
>> +    struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
>> +    int ret = -1;
>> +
>> +    pa1 = diskpart_get_partition_by_name(tb, name1);
>> +    if (!pa1) {
>> +        ERROR("Can't find partition %s", name1);
>> +        goto out;
>> +    }
>> +
>> +    pa2 = diskpart_get_partition_by_name(tb, name2);
>> +    if (!pa2) {
>> +        ERROR("Can't find partition %s", name2);
>> +        goto out;
>> +    }
>> +
>> +    ret = fdisk_partition_set_name(pa1, name2);
>> +    if (ret)
>> +        goto out;
>> +    ret = fdisk_partition_set_name(pa2, name1);
>> +    if (ret)
>> +        goto out;
>> +
>> +    createtable->parent = true;
>> +
>> + out:
>> +    return ret;
>> +}
>> +
>>   static int diskpart_set_partition(struct fdisk_partition *pa,
>>                     struct partition_data *part,
>>                     unsigned long sector_size,
>> @@ -781,6 +857,29 @@ static void diskpart_unref_context(struct 
>> fdisk_context *cxt)
>>       fdisk_unref_context(cxt);
>>   }
>> +static int diskpart_reread_partition(char *device)
>> +{
>> +    int fd, ret = -1;
>> +
>> +    fd = open(device, O_RDONLY);
>> +    if (fd < 0) {
>> +        ERROR("Device %s can't be opened", device);
>> +        goto out;
>> +    }
>> +
>> +    ret = ioctl(fd, BLKRRPART, NULL);
>> +    if (ret < 0) {
>> +        ERROR("Scan cannot be done on device %s", device);
>> +        goto out_close;
>> +    }
>> +
>> + out_close:
>> +    close(fd);
>> +
>> + out:
>> +    return ret;
>> +}
>> +
>>   static int diskpart(struct img_type *img,
>>       void __attribute__ ((__unused__)) *data)
>>   {
>> @@ -1066,9 +1165,154 @@ handler_release:
>>       return ret;
>>   }
>> +static int gpt_swap_partition(struct img_type *img, void *data)
>> +{
>> +    struct fdisk_context *cxt = NULL;
>> +    struct diskpart_table *tb = NULL;
>> +    int ret = 0;
>> +    unsigned long hybrid = 0;
>> +    struct hnd_priv priv =  {
>> +        .labeltype = FDISK_DISKLABEL_DOS,
>> +    };
>> +    struct create_table *createtable = NULL;
>> +    struct script_handler_data *script_data = data;
>> +    char prop[SWUPDATE_GENERAL_STRING_SIZE];
>> +    struct dict_list *partitions;
>> +    struct dict_list_elem *partition;
>> +    int num, count = 0;
>> +    char *name[2];
>> +
>> +    if (!script_data)
>> +        return -EINVAL;
>> +
>> +    /*
>> +     * Call only in case of postinstall
>> +     */
>> +    if (script_data->scriptfn != POSTINSTALL)
>> +        return 0;
>> +
>> +    LIST_INIT(&priv.listparts);
>> +    if (!strlen(img->device)) {
>> +        ERROR("Partition handler without setting the device");
>> +        return -EINVAL;
>> +    }
>> +
>> +    createtable = calloc(1, sizeof(*createtable));
>> +    if (!createtable) {
>> +        ERROR("OOM allocating createtable !");
>> +        return -ENOMEM;
>> +    }
>> +
>> +    /*
>> +     * Reads flags: nolock and noinuse
>> +     */
>> +    priv.nolock = strtobool(dict_get_value(&img->properties, "nolock"));
>> +    priv.noinuse = strtobool(dict_get_value(&img->properties, 
>> "noinuse"));
>> +
>> +        /*
>> +         * Create context
>> +         */
>> +    ret = diskpart_assign_context(&cxt, img, priv, hybrid, createtable);
>> +    if (ret == -EACCES)
>> +        goto handler_release;
>> +    else if (ret)
>> +        goto handler_exit;
>> +
>> +    tb = calloc(1, sizeof(*tb));
>> +    if (!tb) {
>> +        ERROR("OOM loading partitions !");
>> +        ret = -ENOMEM;
>> +        goto handler_exit;
>> +    }
>> +
>> +    /*
>> +     * Fill the in-memory partition table from the disk.
>> +     */
>> +    ret = diskpart_get_partitions(cxt, tb, createtable);
>> +    if (ret)
>> +        goto handler_exit;
>> +
>> +    while (1) {
>> +        snprintf(prop, sizeof(prop), "swap-%d", count);
>> +        partitions = dict_get_list(&img->properties, prop);
>> +        if (!partitions)
>> +            break;
>> +
>> +        num = 0;
>> +        LIST_FOREACH(partition, partitions, next) {
>> +            if (num >= 2) {
>> +                ERROR("Too many partition (%s)", prop);
>> +                goto handler_exit;
>> +            }
>> +
>> +            name[num] = partition->value;
>> +            num++;
>> +        }
>> +
>> +        if (num != 2) {
>> +            ERROR("Invalid number (%d) of partition (%s)", num, prop);
>> +            goto handler_exit;
>> +        }
>> +
>> +        TRACE("swap partition %s <-> %s", name[0], name[1]);
>> +
>> +        ret = diskpart_swap_partition(tb, createtable, name[0], 
>> name[1]);
>> +        if (ret) {
>> +            ERROR("Can't swap %s and %s", name[0], name[1]);
>> +            break;
>> +        }
>> +
>> +        count++;
>> +    }
>> +
>> +    /* Reload table for parent */
>> +    ret = diskpart_reload_table(PARENT(cxt), tb->parent);
>> +    if (ret) {
>> +        ERROR("Can't reload table for parent (err = %d)", ret);
>> +        goto handler_exit;
>> +    }
>> +
>> +    /* Reload table for child */
>> +    if (IS_HYBRID(cxt)) {
>> +        ret = diskpart_reload_table(cxt, tb->child);
>> +        if (ret) {
>> +            ERROR("Can't reload table for child (err = %d)", ret);
>> +            goto handler_exit;
>> +        }
>> +    }
>> +
>> +    /* Write table */
>> +    ret = diskpart_write_table(cxt, createtable, priv.nolock, 
>> priv.noinuse);
>> +    if (ret) {
>> +        ERROR("Can't write table (err = %d)", ret);
>> +        goto handler_exit;
>> +    }
>> +
>> +handler_exit:
>> +    if (tb)
>> +        diskpart_unref_table(tb);
>> +    if (cxt && fdisk_get_devfd(cxt) >= 0)
>> +        if (fdisk_deassign_device(cxt, 0))
>> +            WARN("Error deassign device %s", img->device);
>> +
>> +handler_release:
>> +    if (cxt)
>> +        diskpart_unref_context(cxt);
>> +
>> +    /*
>> +     * Re-read the partition table to be sure that SWupdate does not
>> +     * try to acces the partitions before the kernel is ready
>> +     */
>> +    diskpart_reread_partition(img->device);
>> +
>> +    return ret;
>> +}
>> +
>>   __attribute__((constructor))
>>   void diskpart_handler(void)
>>   {
>>       register_handler("diskpart", diskpart,
>>                   PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
>> +        register_handler("gptswap", gpt_swap_partition,
>> +                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
>>   }
> 
> 
> Acked-by: Stefano Babic <sbabic@denx.de>

I mean on V2, of course..

Stefano

> 
> Best regards,
> Stefano Babic
>
diff mbox series

Patch

diff --git a/doc/source/handlers.rst b/doc/source/handlers.rst
index 8d2aeea..025d100 100644
--- a/doc/source/handlers.rst
+++ b/doc/source/handlers.rst
@@ -943,6 +943,30 @@  MBR Example:
 	   }
 	}
 
+gpt partition swap
+..................
+
+There is a handler gptswap that allow to swap gpt partitions after all the images were flashed.
+This handler only swap the name of the partition. It coud be usefull for a dual bank strategy.
+This handler is a script for the point of view of swupdate, so the node that provide it should
+be added in the section scripts.
+
+Simple example:
+
+::
+
+        scripts: (
+                {
+                        type = "gptswap";
+                        device = "/dev/vdb";
+                        properties =
+                        {
+                                swap-0 = [ "u-boot-0" , "u-boot-1" ];
+                                swap-1 = [ "kernel-0" , "kernel-1" ];
+                        };
+                },
+         );
+
 Diskformat Handler
 ------------------
 
diff --git a/handlers/diskpart_handler.c b/handlers/diskpart_handler.c
index 0af4e60..0ff5968 100644
--- a/handlers/diskpart_handler.c
+++ b/handlers/diskpart_handler.c
@@ -14,8 +14,10 @@ 
 #include <errno.h>
 #include <ctype.h>
 #include <sys/file.h>
+#include <sys/ioctl.h>
 #include <sys/types.h>
 #include <libfdisk/libfdisk.h>
+#include <linux/fs.h>
 #include <fs_interface.h>
 #include <uuid/uuid.h>
 #include "swupdate.h"
@@ -285,6 +287,80 @@  static int diskpart_get_partitions(struct fdisk_context *cxt, struct diskpart_ta
 	return ret;
 }
 
+static struct fdisk_partition *
+diskpart_fdisk_table_get_partition_by_name(struct fdisk_table *tb, char *name)
+{
+	struct fdisk_partition *pa = NULL;
+	struct fdisk_partition *ipa = NULL;
+	struct fdisk_iter *itr;
+	const char *iname;
+
+	if (!tb || !name)
+		goto out;
+
+	itr = fdisk_new_iter(FDISK_ITER_FORWARD);
+
+	while (!fdisk_table_next_partition(tb, itr, &ipa)) {
+		iname = fdisk_partition_get_name(ipa);
+		if (iname && !strcmp(iname, name)) {
+			pa = ipa;
+			break;
+		}
+	}
+
+	fdisk_free_iter(itr);
+
+ out:
+	return pa;
+}
+
+static struct fdisk_partition *
+diskpart_get_partition_by_name(struct diskpart_table *tb, char *name)
+{
+	struct fdisk_partition *pa = NULL;
+
+	if (!tb || !name)
+		goto out;
+
+	if (tb->parent)
+		pa = diskpart_fdisk_table_get_partition_by_name(tb->parent, name);
+
+ out:
+	return pa;
+}
+
+static int diskpart_swap_partition(struct diskpart_table *tb,
+				   struct create_table *createtable,
+				   char *name1, char *name2)
+{
+	struct fdisk_partition *pa1 = NULL, *pa2 = NULL;
+	int ret = -1;
+
+	pa1 = diskpart_get_partition_by_name(tb, name1);
+	if (!pa1) {
+		ERROR("Can't find partition %s", name1);
+		goto out;
+	}
+
+	pa2 = diskpart_get_partition_by_name(tb, name2);
+	if (!pa2) {
+		ERROR("Can't find partition %s", name2);
+		goto out;
+	}
+
+	ret = fdisk_partition_set_name(pa1, name2);
+	if (ret)
+		goto out;
+	ret = fdisk_partition_set_name(pa2, name1);
+	if (ret)
+		goto out;
+
+	createtable->parent = true;
+
+ out:
+	return ret;
+}
+
 static int diskpart_set_partition(struct fdisk_partition *pa,
 				  struct partition_data *part,
 				  unsigned long sector_size,
@@ -781,6 +857,29 @@  static void diskpart_unref_context(struct fdisk_context *cxt)
 	fdisk_unref_context(cxt);
 }
 
+static int diskpart_reread_partition(char *device)
+{
+	int fd, ret = -1;
+
+	fd = open(device, O_RDONLY);
+	if (fd < 0) {
+		ERROR("Device %s can't be opened", device);
+		goto out;
+	}
+
+	ret = ioctl(fd, BLKRRPART, NULL);
+	if (ret < 0) {
+		ERROR("Scan cannot be done on device %s", device);
+		goto out_close;
+	}
+
+ out_close:
+	close(fd);
+
+ out:
+	return ret;
+}
+
 static int diskpart(struct img_type *img,
 	void __attribute__ ((__unused__)) *data)
 {
@@ -1066,9 +1165,154 @@  handler_release:
 	return ret;
 }
 
+static int gpt_swap_partition(struct img_type *img, void *data)
+{
+	struct fdisk_context *cxt = NULL;
+	struct diskpart_table *tb = NULL;
+	int ret = 0;
+	unsigned long hybrid = 0;
+	struct hnd_priv priv =  {
+		.labeltype = FDISK_DISKLABEL_DOS,
+	};
+	struct create_table *createtable = NULL;
+	struct script_handler_data *script_data = data;
+	char prop[SWUPDATE_GENERAL_STRING_SIZE];
+	struct dict_list *partitions;
+	struct dict_list_elem *partition;
+	int num, count = 0;
+	char *name[2];
+
+	if (!script_data)
+		return -EINVAL;
+
+	/*
+	 * Call only in case of postinstall
+	 */
+	if (script_data->scriptfn != POSTINSTALL)
+		return 0;
+
+	LIST_INIT(&priv.listparts);
+	if (!strlen(img->device)) {
+		ERROR("Partition handler without setting the device");
+		return -EINVAL;
+	}
+
+	createtable = calloc(1, sizeof(*createtable));
+	if (!createtable) {
+		ERROR("OOM allocating createtable !");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Reads flags: nolock and noinuse
+	 */
+	priv.nolock = strtobool(dict_get_value(&img->properties, "nolock"));
+	priv.noinuse = strtobool(dict_get_value(&img->properties, "noinuse"));
+
+        /*
+         * Create context
+         */
+	ret = diskpart_assign_context(&cxt, img, priv, hybrid, createtable);
+	if (ret == -EACCES)
+		goto handler_release;
+	else if (ret)
+		goto handler_exit;
+
+	tb = calloc(1, sizeof(*tb));
+	if (!tb) {
+		ERROR("OOM loading partitions !");
+		ret = -ENOMEM;
+		goto handler_exit;
+	}
+
+	/*
+	 * Fill the in-memory partition table from the disk.
+	 */
+	ret = diskpart_get_partitions(cxt, tb, createtable);
+	if (ret)
+		goto handler_exit;
+
+	while (1) {
+		snprintf(prop, sizeof(prop), "swap-%d", count);
+		partitions = dict_get_list(&img->properties, prop);
+		if (!partitions)
+			break;
+
+		num = 0;
+		LIST_FOREACH(partition, partitions, next) {
+			if (num >= 2) {
+				ERROR("Too many partition (%s)", prop);
+				goto handler_exit;
+			}
+
+			name[num] = partition->value;
+			num++;
+		}
+
+		if (num != 2) {
+			ERROR("Invalid number (%d) of partition (%s)", num, prop);
+			goto handler_exit;
+		}
+
+		TRACE("swap partition %s <-> %s", name[0], name[1]);
+
+		ret = diskpart_swap_partition(tb, createtable, name[0], name[1]);
+		if (ret) {
+			ERROR("Can't swap %s and %s", name[0], name[1]);
+			break;
+		}
+
+		count++;
+	}
+
+	/* Reload table for parent */
+	ret = diskpart_reload_table(PARENT(cxt), tb->parent);
+	if (ret) {
+		ERROR("Can't reload table for parent (err = %d)", ret);
+		goto handler_exit;
+	}
+
+	/* Reload table for child */
+	if (IS_HYBRID(cxt)) {
+		ret = diskpart_reload_table(cxt, tb->child);
+		if (ret) {
+			ERROR("Can't reload table for child (err = %d)", ret);
+			goto handler_exit;
+		}
+	}
+
+	/* Write table */
+	ret = diskpart_write_table(cxt, createtable, priv.nolock, priv.noinuse);
+	if (ret) {
+		ERROR("Can't write table (err = %d)", ret);
+		goto handler_exit;
+	}
+
+handler_exit:
+	if (tb)
+		diskpart_unref_table(tb);
+	if (cxt && fdisk_get_devfd(cxt) >= 0)
+		if (fdisk_deassign_device(cxt, 0))
+			WARN("Error deassign device %s", img->device);
+
+handler_release:
+	if (cxt)
+		diskpart_unref_context(cxt);
+
+	/*
+	 * Re-read the partition table to be sure that SWupdate does not
+	 * try to acces the partitions before the kernel is ready
+	 */
+	diskpart_reread_partition(img->device);
+
+	return ret;
+}
+
 __attribute__((constructor))
 void diskpart_handler(void)
 {
 	register_handler("diskpart", diskpart,
 				PARTITION_HANDLER | NO_DATA_HANDLER, NULL);
+        register_handler("gptswap", gpt_swap_partition,
+                         SCRIPT_HANDLER | NO_DATA_HANDLER, NULL);
 }