diff mbox series

core/artifacts_version: compare oldstyle versions directly

Message ID 20211014061815.2009528-1-dominique.martinet@atmark-techno.com
State Changes Requested
Headers show
Series core/artifacts_version: compare oldstyle versions directly | expand

Commit Message

Dominique MARTINET Oct. 14, 2021, 6:18 a.m. UTC
version_to_number() has a number of issues:
 - versions with more than 4 elements had their trailing part ignored
(e.g. '1.2.3.4', '1.2.3.4.5', '1.2.3.4.12' are all equal)
 - version token bigger than 65535 were truncated down to 0xffff
(e.g. '1.65536' and '1.0' are equal. '65536' is smaller than '10')

workaround the issue by not converting to an intermediate u64.

This still fails to handle versions bigger than ULONG_MAX but at least
prints a TRACE if that case happens.

Signed-off-by: Dominique Martinet <dominique.martinet@atmark-techno.com>
---
 core/artifacts_versions.c | 73 ++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 35 deletions(-)

Comments

Stefano Babic Oct. 14, 2021, 7:36 a.m. UTC | #1
Hi Dominique,

On 14.10.21 08:18, Dominique Martinet wrote:
> version_to_number() has a number of issues:
>   - versions with more than 4 elements had their trailing part ignored
> (e.g. '1.2.3.4', '1.2.3.4.5', '1.2.3.4.12' are all equal)
>   - version token bigger than 65535 were truncated down to 0xffff
> (e.g. '1.65536' and '1.0' are equal. '65536' is smaller than '10')
> 

mmhhhh...the issues reported are really the conditions to use plain 
numbers as version, as it is written in description of the 
version_to_number() function:

- version must be in format x.y.z.t, just numbers, as 
major.minor.revision.build
- each field is a 16 bit number, that is between 0 and 65535

And of course, any attempt to break rule 1 or 2 leads to a wrong 
comparison. A 16 bit field for each subversion still seems very big, and 
well, having more as that suggests to switch to another schema.

When the above conversion is not enough, there is support for semantic 
version in SWUpdate, and alphanumeric versions can be used.

I am sure from projects that they rely on rule 1), and they use 
additional fields just as buildtime / counter, but the version is the 
same. That is, a 1.2.3.4, 1.2.3.4.5 and 1.2.3.4.12 are the same version 
and SWUpdate must skip (as it is currently doing) because it is not a 
new version.

I am quite surprise that the current schema is not enough, are you 
unsure you cannot use semantic version if 64 bit (sigh !) are not enough 
for you ?

> workaround the issue by not converting to an intermediate u64.
> 
> This still fails to handle versions bigger than ULONG_MAX but at least
> prints a TRACE if that case happens.
> 
> Signed-off-by: Dominique Martinet <dominique.martinet@atmark-techno.com>
> ---
>   core/artifacts_versions.c | 73 ++++++++++++++++++++-------------------
>   1 file changed, 38 insertions(+), 35 deletions(-)
> 
> diff --git a/core/artifacts_versions.c b/core/artifacts_versions.c
> index 433125862014..eb605bc403f1 100644
> --- a/core/artifacts_versions.c
> +++ b/core/artifacts_versions.c
> @@ -161,39 +161,52 @@ static bool is_oldstyle_version(const char* version_string)
>   }
>   
>   /*
> - * convert a version string into a number
> - * version string is in the format:
> + * compare version strings in the following format:
>    *
>    * 	major.minor.revision.buildinfo
>    *
> - * but they do not need to have all fields.
> - * Also major.minor or major.minor.revision are allowed
> - * The conversion generates a 64 bit value that can be compared
> + * they do not need to have all fields.
>    */
> -static __u64 version_to_number(const char *version_string)
> +static int compare_oldstyle_version(const char *left_version,
> +				    const char *right_version)
>   {
> -	char **versions = NULL;
> -	char **ver;
> -	unsigned int count = 0;
> -	__u64 version = 0;
> +	char **left_versions = NULL, **right_versions = NULL;
> +	size_t i;
> +	int compare;
>   
> -	versions = string_split(version_string, '.');
> -	for (ver = versions; *ver != NULL; ver++, count ++) {
> -		if (count < 4) {
> -			unsigned long int fld = strtoul(*ver, NULL, 10);
> -			/* check for return of strtoul, mandatory */
> -			if (fld != ULONG_MAX) {
> -				fld &= 0xffff;
> -				version = (version << 16) | fld;
> -			}
> +	left_versions = string_split(left_version, '.');
> +	right_versions = string_split(right_version, '.');
> +	for (i = 0; left_versions[i] != NULL && right_versions[i] != NULL; i++) {
> +		unsigned long int left_ver = strtoul(left_versions[i], NULL, 10);
> +		unsigned long int right_ver = strtoul(right_versions[i], NULL, 10);
> +		if (left_ver == ULONG_MAX)
> +			TRACE("version overflow: %s\n", left_versions[i]);
> +		if (right_ver == ULONG_MAX)
> +			TRACE("version overflow: %s\n", right_versions[i]);
> +
> +		if (left_ver < right_ver) {
> +			compare = -1;
> +			goto out;
> +		} else if (left_ver > right_ver) {
> +			compare = 1;
> +			goto out;
>   		}
> -		free(*ver);
>   	}
> -	if ((count < 4) && (count > 0))
> -		version <<= 16 * (4 - count);
> -	free(versions);
> +	if (left_versions[i] != NULL)
> +		compare = 1;
> +	else if (right_versions[i] != NULL)
> +		compare = -1;
> +	else
> +		compare = 0;
> +out:
> +	for (i = 0; left_versions[i] != NULL; i++)
> +		free(left_versions[i]);
> +	free(left_versions);
> +	for (i = 0; right_versions[i] != NULL; i++)
> +		free(right_versions[i]);
> +	free(right_versions);
>   
> -	return version;
> +	return compare;
>   }
>   
>   /*
> @@ -211,18 +224,8 @@ int compare_versions(const char* left_version, const char* right_version)
>   {
>   	if (is_oldstyle_version(left_version) && is_oldstyle_version(right_version))
>   	{
> -		__u64 left_u64 = version_to_number(left_version);
> -		__u64 right_u64 = version_to_number(right_version);
> -
>   		DEBUG("Comparing old-style versions '%s' <-> '%s'", left_version, right_version);
> -		TRACE("Parsed: '%llu' <-> '%llu'", left_u64, right_u64);
> -
> -		if (left_u64 < right_u64)
> -			return -1;
> -		else if (left_u64 > right_u64)
> -			return 1;
> -		else
> -			return 0;
> +		return compare_oldstyle_version(left_version, right_version);
>   	}
>   	else
>   	{
> 

Best regards,
Stefano Babic
Dominique MARTINET Oct. 14, 2021, 7:50 a.m. UTC | #2
Stefano Babic wrote on Thu, Oct 14, 2021 at 09:36:10AM +0200:
> mmhhhh...the issues reported are really the conditions to use plain numbers
> as version, as it is written in description of the version_to_number()
> function:
> 
> - version must be in format x.y.z.t, just numbers, as
> major.minor.revision.build
> - each field is a 16 bit number, that is between 0 and 65535
> 
> And of course, any attempt to break rule 1 or 2 leads to a wrong comparison.
> A 16 bit field for each subversion still seems very big, and well, having
> more as that suggests to switch to another schema.
>
> When the above conversion is not enough, there is support for semantic
> version in SWUpdate, and alphanumeric versions can be used.
> 
> I am sure from projects that they rely on rule 1), and they use additional
> fields just as buildtime / counter, but the version is the same. That is, a
> 1.2.3.4, 1.2.3.4.5 and 1.2.3.4.12 are the same version and SWUpdate must
> skip (as it is currently doing) because it is not a new version.
> 
> I am quite surprise that the current schema is not enough, are you unsure
> you cannot use semantic version if 64 bit (sigh !) are not enough for you ?

My problem is that I am not the final user of swupdate for our boards,
but our customers will generate swu files to install.

I have added restrictions to the swu generation tool to limit to four
fields or semver, but I have seen people use larger than 16 bit versions
(for example using current date as a version, e.g. 211014 or 20211014)...
Which look like it works in practice, but will eventually break down the
line so I wanted to make sure swupdate allows this.

Alternatively trying to parse semver first and falling back to old
versions would work for me as well, is that better?
It would make up to 3 fields accept up to 2^31 numbers, which is enough
for dates I have seen (haven't seen anyone pile up hour minutes second
as well... yet)

allowing more than 4 numbers just came free with the solution I had in
mind, but I am not fussy about this point even if if an error message or
warning probably ought to be displayed in case something passes my
filter during image generation.
Stefano Babic Oct. 14, 2021, 8:27 a.m. UTC | #3
Hi Dominique,

On 14.10.21 09:50, Dominique Martinet wrote:
> Stefano Babic wrote on Thu, Oct 14, 2021 at 09:36:10AM +0200:
>> mmhhhh...the issues reported are really the conditions to use plain numbers
>> as version, as it is written in description of the version_to_number()
>> function:
>>
>> - version must be in format x.y.z.t, just numbers, as
>> major.minor.revision.build
>> - each field is a 16 bit number, that is between 0 and 65535
>>
>> And of course, any attempt to break rule 1 or 2 leads to a wrong comparison.
>> A 16 bit field for each subversion still seems very big, and well, having
>> more as that suggests to switch to another schema.
>>
>> When the above conversion is not enough, there is support for semantic
>> version in SWUpdate, and alphanumeric versions can be used.
>>
>> I am sure from projects that they rely on rule 1), and they use additional
>> fields just as buildtime / counter, but the version is the same. That is, a
>> 1.2.3.4, 1.2.3.4.5 and 1.2.3.4.12 are the same version and SWUpdate must
>> skip (as it is currently doing) because it is not a new version.
>>
>> I am quite surprise that the current schema is not enough, are you unsure
>> you cannot use semantic version if 64 bit (sigh !) are not enough for you ?
> 
> My problem is that I am not the final user of swupdate for our boards,
> but our customers will generate swu files to install.
> 

Ok, and customers are very creative....

> I have added restrictions to the swu generation tool to limit to four
> fields or semver, but I have seen people use larger than 16 bit versions
> (for example using current date as a version, e.g. 211014 or 20211014)...
> Which look like it works in practice, but will eventually break down the
> line so I wanted to make sure swupdate allows this.
> 
> Alternatively trying to parse semver first and falling back to old
> versions would work for me as well, is that better?

I guess that all versions are treated as semver - this is the reason why 
old versions are checked first.

But you can extend the is_oldstyle_version() and version_to_number() to 
perform more checks. is_old_style() could call version_to_number(), and 
the last one returns ULONG_MAX (as 64 bit number) in case of errors, 
that should not accepted at all, for example if a field is > 65535. Then 
you switch to semver in those cases.

> It would make up to 3 fields accept up to 2^31 numbers, which is enough
> for dates I have seen (haven't seen anyone pile up hour minutes second
> as well... yet)
> 
> allowing more than 4 numbers just came free with the solution I had in
> mind, but I am not fussy about this point even if if an error message or
> warning probably ought to be displayed in case something passes my
> filter during image generation.
> 

Best regards,
Stefano
Dominique MARTINET Oct. 14, 2021, 8:45 a.m. UTC | #4
Hi Stefano,

Stefano Babic wrote on Thu, Oct 14, 2021 at 10:27:10AM +0200:
> > I have added restrictions to the swu generation tool to limit to four
> > fields or semver, but I have seen people use larger than 16 bit versions
> > (for example using current date as a version, e.g. 211014 or 20211014)...
> > Which look like it works in practice, but will eventually break down the
> > line so I wanted to make sure swupdate allows this.
> > 
> > Alternatively trying to parse semver first and falling back to old
> > versions would work for me as well, is that better?
> 
> I guess that all versions are treated as semver - this is the reason why old
> versions are checked first.

hmm, it does have a few failures (for example if an invalid char like !
is present), but looking at it in more details it looks like it also
just ignores whatever comes after an eventual third dot but is
considered valid semver...

Ok, that was not a good idea, x.y.z.t must still work.

> But you can extend the is_oldstyle_version() and version_to_number() to
> perform more checks. is_old_style() could call version_to_number(), and the
> last one returns ULONG_MAX (as 64 bit number) in case of errors, that should
> not accepted at all, for example if a field is > 65535. Then you switch to
> semver in those cases.

Hm, sure, but in this case I do not see the problem in supporting bigger
values the way I did it?
Since it only touches values where odd things would happen the change of
behaviour should be minimal... Well I guess that can still break
something somewhere, even if I stop at 4 numbers.

I'll take a stab at changing is_oldstyle_version tomorrow, it can be
written like the semver_parse() success check.



BTW you made me check documentation as I did not recall seeing the
restriction before, it is clearly written for -N and -max-version
command line switches (doc/source/swupdate.rst):
> Version consists of either 4 numbers (major.minor.rev.build with each
> field in the range 0..65535) or it is a semantic version.

but the paragraph in sw-description.rst does not mention anything about
a maximum value: 
> In this case, version can be any of 2 formats. Either the version consists
> of *up to* 4 numbers separated by a dot, e.g. `<major>.<minor>.<rev>.<build>`,
> or it is a `semantic version <https://semver.org>`_.

I will send a second patch that fixes either depending on which
direction we take.
Stefano Babic Oct. 14, 2021, 11:28 a.m. UTC | #5
On 14.10.21 10:45, Dominique Martinet wrote:
> Hi Stefano,
> 
> Stefano Babic wrote on Thu, Oct 14, 2021 at 10:27:10AM +0200:
>>> I have added restrictions to the swu generation tool to limit to four
>>> fields or semver, but I have seen people use larger than 16 bit versions
>>> (for example using current date as a version, e.g. 211014 or 20211014)...
>>> Which look like it works in practice, but will eventually break down the
>>> line so I wanted to make sure swupdate allows this.
>>>
>>> Alternatively trying to parse semver first and falling back to old
>>> versions would work for me as well, is that better?
>>
>> I guess that all versions are treated as semver - this is the reason why old
>> versions are checked first.
> 
> hmm, it does have a few failures (for example if an invalid char like !
> is present), but looking at it in more details it looks like it also
> just ignores whatever comes after an eventual third dot but is
> considered valid semver...
> 
> Ok, that was not a good idea, x.y.z.t must still work.
> 
>> But you can extend the is_oldstyle_version() and version_to_number() to
>> perform more checks. is_old_style() could call version_to_number(), and the
>> last one returns ULONG_MAX (as 64 bit number) in case of errors, that should
>> not accepted at all, for example if a field is > 65535. Then you switch to
>> semver in those cases.
> 
> Hm, sure, but in this case I do not see the problem in supporting bigger
> values the way I did it?
> Since it only touches values where odd things would happen the change of
> behaviour should be minimal... Well I guess that can still break
> something somewhere, even if I stop at 4 numbers.
> 
> I'll take a stab at changing is_oldstyle_version tomorrow, it can be
> written like the semver_parse() success check.
> 
> 
> 
> BTW you made me check documentation as I did not recall seeing the
> restriction before, it is clearly written for -N and -max-version
> command line switches (doc/source/swupdate.rst):
>> Version consists of either 4 numbers (major.minor.rev.build with each
>> field in the range 0..65535) or it is a semantic version.
> 
> but the paragraph in sw-description.rst does not mention anything about
> a maximum value:

Agree, documentation is missing and should be improved.

>> In this case, version can be any of 2 formats. Either the version consists
>> of *up to* 4 numbers separated by a dot, e.g. `<major>.<minor>.<rev>.<build>`,
>> or it is a `semantic version <https://semver.org>`_.
> 
> I will send a second patch that fixes either depending on which
> direction we take.
> 

Regards,
Stefano
diff mbox series

Patch

diff --git a/core/artifacts_versions.c b/core/artifacts_versions.c
index 433125862014..eb605bc403f1 100644
--- a/core/artifacts_versions.c
+++ b/core/artifacts_versions.c
@@ -161,39 +161,52 @@  static bool is_oldstyle_version(const char* version_string)
 }
 
 /*
- * convert a version string into a number
- * version string is in the format:
+ * compare version strings in the following format:
  *
  * 	major.minor.revision.buildinfo
  *
- * but they do not need to have all fields.
- * Also major.minor or major.minor.revision are allowed
- * The conversion generates a 64 bit value that can be compared
+ * they do not need to have all fields.
  */
-static __u64 version_to_number(const char *version_string)
+static int compare_oldstyle_version(const char *left_version,
+				    const char *right_version)
 {
-	char **versions = NULL;
-	char **ver;
-	unsigned int count = 0;
-	__u64 version = 0;
+	char **left_versions = NULL, **right_versions = NULL;
+	size_t i;
+	int compare;
 
-	versions = string_split(version_string, '.');
-	for (ver = versions; *ver != NULL; ver++, count ++) {
-		if (count < 4) {
-			unsigned long int fld = strtoul(*ver, NULL, 10);
-			/* check for return of strtoul, mandatory */
-			if (fld != ULONG_MAX) {
-				fld &= 0xffff;
-				version = (version << 16) | fld;
-			}
+	left_versions = string_split(left_version, '.');
+	right_versions = string_split(right_version, '.');
+	for (i = 0; left_versions[i] != NULL && right_versions[i] != NULL; i++) {
+		unsigned long int left_ver = strtoul(left_versions[i], NULL, 10);
+		unsigned long int right_ver = strtoul(right_versions[i], NULL, 10);
+		if (left_ver == ULONG_MAX)
+			TRACE("version overflow: %s\n", left_versions[i]);
+		if (right_ver == ULONG_MAX)
+			TRACE("version overflow: %s\n", right_versions[i]);
+
+		if (left_ver < right_ver) {
+			compare = -1;
+			goto out;
+		} else if (left_ver > right_ver) {
+			compare = 1;
+			goto out;
 		}
-		free(*ver);
 	}
-	if ((count < 4) && (count > 0))
-		version <<= 16 * (4 - count);
-	free(versions);
+	if (left_versions[i] != NULL)
+		compare = 1;
+	else if (right_versions[i] != NULL)
+		compare = -1;
+	else
+		compare = 0;
+out:
+	for (i = 0; left_versions[i] != NULL; i++)
+		free(left_versions[i]);
+	free(left_versions);
+	for (i = 0; right_versions[i] != NULL; i++)
+		free(right_versions[i]);
+	free(right_versions);
 
-	return version;
+	return compare;
 }
 
 /*
@@ -211,18 +224,8 @@  int compare_versions(const char* left_version, const char* right_version)
 {
 	if (is_oldstyle_version(left_version) && is_oldstyle_version(right_version))
 	{
-		__u64 left_u64 = version_to_number(left_version);
-		__u64 right_u64 = version_to_number(right_version);
-
 		DEBUG("Comparing old-style versions '%s' <-> '%s'", left_version, right_version);
-		TRACE("Parsed: '%llu' <-> '%llu'", left_u64, right_u64);
-
-		if (left_u64 < right_u64)
-			return -1;
-		else if (left_u64 > right_u64)
-			return 1;
-		else
-			return 0;
+		return compare_oldstyle_version(left_version, right_version);
 	}
 	else
 	{