diff mbox

[1/7] Introduce strtosz() library function to convert a string to a byte count.

Message ID 1286529360-5715-2-git-send-email-Jes.Sorensen@redhat.com
State New
Headers show

Commit Message

Jes Sorensen Oct. 8, 2010, 9:15 a.m. UTC
From: Jes Sorensen <Jes.Sorensen@redhat.com>

strtosz() returns -1 on error.

v2 renamed from strtobytes() to strtosz() as suggested by Markus.

Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
---
 cutils.c      |   39 +++++++++++++++++++++++++++++++++++++++
 qemu-common.h |    1 +
 vl.c          |   31 ++++++++++---------------------
 3 files changed, 50 insertions(+), 21 deletions(-)

Comments

Markus Armbruster Oct. 11, 2010, 8:51 a.m. UTC | #1
Jes.Sorensen@redhat.com writes:

> From: Jes Sorensen <Jes.Sorensen@redhat.com>
>
> strtosz() returns -1 on error.
>
> v2 renamed from strtobytes() to strtosz() as suggested by Markus.
>
> Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
> ---
>  cutils.c      |   39 +++++++++++++++++++++++++++++++++++++++
>  qemu-common.h |    1 +
>  vl.c          |   31 ++++++++++---------------------
>  3 files changed, 50 insertions(+), 21 deletions(-)
>
> diff --git a/cutils.c b/cutils.c
> index 5883737..ee591c5 100644
> --- a/cutils.c
> +++ b/cutils.c
> @@ -283,3 +283,42 @@ int fcntl_setfl(int fd, int flag)
>  }
>  #endif
>  
> +/*
> + * Convert string to bytes, allowing either K/k for KB, M/m for MB,
> + * G/g for GB or T/t for TB. Default without any postfix is MB.
> + * End pointer will be returned in *end, if end is valid.
> + * Return -1 on error.
> + */
> +ssize_t strtosz(const char *nptr, char **end)
> +{
> +    int64_t value;

long long, please, because that's what strtoll() returns.

> +    char *endptr;
> +
> +    value = strtoll(nptr, &endptr, 0);
> +    switch (*endptr++) {
> +    case 'K':
> +    case 'k':
> +        value <<= 10;
> +        break;
> +    case 0:
> +    case 'M':
> +    case 'm':
> +        value <<= 20;
> +        break;
> +    case 'G':
> +    case 'g':
> +        value <<= 30;
> +        break;
> +    case 'T':
> +    case 't':
> +        value <<= 40;
> +        break;
> +    default:
> +        value = -1;
> +    }
> +
> +    if (end)
> +        *end = endptr;
> +
> +    return value;

Casts value to ssize_t, which might truncate.

> +}

Sloppy use of strtoll().

Both tolerable as long as the patch doesn't make things worse.  Let's
see:

> diff --git a/qemu-common.h b/qemu-common.h
> index 81aafa0..0a062d4 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -153,6 +153,7 @@ time_t mktimegm(struct tm *tm);
>  int qemu_fls(int i);
>  int qemu_fdatasync(int fd);
>  int fcntl_setfl(int fd, int flag);
> +ssize_t strtosz(const char *nptr, char **end);
>  
>  /* path.c */
>  void init_paths(const char *prefix);
> diff --git a/vl.c b/vl.c
> index df414ef..6043fa2 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -734,16 +734,13 @@ static void numa_add(const char *optarg)
>          if (get_param_value(option, 128, "mem", optarg) == 0) {
>              node_mem[nodenr] = 0;
>          } else {
> -            value = strtoull(option, &endptr, 0);
> -            switch (*endptr) {
> -            case 0: case 'M': case 'm':
> -                value <<= 20;
> -                break;
> -            case 'G': case 'g':
> -                value <<= 30;
> -                break;
> +            ssize_t sval;
> +            sval = strtosz(option, NULL);
> +            if (sval < 0) {
> +                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
> +                exit(1);

                        Before                          After
Invalid number          silently interpreted as zero    no change
Overflow                silently capped to ULLONG_MAX   LLONG_MAX, then
                                                        trunc ssize_t
Invalid size suffix     silently ignored                rejected

>              }
> -            node_mem[nodenr] = value;
> +            node_mem[nodenr] = sval;
>          }
>          if (get_param_value(option, 128, "cpus", optarg) == 0) {
>              node_cpumask[nodenr] = 0;
> @@ -2163,18 +2160,10 @@ int main(int argc, char **argv, char **envp)
>                  exit(0);
>                  break;
>              case QEMU_OPTION_m: {
> -                uint64_t value;
> -                char *ptr;
> +                ssize_t value;
>  
> -                value = strtoul(optarg, &ptr, 10);
> -                switch (*ptr) {
> -                case 0: case 'M': case 'm':
> -                    value <<= 20;
> -                    break;
> -                case 'G': case 'g':
> -                    value <<= 30;
> -                    break;
> -                default:
> +                value = strtosz(optarg, NULL);
> +                if (value < 0) {
>                      fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
>                      exit(1);
>                  }

                        Before                          After
Invalid number          silently interpreted as zero    no change
Overflow                silently capped to ULLONG_MAX   LLONG_MAX, then
                                                        trunc ssize_t
Invalid size suffix     rejected                        no change

A bit more context:


                   /* On 32-bit hosts, QEMU is limited by virtual address space */
                   if (value > (2047 << 20) && HOST_LONG_BITS == 32) {
                       fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n");
                       exit(1);
                   }
                   if (value != (uint64_t)(ram_addr_t)value) {
                       fprintf(stderr, "qemu: ram size too large\n");
                       exit(1);
                   }
                   ram_size = value;
                   break;

I'm afraid you break both conditionals for 32 bit hosts.

On such hosts, ssize_t is 32 bits wide.  strtosz() parses 64 bits
internally, but truncates to 32 bits silently.

The old code reliably rejects values larger than 2047MiB.  Your
truncation can change a value exceeding the limit to one within the
limit.  First conditional becomes unreliable.

The second conditional becomes useless: it sign-extends a non-negative
32 bit integer value to 64 bit, then truncates back, and checks the
value stays the same.  It trivially does.


I strongly recommend to make strtosz() sane from the start, not in a
later patch: proper error checking, including proper handling of
overflow.

Perhaps squashing 1-3/7 would get us there, or at least closer.
Jes Sorensen Oct. 11, 2010, 12:45 p.m. UTC | #2
On 10/11/10 10:51, Markus Armbruster wrote:
> Jes.Sorensen@redhat.com writes:
>> +/*
>> + * Convert string to bytes, allowing either K/k for KB, M/m for MB,
>> + * G/g for GB or T/t for TB. Default without any postfix is MB.
>> + * End pointer will be returned in *end, if end is valid.
>> + * Return -1 on error.
>> + */
>> +ssize_t strtosz(const char *nptr, char **end)
>> +{
>> +    int64_t value;
> 
> long long, please, because that's what strtoll() returns.

I merged patches 1-3 as you suggested, so comments based on the combined
patch.

This is longer an issue as it is switched to strtod().

>> +    char *endptr;
>> +
>> +    value = strtoll(nptr, &endptr, 0);
>> +    switch (*endptr++) {
>> +    case 'K':
>> +    case 'k':
>> +        value <<= 10;
>> +        break;
>> +    case 0:
>> +    case 'M':
>> +    case 'm':
>> +        value <<= 20;
>> +        break;
>> +    case 'G':
>> +    case 'g':
>> +        value <<= 30;
>> +        break;
>> +    case 'T':
>> +    case 't':
>> +        value <<= 40;
>> +        break;
>> +    default:
>> +        value = -1;
>> +    }
>> +
>> +    if (end)
>> +        *end = endptr;
>> +
>> +    return value;
> 
> Casts value to ssize_t, which might truncate.

The new patch does:

    int64_t tmpval;
    tmpval = (val * mul);
    if (tmpval >= ~(size_t)0)
        goto fail;

so anything that is out of bounds is checked and caught before returning
a possibly truncated valued.

> Sloppy use of strtoll().    tmpval = (val * mul);
    if (tmpval >= ~(size_t)0)
        goto fail;
> 
> Both tolerable as long as the patch doesn't make things worse.  Let's
> see:
> 
>> diff --git a/qemu-common.h b/qemu-common.h
>> index 81aafa0..0a062d4 100644
>> --- a/qemu-common.h
>> +++ b/qemu-common.h
>> @@ -153,6 +153,7 @@ time_t mktimegm(struct tm *tm);
>>  int qemu_fls(int i);
>>  int qemu_fdatasync(int fd);
>>  int fcntl_setfl(int fd, int flag);
>> +ssize_t strtosz(const char *nptr, char **end);
>>  
>>  /* path.c */
>>  void init_paths(const char *prefix);
>> diff --git a/vl.c b/vl.c
>> index df414ef..6043fa2 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -734,16 +734,13 @@ static void numa_add(const char *optarg)
>>          if (get_param_value(option, 128, "mem", optarg) == 0) {
>>              node_mem[nodenr] = 0;
>>          } else {
>> -            value = strtoull(option, &endptr, 0);
>> -            switch (*endptr) {
>> -            case 0: case 'M': case 'm':
>> -                value <<= 20;
>> -                break;
>> -            case 'G': case 'g':
>> -                value <<= 30;
>> -                break;
>> +            ssize_t sval;
>> +            sval = strtosz(option, NULL);
>> +            if (sval < 0) {
>> +                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
>> +                exit(1);
> 
>                         Before                          After
> Invalid number          silently interpreted as zero    no change
> Overflow                silently capped to ULLONG_MAX   LLONG_MAX, then
>                                                         trunc ssize_t
> Invalid size suffix     silently ignored                rejected

What do you mean by invalid number here?

LLONG_MAX seems a reasonable limit, ie. 31 bit on 32 bit hosts or 63
bits on 64 bit hosts. strtosz() is meant to return a size, so IMHO thats
a fair limitation. We could use size_t instead of ssize_t but it would
require ugly casts in any function calling the function to test for error.

>                         Before                          After
> Invalid number          silently interpreted as zero    no change
> Overflow                silently capped to ULLONG_MAX   LLONG_MAX, then
>                                                         trunc ssize_t
> Invalid size suffix     rejected                        no change
> 
> A bit more context:
> 
> 
>                    /* On 32-bit hosts, QEMU is limited by virtual address space */
>                    if (value > (2047 << 20) && HOST_LONG_BITS == 32) {
>                        fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n");
>                        exit(1);
>                    }
>                    if (value != (uint64_t)(ram_addr_t)value) {
>                        fprintf(stderr, "qemu: ram size too large\n");
>                        exit(1);
>                    }
>                    ram_size = value;
>                    break;
> 
> I'm afraid you break both conditionals for 32 bit hosts.
> 
> On such hosts, ssize_t is 32 bits wide.  strtosz() parses 64 bits
> internally, but truncates to 32 bits silently.

I believe the combined patch is taking care of this fine with the
>= ~(size_t)0 comparison. If the value is above that, it returns an
error. For 32 bit hosts that means we should be able to specify 2047MB
RAM fine.

The only place where I see the latter being a potential problem is on
P64 systems such as win64, since ram_addr_t is defined to be unsigned
long, but afaik we don't support win64 anyway. On both 32 bit and LP64
systems sizeof(ram_addr_t) == sizeof(ssize_t), so it should be fine.


> The old code reliably rejects values larger than 2047MiB.  Your
> truncation can change a value exceeding the limit to one within the
> limit.  First conditional becomes unreliable.
> 
> The second conditional becomes useless: it sign-extends a non-negative
> 32 bit integer value to 64 bit, then truncates back, and checks the
> value stays the same.  It trivially does.
> 
> I strongly recommend to make strtosz() sane from the start, not in a
> later patch: proper error checking, including proper handling of
> overflow.
> 
> Perhaps squashing 1-3/7 would get us there, or at least closer.

I have squashed 1-3, but I don't think 7 should be squashed. Adding a
new feature and taking away the old one in the same patch isn't right IMHO.

Cheers,
Jes
Markus Armbruster Oct. 11, 2010, 2:39 p.m. UTC | #3
Jes Sorensen <Jes.Sorensen@redhat.com> writes:

> On 10/11/10 10:51, Markus Armbruster wrote:
>> Jes.Sorensen@redhat.com writes:
>>> +/*
>>> + * Convert string to bytes, allowing either K/k for KB, M/m for MB,
>>> + * G/g for GB or T/t for TB. Default without any postfix is MB.
>>> + * End pointer will be returned in *end, if end is valid.
>>> + * Return -1 on error.
>>> + */
>>> +ssize_t strtosz(const char *nptr, char **end)
>>> +{
>>> +    int64_t value;
>> 
>> long long, please, because that's what strtoll() returns.
>
> I merged patches 1-3 as you suggested, so comments based on the combined
> patch.
>
> This is longer an issue as it is switched to strtod().

Okay, I'll review it.

>>> +    char *endptr;
>>> +
>>> +    value = strtoll(nptr, &endptr, 0);
>>> +    switch (*endptr++) {
>>> +    case 'K':
>>> +    case 'k':
>>> +        value <<= 10;
>>> +        break;
>>> +    case 0:
>>> +    case 'M':
>>> +    case 'm':
>>> +        value <<= 20;
>>> +        break;
>>> +    case 'G':
>>> +    case 'g':
>>> +        value <<= 30;
>>> +        break;
>>> +    case 'T':
>>> +    case 't':
>>> +        value <<= 40;
>>> +        break;
>>> +    default:
>>> +        value = -1;
>>> +    }
>>> +
>>> +    if (end)
>>> +        *end = endptr;
>>> +
>>> +    return value;
>> 
>> Casts value to ssize_t, which might truncate.
>
> The new patch does:
>
>     int64_t tmpval;
>     tmpval = (val * mul);
>     if (tmpval >= ~(size_t)0)
>         goto fail;
>
> so anything that is out of bounds is checked and caught before returning
> a possibly truncated valued.
>
>> Sloppy use of strtoll().    tmpval = (val * mul);
>     if (tmpval >= ~(size_t)0)
>         goto fail;
>> 
>> Both tolerable as long as the patch doesn't make things worse.  Let's
>> see:
>> 
>>> diff --git a/qemu-common.h b/qemu-common.h
>>> index 81aafa0..0a062d4 100644
>>> --- a/qemu-common.h
>>> +++ b/qemu-common.h
>>> @@ -153,6 +153,7 @@ time_t mktimegm(struct tm *tm);
>>>  int qemu_fls(int i);
>>>  int qemu_fdatasync(int fd);
>>>  int fcntl_setfl(int fd, int flag);
>>> +ssize_t strtosz(const char *nptr, char **end);
>>>  
>>>  /* path.c */
>>>  void init_paths(const char *prefix);
>>> diff --git a/vl.c b/vl.c
>>> index df414ef..6043fa2 100644
>>> --- a/vl.c
>>> +++ b/vl.c
>>> @@ -734,16 +734,13 @@ static void numa_add(const char *optarg)
>>>          if (get_param_value(option, 128, "mem", optarg) == 0) {
>>>              node_mem[nodenr] = 0;
>>>          } else {
>>> -            value = strtoull(option, &endptr, 0);
>>> -            switch (*endptr) {
>>> -            case 0: case 'M': case 'm':
>>> -                value <<= 20;
>>> -                break;
>>> -            case 'G': case 'g':
>>> -                value <<= 30;
>>> -                break;
>>> +            ssize_t sval;
>>> +            sval = strtosz(option, NULL);
>>> +            if (sval < 0) {
>>> +                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
>>> +                exit(1);
>> 
>>                         Before                          After
>> Invalid number          silently interpreted as zero    no change
>> Overflow                silently capped to ULLONG_MAX   LLONG_MAX, then
>>                                                         trunc ssize_t
>> Invalid size suffix     silently ignored                rejected
>
> What do you mean by invalid number here?

strtoul(nptr, &eptr, base) & friends skip whitespace, eat sign + digits,
stop at first unrecognized character.

What if they can't find any digits?  They store nptr in eptr and return
0.

> LLONG_MAX seems a reasonable limit, ie. 31 bit on 32 bit hosts or 63
> bits on 64 bit hosts. strtosz() is meant to return a size, so IMHO thats
> a fair limitation. We could use size_t instead of ssize_t but it would
> require ugly casts in any function calling the function to test for error.

64 bit hosts are fine; 2^63 should remain an acceptable implementation
limit for a while.

The use in main() is fine on 32 bit: the limit is 2047MiB, which fits
into a 32 bit ssize_t.  Wonder why the limit is 2047, not 2048MiB.  Oh
well.

As far as I can see, the use in numa_add() is also fine, because a
node's memory can't exceed total memory.

>>                         Before                          After
>> Invalid number          silently interpreted as zero    no change
>> Overflow                silently capped to ULLONG_MAX   LLONG_MAX, then
>>                                                         trunc ssize_t
>> Invalid size suffix     rejected                        no change
>> 
>> A bit more context:
>> 
>> 
>>                    /* On 32-bit hosts, QEMU is limited by virtual address space */
>>                    if (value > (2047 << 20) && HOST_LONG_BITS == 32) {
>>                        fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n");
>>                        exit(1);
>>                    }
>>                    if (value != (uint64_t)(ram_addr_t)value) {
>>                        fprintf(stderr, "qemu: ram size too large\n");
>>                        exit(1);
>>                    }
>>                    ram_size = value;
>>                    break;
>> 
>> I'm afraid you break both conditionals for 32 bit hosts.
>> 
>> On such hosts, ssize_t is 32 bits wide.  strtosz() parses 64 bits
>> internally, but truncates to 32 bits silently.
>
> I believe the combined patch is taking care of this fine with the
>>= ~(size_t)0 comparison. If the value is above that, it returns an
> error. For 32 bit hosts that means we should be able to specify 2047MB
> RAM fine.
>
> The only place where I see the latter being a potential problem is on
> P64 systems such as win64, since ram_addr_t is defined to be unsigned
> long, but afaik we don't support win64 anyway. On both 32 bit and LP64
> systems sizeof(ram_addr_t) == sizeof(ssize_t), so it should be fine.
>
>
>> The old code reliably rejects values larger than 2047MiB.  Your
>> truncation can change a value exceeding the limit to one within the
>> limit.  First conditional becomes unreliable.
>> 
>> The second conditional becomes useless: it sign-extends a non-negative
>> 32 bit integer value to 64 bit, then truncates back, and checks the
>> value stays the same.  It trivially does.
>> 
>> I strongly recommend to make strtosz() sane from the start, not in a
>> later patch: proper error checking, including proper handling of
>> overflow.
>> 
>> Perhaps squashing 1-3/7 would get us there, or at least closer.
>
> I have squashed 1-3, but I don't think 7 should be squashed. Adding a
> new feature and taking away the old one in the same patch isn't right IMHO.

Misunderstanding?  I suggested to squash 1-3 *of* 7, not (1-3 and 7) of 7.
diff mbox

Patch

diff --git a/cutils.c b/cutils.c
index 5883737..ee591c5 100644
--- a/cutils.c
+++ b/cutils.c
@@ -283,3 +283,42 @@  int fcntl_setfl(int fd, int flag)
 }
 #endif
 
+/*
+ * Convert string to bytes, allowing either K/k for KB, M/m for MB,
+ * G/g for GB or T/t for TB. Default without any postfix is MB.
+ * End pointer will be returned in *end, if end is valid.
+ * Return -1 on error.
+ */
+ssize_t strtosz(const char *nptr, char **end)
+{
+    int64_t value;
+    char *endptr;
+
+    value = strtoll(nptr, &endptr, 0);
+    switch (*endptr++) {
+    case 'K':
+    case 'k':
+        value <<= 10;
+        break;
+    case 0:
+    case 'M':
+    case 'm':
+        value <<= 20;
+        break;
+    case 'G':
+    case 'g':
+        value <<= 30;
+        break;
+    case 'T':
+    case 't':
+        value <<= 40;
+        break;
+    default:
+        value = -1;
+    }
+
+    if (end)
+        *end = endptr;
+
+    return value;
+}
diff --git a/qemu-common.h b/qemu-common.h
index 81aafa0..0a062d4 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -153,6 +153,7 @@  time_t mktimegm(struct tm *tm);
 int qemu_fls(int i);
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
+ssize_t strtosz(const char *nptr, char **end);
 
 /* path.c */
 void init_paths(const char *prefix);
diff --git a/vl.c b/vl.c
index df414ef..6043fa2 100644
--- a/vl.c
+++ b/vl.c
@@ -734,16 +734,13 @@  static void numa_add(const char *optarg)
         if (get_param_value(option, 128, "mem", optarg) == 0) {
             node_mem[nodenr] = 0;
         } else {
-            value = strtoull(option, &endptr, 0);
-            switch (*endptr) {
-            case 0: case 'M': case 'm':
-                value <<= 20;
-                break;
-            case 'G': case 'g':
-                value <<= 30;
-                break;
+            ssize_t sval;
+            sval = strtosz(option, NULL);
+            if (sval < 0) {
+                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
+                exit(1);
             }
-            node_mem[nodenr] = value;
+            node_mem[nodenr] = sval;
         }
         if (get_param_value(option, 128, "cpus", optarg) == 0) {
             node_cpumask[nodenr] = 0;
@@ -2163,18 +2160,10 @@  int main(int argc, char **argv, char **envp)
                 exit(0);
                 break;
             case QEMU_OPTION_m: {
-                uint64_t value;
-                char *ptr;
+                ssize_t value;
 
-                value = strtoul(optarg, &ptr, 10);
-                switch (*ptr) {
-                case 0: case 'M': case 'm':
-                    value <<= 20;
-                    break;
-                case 'G': case 'g':
-                    value <<= 30;
-                    break;
-                default:
+                value = strtosz(optarg, NULL);
+                if (value < 0) {
                     fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
                     exit(1);
                 }