Patchwork [1/4] Introduce strtosz() library function to convert a string to a byte count.

login
register
mail settings
Submitter Jes Sorensen
Date Oct. 11, 2010, 12:54 p.m.
Message ID <1286801695-12876-2-git-send-email-Jes.Sorensen@redhat.com>
Download mbox | patch
Permalink /patch/67420/
State New
Headers show

Comments

Jes Sorensen - Oct. 11, 2010, 12:54 p.m.
From: Jes Sorensen <Jes.Sorensen@redhat.com>

strtosz() returns -1 on error. It now supports human unit formats in
eg. 1.0G, with better error handling.

Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
---
 cutils.c      |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-common.h |    1 +
 vl.c          |   31 +++++++++-------------------
 3 files changed, 72 insertions(+), 21 deletions(-)
Markus Armbruster - Oct. 11, 2010, 4:42 p.m.
Jes, I feel a bit bad finding still more faults, but here goes...

Jes.Sorensen@redhat.com writes:

> From: Jes Sorensen <Jes.Sorensen@redhat.com>
>
> strtosz() returns -1 on error. It now supports human unit formats in
> eg. 1.0G, with better error handling.
>
> Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
> ---
>  cutils.c      |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  qemu-common.h |    1 +
>  vl.c          |   31 +++++++++-------------------
>  3 files changed, 72 insertions(+), 21 deletions(-)
>
> diff --git a/cutils.c b/cutils.c
> index 5883737..e5a135e 100644
> --- a/cutils.c
> +++ b/cutils.c
> @@ -283,3 +283,64 @@ int fcntl_setfl(int fd, int flag)
>  }
>  #endif
>  
> +/*
> + * Convert string to bytes, allowing either K/k for KB, M/m for MB,
> + * G/g for GB or T/t for TB. Default without any postfix is MB.
> + * End pointer will be returned in *end, if end is valid.
> + * Return -1 on error.
> + */
> +ssize_t strtosz(const char *nptr, char **end)
> +{
> +    ssize_t retval = -1;
> +    int64_t tmpval;
> +    char *endptr;
> +    int mul_required = 0;
> +    double val, mul = 1;
> +
> +    endptr = (char *)nptr + strspn(nptr, " 0123456789");
> +    if (*endptr == '.') {
> +        mul_required = 1;
> +    }
> +
> +    errno = 0;
> +    val = strtod(nptr, &endptr);
> +    if (endptr == nptr || errno != 0 || val < 0)
> +        goto fail;

Consider strtosz("\t1.5", &eptr): Your strspn() returns 0, endptr points
to tab, and mul_required remains false.  strtod() succeeds and returns
1.5.  Oops.

strtod() skips whitespace, i.e. " \f\n\r\t\v" (assuming C locale).  You
could strspn() those.

But I'd leave the parsing to strtod(), and simply test whether the final
value is an integer.  See below for an obvious way to do that.

> +
> +    switch (*endptr++) {
> +    case 'K':
> +    case 'k':
> +        mul = 1 << 10;
> +        break;
> +    case 0:
> +    case ' ':
> +        if (mul_required) {
> +            goto fail;
> +        }
> +    case 'M':
> +    case 'm':
> +        mul = 1ULL << 20;
> +        break;
> +    case 'G':
> +    case 'g':
> +        mul = 1ULL << 30;
> +        break;
> +    case 'T':
> +    case 't':
> +        mul = 1ULL << 40;
> +        break;
> +    default:
> +        goto fail;

If there's no unit character, the stop character must be either 0 or
' '.  Why only space, but not other whitespace characters, such as tab?

If there is a unit character, any stop character is fine.

I think there are two clean interfaces:

* Consume the longest prefix that makes sense, then stop.  Return where
  we stopped.  Leave parsing / rejecting the suffix to the caller.

* Consume a complete string, fail if there's trailing garbage.  This is
  easier to use when you want to parse complete strings.  It's awkward
  when you don't.

> +    }
> +
> +    tmpval = (val * mul);
> +    if (tmpval >= ~(size_t)0)
> +        goto fail;

strtod() returns HUGE_VAL on overflow.  Converting that to int64_t has
undefined behavior.  Clean code has to do something like

       val *= mul;
       if (val < 0 || val > ~(size_t)0 || val != val) {
           goto fail;
       }

The val != val part cares for the pathological case when some joker
passes us "NAN".

The check for integer promised above:

       if ((int64_t)val != val) {
           goto fail;
       }

Depends on val *= mul, of course.

> +    retval = tmpval;
> +
> +    if (end)
> +        *end = endptr;
> +
> +fail:
> +    return retval;
> +}
> diff --git a/qemu-common.h b/qemu-common.h
> index 81aafa0..0a062d4 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -153,6 +153,7 @@ time_t mktimegm(struct tm *tm);
>  int qemu_fls(int i);
>  int qemu_fdatasync(int fd);
>  int fcntl_setfl(int fd, int flag);
> +ssize_t strtosz(const char *nptr, char **end);
>  
>  /* path.c */
>  void init_paths(const char *prefix);
> diff --git a/vl.c b/vl.c
> index df414ef..6043fa2 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -734,16 +734,13 @@ static void numa_add(const char *optarg)
>          if (get_param_value(option, 128, "mem", optarg) == 0) {
>              node_mem[nodenr] = 0;
>          } else {
> -            value = strtoull(option, &endptr, 0);
> -            switch (*endptr) {
> -            case 0: case 'M': case 'm':
> -                value <<= 20;
> -                break;
> -            case 'G': case 'g':
> -                value <<= 30;
> -                break;
> +            ssize_t sval;
> +            sval = strtosz(option, NULL);
> +            if (sval < 0) {
> +                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
> +                exit(1);

Still ignores trailing garbage.  No need to tighten that now.

>              }
> -            node_mem[nodenr] = value;
> +            node_mem[nodenr] = sval;
>          }
>          if (get_param_value(option, 128, "cpus", optarg) == 0) {
>              node_cpumask[nodenr] = 0;
> @@ -2163,18 +2160,10 @@ int main(int argc, char **argv, char **envp)
>                  exit(0);
>                  break;
>              case QEMU_OPTION_m: {
> -                uint64_t value;
> -                char *ptr;
> +                ssize_t value;
>  
> -                value = strtoul(optarg, &ptr, 10);
> -                switch (*ptr) {
> -                case 0: case 'M': case 'm':
> -                    value <<= 20;
> -                    break;
> -                case 'G': case 'g':
> -                    value <<= 30;
> -                    break;
> -                default:
> +                value = strtosz(optarg, NULL);
> +                if (value < 0) {
>                      fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
>                      exit(1);
>                  }

Same here.

Patch

diff --git a/cutils.c b/cutils.c
index 5883737..e5a135e 100644
--- a/cutils.c
+++ b/cutils.c
@@ -283,3 +283,64 @@  int fcntl_setfl(int fd, int flag)
 }
 #endif
 
+/*
+ * Convert string to bytes, allowing either K/k for KB, M/m for MB,
+ * G/g for GB or T/t for TB. Default without any postfix is MB.
+ * End pointer will be returned in *end, if end is valid.
+ * Return -1 on error.
+ */
+ssize_t strtosz(const char *nptr, char **end)
+{
+    ssize_t retval = -1;
+    int64_t tmpval;
+    char *endptr;
+    int mul_required = 0;
+    double val, mul = 1;
+
+    endptr = (char *)nptr + strspn(nptr, " 0123456789");
+    if (*endptr == '.') {
+        mul_required = 1;
+    }
+
+    errno = 0;
+    val = strtod(nptr, &endptr);
+    if (endptr == nptr || errno != 0 || val < 0)
+        goto fail;
+
+    switch (*endptr++) {
+    case 'K':
+    case 'k':
+        mul = 1 << 10;
+        break;
+    case 0:
+    case ' ':
+        if (mul_required) {
+            goto fail;
+        }
+    case 'M':
+    case 'm':
+        mul = 1ULL << 20;
+        break;
+    case 'G':
+    case 'g':
+        mul = 1ULL << 30;
+        break;
+    case 'T':
+    case 't':
+        mul = 1ULL << 40;
+        break;
+    default:
+        goto fail;
+    }
+
+    tmpval = (val * mul);
+    if (tmpval >= ~(size_t)0)
+        goto fail;
+    retval = tmpval;
+
+    if (end)
+        *end = endptr;
+
+fail:
+    return retval;
+}
diff --git a/qemu-common.h b/qemu-common.h
index 81aafa0..0a062d4 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -153,6 +153,7 @@  time_t mktimegm(struct tm *tm);
 int qemu_fls(int i);
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
+ssize_t strtosz(const char *nptr, char **end);
 
 /* path.c */
 void init_paths(const char *prefix);
diff --git a/vl.c b/vl.c
index df414ef..6043fa2 100644
--- a/vl.c
+++ b/vl.c
@@ -734,16 +734,13 @@  static void numa_add(const char *optarg)
         if (get_param_value(option, 128, "mem", optarg) == 0) {
             node_mem[nodenr] = 0;
         } else {
-            value = strtoull(option, &endptr, 0);
-            switch (*endptr) {
-            case 0: case 'M': case 'm':
-                value <<= 20;
-                break;
-            case 'G': case 'g':
-                value <<= 30;
-                break;
+            ssize_t sval;
+            sval = strtosz(option, NULL);
+            if (sval < 0) {
+                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
+                exit(1);
             }
-            node_mem[nodenr] = value;
+            node_mem[nodenr] = sval;
         }
         if (get_param_value(option, 128, "cpus", optarg) == 0) {
             node_cpumask[nodenr] = 0;
@@ -2163,18 +2160,10 @@  int main(int argc, char **argv, char **envp)
                 exit(0);
                 break;
             case QEMU_OPTION_m: {
-                uint64_t value;
-                char *ptr;
+                ssize_t value;
 
-                value = strtoul(optarg, &ptr, 10);
-                switch (*ptr) {
-                case 0: case 'M': case 'm':
-                    value <<= 20;
-                    break;
-                case 'G': case 'g':
-                    value <<= 30;
-                    break;
-                default:
+                value = strtosz(optarg, NULL);
+                if (value < 0) {
                     fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
                     exit(1);
                 }