Patchwork [1/8,v5] cutils: unsigned int parsing functions

login
register
mail settings
Submitter Eduardo Habkost
Date Jan. 18, 2013, 5:57 p.m.
Message ID <1358531842-16752-1-git-send-email-ehabkost@redhat.com>
Download mbox | patch
Permalink /patch/213701/
State New
Headers show

Comments

Eduardo Habkost - Jan. 18, 2013, 5:57 p.m.
There is a lot of duplicate parsing code using strto*() in QEMU, and
most of that code is broken in one way or another. Even the visitors
code has duplicate integer parsing code[1]. This introduces functions
to help parse unsigned int values: parse_uint() and parse_uint_full().

Parsing functions for signed ints and floats will be submitted later.

parse_uint_full() has all the checks made by opts_type_uint64() at
opts-visitor.c:

 - Check for NULL (returns -EINVAL)
 - Check for negative numbers (returns -EINVAL)
 - Check for empty string (returns -EINVAL)
 - Check for overflow or other errno values set by strtoull() (returns
   -errno)
 - Check for end of string (reject invalid characters after number)
   (returns -EINVAL)

parse_uint() does everything above except checking for the end of the
string, so callers can continue parsing the remainder of the string
after the number.

Unit tests included.

[1] string-input-visitor.c:parse_int() could use the same parsing code
    used by opts-visitor.c:opts_type_int(), instead of duplicating that
    logic.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>

Changes v2:
 - Trivial whitespace change
 - Add 'base' parameter to the functions

Changes v4:
 - Return -EINVAL in case a minus sign is found
 - Make endptr point to beginning of string in case -EINVAL
   is returned (like the strtoull() behavior)

Changes v5:
 - Updated function documentation to be very specific about
   the syntax and every error case
   Suggested-by: Markus Armbruster <armbru@redhat.com>
 - Added additional test case for whitespace-only string
 - parse_uint_full() will set *value to 0 if returning -EINVAL,
   so callers won't rely on the parsed value being returned.

v4 -> v5 interdiff:

    diff -u b/tests/test-cutils.c b/tests/test-cutils.c
    --- b/tests/test-cutils.c
    +++ b/tests/test-cutils.c
    @@ -61,6 +61,22 @@
         g_assert(endptr == str);
     }
     
    +static void test_parse_uint_whitespace(void)
    +{
    +    unsigned long long i = 999;
    +    char f = 'X';
    +    char *endptr = &f;
    +    const char *str = "   \t   ";
    +    int r;
    +
    +    r = parse_uint(str, &i, &endptr, 0);
    +
    +    g_assert_cmpint(r, ==, -EINVAL);
    +    g_assert_cmpint(i, ==, 0);
    +    g_assert(endptr == str);
    +}
    +
    +
     static void test_parse_uint_invalid(void)
     {
         unsigned long long i = 999;
    @@ -195,7 +211,7 @@
         r = parse_uint_full(str, &i, 0);
     
         g_assert_cmpint(r, ==, -EINVAL);
    -    g_assert_cmpint(i, ==, 123);
    +    g_assert_cmpint(i, ==, 0);
     }
     
     static void test_parse_uint_full_correct(void)
    @@ -216,6 +232,8 @@
     
         g_test_add_func("/cutils/parse_uint/null", test_parse_uint_null);
         g_test_add_func("/cutils/parse_uint/empty", test_parse_uint_empty);
    +    g_test_add_func("/cutils/parse_uint/whitespace",
    +                    test_parse_uint_whitespace);
         g_test_add_func("/cutils/parse_uint/invalid", test_parse_uint_invalid);
         g_test_add_func("/cutils/parse_uint/trailing", test_parse_uint_trailing);
         g_test_add_func("/cutils/parse_uint/correct", test_parse_uint_correct);
    diff -u b/util/cutils.c b/util/cutils.c
    --- b/util/cutils.c
    +++ b/util/cutils.c
    @@ -270,23 +270,30 @@
         return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
     }
     
    -/* Try to parse an unsigned integer
    +/**
    + * parse_uint:
      *
    - * Error checks done by the function:
    - * - NULL pointer will return -EINVAL.
    - * - Empty strings will return -EINVAL.
    - * - Overflow errors or other errno values  set by strtoull() will
    - *   return -errno (-ERANGE in case of overflow).
    - * - Differently from strtoull(), values starting with a minus sign are
    - *   rejected (returning -EINVAL).
    - *
    - * Sets endptr to point to the first invalid character. Callers may rely
    - * on *value and *endptr to be always set by the function, even in case of
    - * errors.
    + * @s: String to parse
    + * @value: Destination for parsed integer value
    + * @endptr: Destination for pointer to first character not consumed
    + * @base: integer base, between 2 and 36 inclusive, or 0
      *
    - * The 'base' parameter has the same meaning of 'base' on strtoull().
    + * Parse unsigned integer
      *
    - * Returns 0 on success, negative errno value on error.
    + * Parsed syntax is: arbitrary whitespace, a single optional '+', an optional
    + * "0x" if @base is 0 or 16, one or more digits. It's similar to strtoull()'s
    + * syntax, except that the minus sign ('-') is rejected, so negative numbers
    + * won't be considered valid.
    + *
    + * If @s is null, or @base is invalid, or @s doesn't start with an
    + * integer in the syntax above, set *@value to 0, *@endptr to @s, and
    + * return -EINVAL.
    + *
    + * Set @endptr to point right beyond the parsed integer.
    + *
    + * If the integer overflows unsigned long long, set *@value to
    + * ULLONG_MAX, and return -ERANGE.
    + * Else, set *@value to the parsed integer, and return 0.
      */
     int parse_uint(const char *s, unsigned long long *value, char **endptr,
                    int base)
    @@ -329,11 +336,19 @@
         return r;
     }
     
    -/* Try to parse an unsigned integer, making sure the whole string is parsed
    +/**
    + * parse_uint_full:
    + *
    + * @s: String to parse
    + * @value: Destination for parsed integer value
    + * @base: integer base, between 2 and 36 inclusive, or 0
    + *
    + * Parse unsigned integer from entire string
      *
      * Have the same behavior of parse_uint(), but with an additional check
    - * for additional data after the parsed number (in that case, the function
    - * will return -EINVAL).
    + * for additional data after the parsed number. If extra characters are present
    + * after the parsed number, the function will return -EINVAL, and the caller
    + * should not rely on the value set on *@value.
      */
     int parse_uint_full(const char *s, unsigned long long *value, int base)
     {
    @@ -345,6 +360,7 @@
             return r;
         }
         if (*endp) {
    +        *value = 0;
             return -EINVAL;
         }
     

---
 include/qemu-common.h |   4 +
 tests/Makefile        |   3 +
 tests/test-cutils.c   | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++
 util/cutils.c         |  97 +++++++++++++++++++
 4 files changed, 355 insertions(+)
 create mode 100644 tests/test-cutils.c
Eric Blake - Jan. 18, 2013, 6:11 p.m.
On 01/18/2013 10:57 AM, Eduardo Habkost wrote:
> There are lots of duplicate parsing code using strto*() in QEMU, and
> most of that code is broken in one way or another. Even the visitors
> code have duplicate integer parsing code[1]. This introduces functions
> to help parsing unsigned int values: parse_uint() and parse_uint_full().
> 
> Parsing functions for signed ints and floats will be submitted later.
> 
> parse_uint_full() has all the checks made by opts_type_uint64() at
> opts-visitor.c:
> 
>  - Check for NULL (returns -EINVAL)
>  - Check for negative numbers (returns -EINVAL)
>  - Check for empty string (returns -EINVAL)
>  - Check for overflow or other errno values set by strtoll() (returns
>    -errno)
>  - Check for end of string (reject invalid characters after number)
>    (returns -EINVAL)
> 
> parse_uint() does everything above except checking for the end of the
> string, so callers can continue parsing the remainder of string after
> the number.
> 
> Unit tests included.
> 
> [1] string-input-visitor.c:parse_int() could use the same parsing code
>     used by opts-visitor.c:opts_type_int(), instead of duplicating that
>     logic.
> 
> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
> ---
>     + *
>     + * If @s is null, or @base is invalid, or @s doesn't start with an
>     + * integer in the syntax above, set *@value to 0, *@endptr to @s, and
>     + * return -EINVAL.
>     + *
>     + * Set @endptr to point right beyond the parsed integer.
>     + *
>     + * If the integer overflows unsigned long long, set *@value to
>     + * ULLONG_MAX, and return -ERANGE.

Is it worth explicitly mentioning that *@endptr is set past the last
digit parsed in the -ERANGE case?  It's implied that it was set beyond
the parsed integer, but did you stop parsing the moment you detected
overflow (and thus *endptr might still be pointing to a digit), or is it
set beyond all possible digits to the first non-digit?

>     +/**
>     + * parse_uint_full:
>     + *
>     + * @s: String to parse
>     + * @value: Destination for parsed integer value
>     + * @base: integer base, between 2 and 36 inclusive, or 0
>     + *
>     + * Parse unsigned integer from entire string
>       *
>       * Have the same behavior of parse_uint(), but with an additional check
>     - * for additional data after the parsed number (in that case, the function
>     - * will return -EINVAL).
>     + * for additional data after the parsed number. If extra characters are present
>     + * after the parsed number, the function will return -EINVAL, and the caller
>     + * should not rely on the value set on *@value.

This says *value is unreliable;

>       */
>      int parse_uint_full(const char *s, unsigned long long *value, int base)
>      {
>     @@ -345,6 +360,7 @@
>              return r;
>          }
>          if (*endp) {
>     +        *value = 0;
>              return -EINVAL;

while this says it is explicitly 0.  Is this an intentional mismatch,
especially given that parse_uint explicitly documents that *value is
always set to a reliable value even on -EINVAL?


> +    /* make sure we reject negative numbers: */
> +    sp = s;
> +    while (isspace((unsigned char)*sp)) {
> +        ++sp;
> +    }
> +    if (*sp == '-') {
> +        r = -EINVAL;
> +        goto out;
> +    }
> +
> +    errno = 0;
> +    val = strtoull(s, &endp, base);

Is it worth a micro-optimization of calling strtoull(sp,...) instead of
strtoull(s,...), to avoid reparsing all the space that we just skipped?

Patch

diff --git a/include/qemu-common.h b/include/qemu-common.h
index ca464bb..f134629 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -170,6 +170,10 @@  int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
 int qemu_parse_fd(const char *param);
 
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+               int base);
+int parse_uint_full(const char *s, unsigned long long *value, int base);
+
 /*
  * strtosz() suffixes used to specify the default treatment of an
  * argument passed to strtosz() without an explicit suffix.
diff --git a/tests/Makefile b/tests/Makefile
index d86e95a..e5929cd 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -45,6 +45,8 @@  gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
 check-unit-y += tests/test-thread-pool$(EXESUF)
 gcov-files-test-thread-pool-y = thread-pool.c
+check-unit-y += tests/test-cutils$(EXESUF)
+gcov-files-test-cutils-y += util/cutils.c
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -86,6 +88,7 @@  tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
+tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
diff --git a/tests/test-cutils.c b/tests/test-cutils.c
new file mode 100644
index 0000000..7f94828
--- /dev/null
+++ b/tests/test-cutils.c
@@ -0,0 +1,251 @@ 
+/*
+ * cutils.c unit-tests
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Eduardo Habkost <ehabkost@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <glib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "qemu-common.h"
+
+
+static void test_parse_uint_null(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    int r;
+
+    r = parse_uint(NULL, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == NULL);
+}
+
+static void test_parse_uint_empty(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str);
+}
+
+static void test_parse_uint_whitespace(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "   \t   ";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str);
+}
+
+
+static void test_parse_uint_invalid(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = " \t xxx";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str);
+}
+
+
+static void test_parse_uint_trailing(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "123xxx";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+    g_assert(endptr == str + 3);
+}
+
+static void test_parse_uint_correct(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "123";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_parse_uint_octal(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "0123";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_parse_uint_decimal(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "0123";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 10);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+
+static void test_parse_uint_llong_max(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    char *str = g_strdup_printf("%llu", (unsigned long long)LLONG_MAX + 1);
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, (unsigned long long)LLONG_MAX + 1);
+    g_assert(endptr == str + strlen(str));
+
+    g_free(str);
+}
+
+static void test_parse_uint_overflow(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "99999999999999999999999999999999999999";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -ERANGE);
+    g_assert_cmpint(i, ==, ULLONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_parse_uint_negative(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = " \t -321";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str);
+}
+
+
+static void test_parse_uint_full_trailing(void)
+{
+    unsigned long long i = 999;
+    const char *str = "123xxx";
+    int r;
+
+    r = parse_uint_full(str, &i, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+}
+
+static void test_parse_uint_full_correct(void)
+{
+    unsigned long long i = 999;
+    const char *str = "123";
+    int r;
+
+    r = parse_uint_full(str, &i, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/cutils/parse_uint/null", test_parse_uint_null);
+    g_test_add_func("/cutils/parse_uint/empty", test_parse_uint_empty);
+    g_test_add_func("/cutils/parse_uint/whitespace",
+                    test_parse_uint_whitespace);
+    g_test_add_func("/cutils/parse_uint/invalid", test_parse_uint_invalid);
+    g_test_add_func("/cutils/parse_uint/trailing", test_parse_uint_trailing);
+    g_test_add_func("/cutils/parse_uint/correct", test_parse_uint_correct);
+    g_test_add_func("/cutils/parse_uint/octal", test_parse_uint_octal);
+    g_test_add_func("/cutils/parse_uint/decimal", test_parse_uint_decimal);
+    g_test_add_func("/cutils/parse_uint/llong_max", test_parse_uint_llong_max);
+    g_test_add_func("/cutils/parse_uint/overflow", test_parse_uint_overflow);
+    g_test_add_func("/cutils/parse_uint/negative", test_parse_uint_negative);
+    g_test_add_func("/cutils/parse_uint_full/trailing",
+                    test_parse_uint_full_trailing);
+    g_test_add_func("/cutils/parse_uint_full/correct",
+                    test_parse_uint_full_correct);
+
+    return g_test_run();
+}
diff --git a/util/cutils.c b/util/cutils.c
index 80bb1dc..80da9df 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -270,6 +270,103 @@  int64_t strtosz(const char *nptr, char **end)
     return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
 }
 
+/**
+ * parse_uint:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @endptr: Destination for pointer to first character not consumed
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer
+ *
+ * Parsed syntax is: arbitrary whitespace, a single optional '+', an optional
+ * "0x" if @base is 0 or 16, one or more digits. It's similar to strtoull()'s
+ * syntax, except that the minus sign ('-') is rejected, so negative numbers
+ * won't be considered valid.
+ *
+ * If @s is null, or @base is invalid, or @s doesn't start with an
+ * integer in the syntax above, set *@value to 0, *@endptr to @s, and
+ * return -EINVAL.
+ *
+ * Set *@endptr to point right beyond the parsed integer.
+ *
+ * If the integer overflows unsigned long long, set *@value to
+ * ULLONG_MAX, and return -ERANGE.
+ * Else, set *@value to the parsed integer, and return 0.
+ */
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+               int base)
+{
+    int r = 0;
+    char *endp = (char *)s;
+    unsigned long long val = 0;
+    const char *sp;
+
+    if (!s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+    /* make sure we reject negative numbers: */
+    sp = s;
+    while (isspace((unsigned char)*sp)) {
+        ++sp;
+    }
+    if (*sp == '-') {
+        r = -EINVAL;
+        goto out;
+    }
+
+    errno = 0;
+    val = strtoull(s, &endp, base);
+    if (errno) {
+        r = -errno;
+        goto out;
+    }
+
+    if (endp == s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+out:
+    *value = val;
+    *endptr = endp;
+    return r;
+}
+
+/**
+ * parse_uint_full:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer from entire string
+ *
+ * Has the same behavior as parse_uint(), but with an additional check
+ * for additional data after the parsed number. If extra characters are present
+ * after the parsed number, the function will return -EINVAL, and the caller
+ * should not rely on the value set on *@value.
+ */
+int parse_uint_full(const char *s, unsigned long long *value, int base)
+{
+    char *endp;
+    int r;
+
+    r = parse_uint(s, value, &endp, base);
+    if (r < 0) {
+        return r;
+    }
+    if (*endp) {
+        *value = 0;
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 int qemu_parse_fd(const char *param)
 {
     int fd;