diff mbox

[1/8,v6] cutils: unsigned int parsing functions

Message ID 20130118194123.GO20133@otherpad.lan.raisama.net
State New
Headers show

Commit Message

Eduardo Habkost Jan. 18, 2013, 7:41 p.m. UTC
There are lots of duplicate parsing code using strto*() in QEMU, and
most of that code is broken in one way or another. Even the visitors
code have duplicate integer parsing code[1]. This introduces functions
to help parsing unsigned int values: parse_uint() and parse_uint_full().

Parsing functions for signed ints and floats will be submitted later.

parse_uint_full() has all the checks made by opts_type_uint64() at
opts-visitor.c:

 - Check for NULL (returns -EINVAL)
 - Check for negative numbers (returns -EINVAL)
 - Check for empty string (returns -EINVAL)
 - Check for overflow or other errno values set by strtoll() (returns
   -errno)
 - Check for end of string (reject invalid characters after number)
   (returns -EINVAL)

parse_uint() does everything above except checking for the end of the
string, so callers can continue parsing the remainder of string after
the number.

Unit tests included.

[1] string-input-visitor.c:parse_int() could use the same parsing code
    used by opts-visitor.c:opts_type_int(), instead of duplicating that
    logic.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
Changes v2:
 - Trivial whitespace change
 - Add 'base' parameter to the functions

Changes v4:
 - Return -EINVAL in case a minus sign is found
 - Make endptr point to beginning of string in case -EINVAL
   is returned (like the strtoull() behavior)

Changes v5:
 - Updated function documentation to be very specific about
   the syntax and every error case
   Suggested-by: Markus Armbruster <armbru@redhat.com>
 - Added additional test case for whitespace-only string
 - parse_uint_full() will set *value to 0 if returning -EINVAL,
   so callers won't rely on the parsed value being returned.

Changes v6:
 - Return -ERANGE (and set *endptr beyond all digits) for negative
   numbers
   Suggested-by: Markus Armbruster <armbru@redhat.com>
 - Clarify that function will consume all digits even in case of
   overflow/underflow
 - Doesn't define *v as undefined in case parse_uint_full() finds extra
   characters. Explicitly document it as set to 0.

6 version of a simple integer parsing function. This is getting funny.
:-)

I decided to follow Markus' advice and return -ERANGE in case of
negative numbers. It sounds more intuitive, and will allow us to make
parse_uint() and a future parse_int() function (for signed integers) to
accept exactly the same syntax, with just different ranges of valid
number values. Most callers won't care, anyway.


Interdiff v5->v6:

    diff -u b/tests/test-cutils.c b/tests/test-cutils.c
    --- b/tests/test-cutils.c
    +++ b/tests/test-cutils.c
    @@ -196,9 +196,9 @@
     
         r = parse_uint(str, &i, &endptr, 0);
     
    -    g_assert_cmpint(r, ==, -EINVAL);
    +    g_assert_cmpint(r, ==, -ERANGE);
         g_assert_cmpint(i, ==, 0);
    -    g_assert(endptr == str);
    +    g_assert(endptr == str + strlen(str));
     }
     
     
    diff -u b/util/cutils.c b/util/cutils.c
    --- b/util/cutils.c
    +++ b/util/cutils.c
    @@ -280,19 +280,22 @@
      *
      * Parse unsigned integer
      *
    - * Parsed syntax is: arbitrary whitespace, a single optional '+', an optional
    - * "0x"if @base is 0 or 16, one or more digits. It's similar to strtoull()'s
    - * syntax, except that the minus sign ('-') is rejected, so negative numbers
    - * won't be considered valid.
    + * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
    + * '+' or '-', an optional "0x"if @base is 0 or 16, one or more digits.
      *
      * If @s is null, or @base is invalid, or @s doesn't start with an
      * integer in the syntax above, set *@value to 0, *@endptr to @s, and
      * return -EINVAL.
      *
    - * Set @endptr to point right beyond the parsed integer.
    + * Set *@endptr to point right beyond the parsed integer (even if the integer
    + * overflows or is negative, all digits will be parsed and *@endptr will
    + * point right beyond them).
    + *
    + * If the integer is negative, set *@value to 0, and return -ERANGE.
      *
      * If the integer overflows unsigned long long, set *@value to
      * ULLONG_MAX, and return -ERANGE.
    + *
      * Else, set *@value to the parsed integer, and return 0.
      */
     int parse_uint(const char *s, unsigned long long *value, char **endptr,
    @@ -301,23 +304,12 @@
         int r = 0;
         char *endp = (char *)s;
         unsigned long long val = 0;
    -    const char *sp;
     
         if (!s) {
             r = -EINVAL;
             goto out;
         }
     
    -    /* make sure we reject negative numbers: */
    -    sp = s;
    -    while (isspace((unsigned char)*sp)) {
    -        ++sp;
    -    }
    -    if (*sp == '-') {
    -        r = -EINVAL;
    -        goto out;
    -    }
    -
         errno = 0;
         val = strtoull(s, &endp, base);
         if (errno) {
    @@ -330,6 +322,16 @@
             goto out;
         }
     
    +    /* make sure we reject negative numbers: */
    +    while (isspace((unsigned char)*s)) {
    +        s++;
    +    }
    +    if (*s == '-') {
    +        val = 0;
    +        r = -ERANGE;
    +        goto out;
    +    }
    +
     out:
         *value = val;
         *endptr = endp;
    @@ -347,8 +349,8 @@
      *
      * Have the same behavior of parse_uint(), but with an additional check
      * for additional data after the parsed number. If extra characters are present
    - * after the parsed number, the function will return -EINVAL, and the caller
    - * should not rely on the value set on *@value.
    + * after the parsed number, the function will return -EINVAL, and *@v will
    + * be set to 0.
      */
     int parse_uint_full(const char *s, unsigned long long *value, int base)
     {
---
 include/qemu-common.h |   4 +
 tests/Makefile        |   3 +
 tests/test-cutils.c   | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++
 util/cutils.c         |  99 ++++++++++++++++++++
 4 files changed, 357 insertions(+)
 create mode 100644 tests/test-cutils.c

Comments

Eric Blake Jan. 18, 2013, 8:20 p.m. UTC | #1
On 01/18/2013 12:41 PM, Eduardo Habkost wrote:
> There are lots of duplicate parsing code using strto*() in QEMU, and
> most of that code is broken in one way or another. Even the visitors
> code have duplicate integer parsing code[1]. This introduces functions
> to help parsing unsigned int values: parse_uint() and parse_uint_full().
> 

> Changes v6:
>  - Return -ERANGE (and set *endptr beyond all digits) for negative
>    numbers
>    Suggested-by: Markus Armbruster <armbru@redhat.com>
>  - Clarify that function will consume all digits even in case of
>    overflow/underflow
>  - Doesn't define *v as undefined in case parse_uint_full() finds extra
>    characters. Explicitly document it as set to 0.

Looks sane to me, except for the introduction of one typo (please, can
whoever queues up the PULL request just fix that, without needing to see
a v7)?

> 
> 6 version of a simple integer parsing function. This is getting funny.
> :-)

Yeah, but just think how much easier parse_int() is going to be :)

> ---
>  include/qemu-common.h |   4 +
>  tests/Makefile        |   3 +
>  tests/test-cutils.c   | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  util/cutils.c         |  99 ++++++++++++++++++++
>  4 files changed, 357 insertions(+)
>  create mode 100644 tests/test-cutils.c

Reviewed-by: Eric Blake <eblake@redhat.com>

> + *
> + * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
> + * '+' or '-', an optional "0x"if @base is 0 or 16, one or more digits.

s/"0x"if/"0x" if/

> +
> +/**
> + * parse_uint_full:
> + *
> + * @s: String to parse
> + * @value: Destination for parsed integer value
> + * @base: integer base, between 2 and 36 inclusive, or 0
> + *
> + * Parse unsigned integer from entire string
> + *
> + * Have the same behavior of parse_uint(), but with an additional check
> + * for additional data after the parsed number. If extra characters are present
> + * after the parsed number, the function will return -EINVAL, and *@v will
> + * be set to 0.
> + */
> +int parse_uint_full(const char *s, unsigned long long *value, int base)

[Side note - in libvirt, we have just one function instead of two; you
get the parse_uint_full semantics by passing NULL for endptr to the
parse_uint counterpart.  But I'm fine with your approach of mandating
endptr to be a non-NULL pointer to parse_uint, and having a second
function for the case where no end characters are allowed.]
Laszlo Ersek Jan. 23, 2013, 3:37 p.m. UTC | #2
On 01/18/13 20:41, Eduardo Habkost wrote:

> Changes v6:

Reviewed-by: Laszlo Ersek <lersek@redhat.com>
diff mbox

Patch

diff --git a/include/qemu-common.h b/include/qemu-common.h
index ca464bb..f134629 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -170,6 +170,10 @@  int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
 int qemu_parse_fd(const char *param);
 
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+               int base);
+int parse_uint_full(const char *s, unsigned long long *value, int base);
+
 /*
  * strtosz() suffixes used to specify the default treatment of an
  * argument passed to strtosz() without an explicit suffix.
diff --git a/tests/Makefile b/tests/Makefile
index d86e95a..e5929cd 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -45,6 +45,8 @@  gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
 check-unit-y += tests/test-thread-pool$(EXESUF)
 gcov-files-test-thread-pool-y = thread-pool.c
+check-unit-y += tests/test-cutils$(EXESUF)
+gcov-files-test-cutils-y += util/cutils.c
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -86,6 +88,7 @@  tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
+tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
diff --git a/tests/test-cutils.c b/tests/test-cutils.c
new file mode 100644
index 0000000..2a4556d
--- /dev/null
+++ b/tests/test-cutils.c
@@ -0,0 +1,251 @@ 
+/*
+ * cutils.c unit-tests
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Eduardo Habkost <ehabkost@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <glib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "qemu-common.h"
+
+
+static void test_parse_uint_null(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    int r;
+
+    r = parse_uint(NULL, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == NULL);
+}
+
+static void test_parse_uint_empty(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str);
+}
+
+static void test_parse_uint_whitespace(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "   \t   ";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str);
+}
+
+
+static void test_parse_uint_invalid(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = " \t xxx";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str);
+}
+
+
+static void test_parse_uint_trailing(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "123xxx";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+    g_assert(endptr == str + 3);
+}
+
+static void test_parse_uint_correct(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "123";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_parse_uint_octal(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "0123";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 0123);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_parse_uint_decimal(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "0123";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 10);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+    g_assert(endptr == str + strlen(str));
+}
+
+
+static void test_parse_uint_llong_max(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    char *str = g_strdup_printf("%llu", (unsigned long long)LLONG_MAX + 1);
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, (unsigned long long)LLONG_MAX + 1);
+    g_assert(endptr == str + strlen(str));
+
+    g_free(str);
+}
+
+static void test_parse_uint_overflow(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = "99999999999999999999999999999999999999";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -ERANGE);
+    g_assert_cmpint(i, ==, ULLONG_MAX);
+    g_assert(endptr == str + strlen(str));
+}
+
+static void test_parse_uint_negative(void)
+{
+    unsigned long long i = 999;
+    char f = 'X';
+    char *endptr = &f;
+    const char *str = " \t -321";
+    int r;
+
+    r = parse_uint(str, &i, &endptr, 0);
+
+    g_assert_cmpint(r, ==, -ERANGE);
+    g_assert_cmpint(i, ==, 0);
+    g_assert(endptr == str + strlen(str));
+}
+
+
+static void test_parse_uint_full_trailing(void)
+{
+    unsigned long long i = 999;
+    const char *str = "123xxx";
+    int r;
+
+    r = parse_uint_full(str, &i, 0);
+
+    g_assert_cmpint(r, ==, -EINVAL);
+    g_assert_cmpint(i, ==, 0);
+}
+
+static void test_parse_uint_full_correct(void)
+{
+    unsigned long long i = 999;
+    const char *str = "123";
+    int r;
+
+    r = parse_uint_full(str, &i, 0);
+
+    g_assert_cmpint(r, ==, 0);
+    g_assert_cmpint(i, ==, 123);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/cutils/parse_uint/null", test_parse_uint_null);
+    g_test_add_func("/cutils/parse_uint/empty", test_parse_uint_empty);
+    g_test_add_func("/cutils/parse_uint/whitespace",
+                    test_parse_uint_whitespace);
+    g_test_add_func("/cutils/parse_uint/invalid", test_parse_uint_invalid);
+    g_test_add_func("/cutils/parse_uint/trailing", test_parse_uint_trailing);
+    g_test_add_func("/cutils/parse_uint/correct", test_parse_uint_correct);
+    g_test_add_func("/cutils/parse_uint/octal", test_parse_uint_octal);
+    g_test_add_func("/cutils/parse_uint/decimal", test_parse_uint_decimal);
+    g_test_add_func("/cutils/parse_uint/llong_max", test_parse_uint_llong_max);
+    g_test_add_func("/cutils/parse_uint/overflow", test_parse_uint_overflow);
+    g_test_add_func("/cutils/parse_uint/negative", test_parse_uint_negative);
+    g_test_add_func("/cutils/parse_uint_full/trailing",
+                    test_parse_uint_full_trailing);
+    g_test_add_func("/cutils/parse_uint_full/correct",
+                    test_parse_uint_full_correct);
+
+    return g_test_run();
+}
diff --git a/util/cutils.c b/util/cutils.c
index 80bb1dc..8e43fae 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -270,6 +270,105 @@  int64_t strtosz(const char *nptr, char **end)
     return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
 }
 
+/**
+ * parse_uint:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @endptr: Destination for pointer to first character not consumed
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer
+ *
+ * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
+ * '+' or '-', an optional "0x"if @base is 0 or 16, one or more digits.
+ *
+ * If @s is null, or @base is invalid, or @s doesn't start with an
+ * integer in the syntax above, set *@value to 0, *@endptr to @s, and
+ * return -EINVAL.
+ *
+ * Set *@endptr to point right beyond the parsed integer (even if the integer
+ * overflows or is negative, all digits will be parsed and *@endptr will
+ * point right beyond them).
+ *
+ * If the integer is negative, set *@value to 0, and return -ERANGE.
+ *
+ * If the integer overflows unsigned long long, set *@value to
+ * ULLONG_MAX, and return -ERANGE.
+ *
+ * Else, set *@value to the parsed integer, and return 0.
+ */
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+               int base)
+{
+    int r = 0;
+    char *endp = (char *)s;
+    unsigned long long val = 0;
+
+    if (!s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+    errno = 0;
+    val = strtoull(s, &endp, base);
+    if (errno) {
+        r = -errno;
+        goto out;
+    }
+
+    if (endp == s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+    /* make sure we reject negative numbers: */
+    while (isspace((unsigned char)*s)) {
+        s++;
+    }
+    if (*s == '-') {
+        val = 0;
+        r = -ERANGE;
+        goto out;
+    }
+
+out:
+    *value = val;
+    *endptr = endp;
+    return r;
+}
+
+/**
+ * parse_uint_full:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer from entire string
+ *
+ * Have the same behavior of parse_uint(), but with an additional check
+ * for additional data after the parsed number. If extra characters are present
+ * after the parsed number, the function will return -EINVAL, and *@v will
+ * be set to 0.
+ */
+int parse_uint_full(const char *s, unsigned long long *value, int base)
+{
+    char *endp;
+    int r;
+
+    r = parse_uint(s, value, &endp, base);
+    if (r < 0) {
+        return r;
+    }
+    if (*endp) {
+        *value = 0;
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 int qemu_parse_fd(const char *param)
 {
     int fd;