diff mbox

[v2,3/3] qapi: Fix memleak in string visitors on int lists

Message ID 1464712890-14262-4-git-send-email-eblake@redhat.com
State New
Headers show

Commit Message

Eric Blake May 31, 2016, 4:41 p.m. UTC
Commit 7f8f9ef1 introduced the ability to store a list of
integers as a sorted list of ranges, but when merging ranges,
it leaks one or more ranges.  It was also using range_get_last()
incorrectly within range_compare() (a range is a start/end pair,
but range_get_last() is for start/len pairs), and will also
mishandle a range ending in UINT64_MAX (remember, we document
that no range covers 2**64 bytes, but that ranges that end on
UINT64_MAX have end < begin).

The whole merge algorithm was rather complex, and included
unnecessary passes over data within glib functions, and enough
indirection to make it hard to easily plug the data leaks.
Since we are already hard-coding things to a list of ranges,
just rewrite the thing to open-code the traversal and
comparisons, by making the range_compare() helper function give
us an answer that is easier to use, at which point we avoid the
need to pass any callbacks to g_list_*(). Then by reusing
range_extend() instead of duplicating effort with range_merge(),
we cover the corner cases correctly.

Drop the now-unused range_merge() and ranges_can_merge().

Doing this lets test-string-{input,output}-visitor pass under
valgrind without leaks.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 util/range.c | 75 +++++++++++++++++++++++-------------------------------------
 1 file changed, 29 insertions(+), 46 deletions(-)

Comments

Markus Armbruster June 1, 2016, 7:47 a.m. UTC | #1
Eric Blake <eblake@redhat.com> writes:

> Commit 7f8f9ef1 introduced the ability to store a list of
> integers as a sorted list of ranges, but when merging ranges,
> it leaks one or more ranges.  It was also using range_get_last()
> incorrectly within range_compare() (a range is a start/end pair,
> but range_get_last() is for start/len pairs), and will also
> mishandle a range ending in UINT64_MAX (remember, we document
> that no range covers 2**64 bytes, but that ranges that end on
> UINT64_MAX have end < begin).
>
> The whole merge algorithm was rather complex, and included
> unnecessary passes over data within glib functions, and enough
> indirection to make it hard to easily plug the data leaks.
> Since we are already hard-coding things to a list of ranges,
> just rewrite the thing to open-code the traversal and
> comparisons, by making the range_compare() helper function give
> us an answer that is easier to use, at which point we avoid the
> need to pass any callbacks to g_list_*(). Then by reusing
> range_extend() instead of duplicating effort with range_merge(),
> we cover the corner cases correctly.
>
> Drop the now-unused range_merge() and ranges_can_merge().
>
> Doing this lets test-string-{input,output}-visitor pass under
> valgrind without leaks.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>  util/range.c | 75 +++++++++++++++++++++++-------------------------------------
>  1 file changed, 29 insertions(+), 46 deletions(-)
>
> diff --git a/util/range.c b/util/range.c
> index dd46092..56e6baf 100644
> --- a/util/range.c
> +++ b/util/range.c
> @@ -28,65 +28,48 @@
>   *   - this can not represent a full 0 to ~0x0LL range.
>   */
>
> -/* 0,1 can merge with 1,2 but don't overlap */
> -static bool ranges_can_merge(Range *range1, Range *range2)
> +/* Return -1 if @a < @b, 1 if greater, and 0 if they touch or overlap. */
> +static inline int range_compare(Range *a, Range *b)
>  {
> -    return !(range1->end < range2->begin || range2->end < range1->begin);
> -}
> -
> -static void range_merge(Range *range1, Range *range2)
> -{
> -    if (range1->end < range2->end) {
> -        range1->end = range2->end;
> -    }
> -    if (range1->begin > range2->begin) {
> -        range1->begin = range2->begin;
> -    }
> -}
> -
> -static gint range_compare(gconstpointer a, gconstpointer b)
> -{
> -    Range *ra = (Range *)a, *rb = (Range *)b;
> -    if (ra->begin == rb->begin && ra->end == rb->end) {
> -        return 0;
> -    } else if (range_get_last(ra->begin, ra->end) <
> -               range_get_last(rb->begin, rb->end)) {
> +    /* Zero a->end is 2**64, and therefore not less than any b->begin */
> +    if (a->end && a->end < b->begin) {
>          return -1;
> -    } else {
> +    }
> +    if (b->end && a->begin > b->end) {
>          return 1;
>      }
> +    return 0;
>  }
>
> +/* Insert @data into @list of ranges; caller no longer owns @data */
>  GList *range_list_insert(GList *list, Range *data)
>  {
> -    GList *l, *next = NULL;
> -    Range *r, *nextr;
> +    GList *l;
>
> -    if (!list) {
> -        list = g_list_insert_sorted(list, data, range_compare);
> -        return list;
> +    /* Range lists require no empty ranges */
> +    assert(data->begin < data->end || (data->begin && !data->end));

Consider { begin = 0, end = 0 }.

Since zero @end means 2^64, this encodes the (non-empty) range
0..2^64-1.

range.h's comment

 * Notes:
 *   - ranges must not wrap around 0, but can include the last byte ~0x0LL.
 *   - this can not represent a full 0 to ~0x0LL range.

appears to be wrong.  The actual limitation is "can't represent ranges
wrapping around zero, and can't represent the empty range starting at
zero."  Would you like to correct it?

I'm afraid range.h is too clever by half.

> +
> +    for (l = list; l && range_compare(l->data, data) < 0; l = l->next) {
> +        /* Skip all list elements strictly less than data */
>      }

Let's put the comment before the loop.  It describes the whole loop.
Also makes the emptiness of the body more obvious.

>
> -    nextr = data;
> -    l = list;
> -    while (l && l != next && nextr) {
> -        r = l->data;
> -        if (ranges_can_merge(r, nextr)) {
> -            range_merge(r, nextr);
> -            l = g_list_remove_link(l, next);
> -            next = g_list_next(l);
> -            if (next) {
> -                nextr = next->data;
> -            } else {
> -                nextr = NULL;
> -            }
> -        } else {
> -            l = g_list_next(l);
> -        }
> +    if (!l || range_compare(l->data, data) > 0) {
> +        /* Rest of the list (if any) is strictly greater than @data */
> +        return g_list_insert_before(list, l, data);
>      }
>
> -    if (!l) {
> -        list = g_list_insert_sorted(list, data, range_compare);
> +    /* Current list element overlaps @data, merge the two */
> +    range_extend(l->data, data);
> +    g_free(data);
> +
> +    /* Merge any subsequent list elements that now also overlap */
> +    while (l->next && range_compare(l->data, l->next->data) == 0) {
> +        GList *new_l;
> +
> +        range_extend(l->data, l->next->data);
> +        g_free(l->next->data);
> +        new_l = g_list_delete_link(list, l->next);
> +        assert(new_l == list);
>      }
>
>      return list;

I think I could fix up things on commit (assuming we agree on what needs
fixing).
Eric Blake June 1, 2016, 2:51 p.m. UTC | #2
On 06/01/2016 01:47 AM, Markus Armbruster wrote:
> Eric Blake <eblake@redhat.com> writes:
> 
>> Commit 7f8f9ef1 introduced the ability to store a list of
>> integers as a sorted list of ranges, but when merging ranges,
>> it leaks one or more ranges.  It was also using range_get_last()
>> incorrectly within range_compare() (a range is a start/end pair,
>> but range_get_last() is for start/len pairs), and will also
>> mishandle a range ending in UINT64_MAX (remember, we document
>> that no range covers 2**64 bytes, but that ranges that end on
>> UINT64_MAX have end < begin).
>>

>>
>> -    if (!list) {
>> -        list = g_list_insert_sorted(list, data, range_compare);
>> -        return list;
>> +    /* Range lists require no empty ranges */
>> +    assert(data->begin < data->end || (data->begin && !data->end));
> 
> Consider { begin = 0, end = 0 }.
> 
> Since zero @end means 2^64, this encodes the (non-empty) range
> 0..2^64-1.

Or else it means an uninitialized range.  My argument is that no range
can contain 2^64 bytes, and therefore the only possible range that would
be that large (0..2^64-1) is unrepresentable, therefore, if end == 0,
begin must be non-zero for the range to be valid as an initialized range.

> 
> range.h's comment
> 
>  * Notes:
>  *   - ranges must not wrap around 0, but can include the last byte ~0x0LL.
>  *   - this can not represent a full 0 to ~0x0LL range.
> 
> appears to be wrong.  The actual limitation is "can't represent ranges
> wrapping around zero, and can't represent the empty range starting at
> zero."  Would you like to correct it?

I'm not sure what corrections it needs, though.

> 
> I'm afraid range.h is too clever by half.

Unfortunately true.

> 
>> +
>> +    for (l = list; l && range_compare(l->data, data) < 0; l = l->next) {
>> +        /* Skip all list elements strictly less than data */
>>      }
> 
> Let's put the comment before the loop.  It describes the whole loop.
> Also makes the emptiness of the body more obvious.

Sure.

> 
> I think I could fix up things on commit (assuming we agree on what needs
> fixing).
> 

Adding other authors of range.h for their opinions...
Markus Armbruster June 13, 2016, 12:54 p.m. UTC | #3
Eric Blake <eblake@redhat.com> writes:

> On 06/01/2016 01:47 AM, Markus Armbruster wrote:
>> Eric Blake <eblake@redhat.com> writes:
>> 
>>> Commit 7f8f9ef1 introduced the ability to store a list of
>>> integers as a sorted list of ranges, but when merging ranges,
>>> it leaks one or more ranges.  It was also using range_get_last()
>>> incorrectly within range_compare() (a range is a start/end pair,
>>> but range_get_last() is for start/len pairs), and will also
>>> mishandle a range ending in UINT64_MAX (remember, we document
>>> that no range covers 2**64 bytes, but that ranges that end on
>>> UINT64_MAX have end < begin).
>>>
>
>>>
>>> -    if (!list) {
>>> -        list = g_list_insert_sorted(list, data, range_compare);
>>> -        return list;
>>> +    /* Range lists require no empty ranges */
>>> +    assert(data->begin < data->end || (data->begin && !data->end));
>> 
>> Consider { begin = 0, end = 0 }.
>> 
>> Since zero @end means 2^64, this encodes the (non-empty) range
>> 0..2^64-1.
>
> Or else it means an uninitialized range.  My argument is that no range
> can contain 2^64 bytes, and therefore the only possible range that would
> be that large (0..2^64-1) is unrepresentable, therefore, if end == 0,
> begin must be non-zero for the range to be valid as an initialized range.

I'm not sure what you mean by "uninitialized range".  Maybe "invalid
range"?

>> range.h's comment
>> 
>>  * Notes:
>>  *   - ranges must not wrap around 0, but can include the last byte ~0x0LL.
>>  *   - this can not represent a full 0 to ~0x0LL range.
>> 
>> appears to be wrong.  The actual limitation is "can't represent ranges
>> wrapping around zero, and can't represent the empty range starting at
>> zero."  Would you like to correct it?
>
> I'm not sure what corrections it needs, though.
>
>> I'm afraid range.h is too clever by half.
>
> Unfortunately true.
>
>> 
>>> +
>>> +    for (l = list; l && range_compare(l->data, data) < 0; l = l->next) {
>>> +        /* Skip all list elements strictly less than data */
>>>      }
>> 
>> Let's put the comment before the loop.  It describes the whole loop.
>> Also makes the emptiness of the body more obvious.
>
> Sure.
>
>> 
>> I think I could fix up things on commit (assuming we agree on what needs
>> fixing).
>> 
>
> Adding other authors of range.h for their opinions...

No reply.

I find the comments in range.h terminally confusing.

The clear parts:

* we want to have inclusive lower bound <= inclusive upper bound (no
  wrap around), and

* we want to encode the bounds using @start as inclusive lower bound,
  and @end as exclusive upper bound.

This raises the question how end == 0 is to be interpreted.  Options:

(1) It's literally the exclusive upper bound.  An interval with a
non-negative inclusive lower bound and a zero exclusive upper bound is
empty.  There is no way to represent the inclusive upper bound 2^64-1.
This contradicts the comment's claim that you can.

(2) It's 2^64.  Now you cannot represent the inclusive upper bound -1.
You cannot represent the empty interval [0,-1], although you can
represent other empty intervals [b,b-1], b>0.  { start = 0, end = 0 }
encodes the interval [0,2^64-1].  Contradicts the comment's claim that
you can't, unless...

(2') end=0 is special-cased to mean something else when start=0!  Namely
0 instead of 2^64, so that { start=0, end=0 } becomes the empty interval
[0,-1].

The tradeoff between (2) and (2') is between two anomalies: "can't do
[0,-1]", and "can't do [0..2^64-1]".

I prefer (2), because I find the former anomaly less bad, and feel
special-casing @end is bound to lead to bugs.

Whatever option we choose, we should fix the comment to explain it
clearly.
Markus Armbruster June 14, 2016, 5:53 p.m. UTC | #4
Markus Armbruster <armbru@redhat.com> writes:

[...]
> I find the comments in range.h terminally confusing.
>
> The clear parts:
>
> * we want to have inclusive lower bound <= inclusive upper bound (no
>   wrap around), and
>
> * we want to encode the bounds using @start as inclusive lower bound,
>   and @end as exclusive upper bound.
>
> This raises the question how end == 0 is to be interpreted.  Options:
>
> (1) It's literally the exclusive upper bound.  An interval with a
> non-negative inclusive lower bound and a zero exclusive upper bound is
> empty.  There is no way to represent the inclusive upper bound 2^64-1.
> This contradicts the comment's claim that you can.
>
> (2) It's 2^64.  Now you cannot represent the inclusive upper bound -1.
> You cannot represent the empty interval [0,-1], although you can
> represent other empty intervals [b,b-1], b>0.  { start = 0, end = 0 }
> encodes the interval [0,2^64-1].  Contradicts the comment's claim that
> you can't, unless...
>
> (2') end=0 is special-cased to mean something else when start=0!  Namely
> 0 instead of 2^64, so that { start=0, end=0 } becomes the empty interval
> [0,-1].
>
> The tradeoff between (2) and (2') is between two anomalies: "can't do
> [0,-1]", and "can't do [0..2^64-1]".
>
> I prefer (2), because I find the former anomaly less bad, and feel
> special-casing @end is bound to lead to bugs.
>
> Whatever option we choose, we should fix the comment to explain it
> clearly.

Unless somebody has better ideas, I'll cook up a patch documenting (2')
and make code conform to it (in case there is code that doesn't).
diff mbox

Patch

diff --git a/util/range.c b/util/range.c
index dd46092..56e6baf 100644
--- a/util/range.c
+++ b/util/range.c
@@ -28,65 +28,48 @@ 
  *   - this can not represent a full 0 to ~0x0LL range.
  */

-/* 0,1 can merge with 1,2 but don't overlap */
-static bool ranges_can_merge(Range *range1, Range *range2)
+/* Return -1 if @a < @b, 1 if greater, and 0 if they touch or overlap. */
+static inline int range_compare(Range *a, Range *b)
 {
-    return !(range1->end < range2->begin || range2->end < range1->begin);
-}
-
-static void range_merge(Range *range1, Range *range2)
-{
-    if (range1->end < range2->end) {
-        range1->end = range2->end;
-    }
-    if (range1->begin > range2->begin) {
-        range1->begin = range2->begin;
-    }
-}
-
-static gint range_compare(gconstpointer a, gconstpointer b)
-{
-    Range *ra = (Range *)a, *rb = (Range *)b;
-    if (ra->begin == rb->begin && ra->end == rb->end) {
-        return 0;
-    } else if (range_get_last(ra->begin, ra->end) <
-               range_get_last(rb->begin, rb->end)) {
+    /* Zero a->end is 2**64, and therefore not less than any b->begin */
+    if (a->end && a->end < b->begin) {
         return -1;
-    } else {
+    }
+    if (b->end && a->begin > b->end) {
         return 1;
     }
+    return 0;
 }

+/* Insert @data into @list of ranges; caller no longer owns @data */
 GList *range_list_insert(GList *list, Range *data)
 {
-    GList *l, *next = NULL;
-    Range *r, *nextr;
+    GList *l;

-    if (!list) {
-        list = g_list_insert_sorted(list, data, range_compare);
-        return list;
+    /* Range lists require no empty ranges */
+    assert(data->begin < data->end || (data->begin && !data->end));
+
+    for (l = list; l && range_compare(l->data, data) < 0; l = l->next) {
+        /* Skip all list elements strictly less than data */
     }

-    nextr = data;
-    l = list;
-    while (l && l != next && nextr) {
-        r = l->data;
-        if (ranges_can_merge(r, nextr)) {
-            range_merge(r, nextr);
-            l = g_list_remove_link(l, next);
-            next = g_list_next(l);
-            if (next) {
-                nextr = next->data;
-            } else {
-                nextr = NULL;
-            }
-        } else {
-            l = g_list_next(l);
-        }
+    if (!l || range_compare(l->data, data) > 0) {
+        /* Rest of the list (if any) is strictly greater than @data */
+        return g_list_insert_before(list, l, data);
     }

-    if (!l) {
-        list = g_list_insert_sorted(list, data, range_compare);
+    /* Current list element overlaps @data, merge the two */
+    range_extend(l->data, data);
+    g_free(data);
+
+    /* Merge any subsequent list elements that now also overlap */
+    while (l->next && range_compare(l->data, l->next->data) == 0) {
+        GList *new_l;
+
+        range_extend(l->data, l->next->data);
+        g_free(l->next->data);
+        new_l = g_list_delete_link(list, l->next);
+        assert(new_l == list);
     }

     return list;