diff mbox

[v6,05/23] qmp: Fix reference-counting of qnull on empty output visit

Message ID 1448497401-27784-6-git-send-email-eblake@redhat.com
State New
Headers show

Commit Message

Eric Blake Nov. 26, 2015, 12:23 a.m. UTC
Commit 6c2f9a15 ensured that we would not return NULL when the
caller used an output visitor but had nothing to visit. But
in doing so, it added a FIXME about a reference count leak
that could abort qemu in the (unlikely) case of SIZE_MAX such
visits (more plausible on 32-bit).

This fixes things by documenting the internal contracts, and
explaining why the internal function can return NULL and only
the public facing interface needs to worry about qnull(),
thus avoiding over-referencing the qnull_ global object.

It does not, however, fix the stupidity of the stack mixing
up two separate pieces of information; add a FIXME to explain
that issue.

Signed-off-by: Eric Blake <eblake@redhat.com>

---
v6: no change
---
 qapi/qmp-output-visitor.c       | 30 ++++++++++++++++++++++++++++--
 tests/test-qmp-output-visitor.c |  2 ++
 2 files changed, 30 insertions(+), 2 deletions(-)

Comments

Markus Armbruster Nov. 27, 2015, 1:06 p.m. UTC | #1
Eric Blake <eblake@redhat.com> writes:

> Commit 6c2f9a15 ensured that we would not return NULL when the
> caller used an output visitor but had nothing to visit. But
> in doing so, it added a FIXME about a reference count leak
> that could abort qemu in the (unlikely) case of SIZE_MAX such
> visits (more plausible on 32-bit).

The commit message says "We'll want to fix it for real before the
release."  Is this patch for 2.5?  For what it's worth, it applies to
master.

> This fixes things by documenting the internal contracts, and
> explaining why the internal function can return NULL and only
> the public facing interface needs to worry about qnull(),
> thus avoiding over-referencing the qnull_ global object.
>
> It does not, however, fix the stupidity of the stack mixing
> up two separate pieces of information; add a FIXME to explain
> that issue.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
>
> ---
> v6: no change
> ---
>  qapi/qmp-output-visitor.c       | 30 ++++++++++++++++++++++++++++--
>  tests/test-qmp-output-visitor.c |  2 ++
>  2 files changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/qapi/qmp-output-visitor.c b/qapi/qmp-output-visitor.c
> index e66ab3c..9d0f9d1 100644
> --- a/qapi/qmp-output-visitor.c
> +++ b/qapi/qmp-output-visitor.c
> @@ -29,6 +29,12 @@ typedef QTAILQ_HEAD(QStack, QStackEntry) QStack;
>  struct QmpOutputVisitor
>  {
>      Visitor visitor;
> +    /* FIXME: we are abusing stack to hold two separate pieces of
> +     * information: the current root object, and the stack of objects
> +     * still being built.  Worse, our behavior is inconsistent:
> +     * visiting two top-level scalars in a row discards the first in
> +     * favor of the second, but visiting two top-level objects in a
> +     * row tries to append the second object into the first.  */

Uhh, I'll simply take your word for it.

>      QStack stack;
>  };
>
> @@ -41,10 +47,12 @@ static QmpOutputVisitor *to_qov(Visitor *v)
>      return container_of(v, QmpOutputVisitor, visitor);
>  }
>
> +/* Push @value onto the stack of current QObjects being built */
>  static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
>  {
>      QStackEntry *e = g_malloc0(sizeof(*e));
>
> +    assert(value);
>      e->value = value;

Keep in mind for later: we never push a null value on the stack, and
there is no other way to grow the stack.  Therefore, the stack holds
only non-null values.

>      if (qobject_type(e->value) == QTYPE_QLIST) {
>          e->is_list_head = true;
> @@ -52,16 +60,20 @@ static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
>      QTAILQ_INSERT_HEAD(&qov->stack, e, node);
>  }
>
> +/* Grab and remove the most recent QObject from the stack */
>  static QObject *qmp_output_pop(QmpOutputVisitor *qov)
>  {
>      QStackEntry *e = QTAILQ_FIRST(&qov->stack);
>      QObject *value;
> +
> +    assert(e);

I figure this assert the stack isn't empty.  Correct?

>      QTAILQ_REMOVE(&qov->stack, e, node);
>      value = e->value;
>      g_free(e);
>      return value;
>  }
>
> +/* Grab the root QObject, if any, in preparation to empty the stack */
>  static QObject *qmp_output_first(QmpOutputVisitor *qov)
>  {
>      QStackEntry *e = QTAILQ_LAST(&qov->stack, QStack);
> @@ -72,24 +84,32 @@ static QObject *qmp_output_first(QmpOutputVisitor *qov)
       /*
        * FIXME Wrong, because qmp_output_get_qobject() will increment
        * the refcnt *again*.  We need to think through how visitors
>       * handle null.
>       */

Sure you don't want to drop the FIXME?

>      if (!e) {

I figure this means the stack is empty.  Now I wish I understood how we
use the stack.
    
> -        return qnull();
> +        /* No root */
> +        return NULL;
>      }
> -
> +    assert(e->value);

Safe because the stack holds only non-null values.

>      return e->value;
>  }

We return null exactly when the stack is empty (whatever that may mean).

>
> +/* Grab the most recent QObject from the stack, which must exist */
>  static QObject *qmp_output_last(QmpOutputVisitor *qov)
>  {
>      QStackEntry *e = QTAILQ_FIRST(&qov->stack);
> +
> +    assert(e);
>      return e->value;
>  }

Can't return null, because the stack holds only non-null values.

assert(e && e->value) to match qmp_output_first()?

>
> +/* Add @value to the current QObject being built.
> + * If the stack is visiting a dictionary or list, @value is now owned
> + * by that container. Otherwise, @value is now the root.  */
>  static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,
>                                 QObject *value)
>  {
>      QObject *cur;
>
>      if (QTAILQ_EMPTY(&qov->stack)) {
> +        /* Stack was empty, track this object as root */
>          qmp_output_push_obj(qov, value);
>          return;
>      }
> @@ -98,13 +118,16 @@ static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,
>
>      switch (qobject_type(cur)) {
>      case QTYPE_QDICT:
> +        assert(name);

Okay, but it really only converts one kind of crash into another one.

>          qdict_put_obj(qobject_to_qdict(cur), name, value);
>          break;
>      case QTYPE_QLIST:
>          qlist_append_obj(qobject_to_qlist(cur), value);
>          break;
>      default:
> +        /* The previous root was a scalar, replace it with a new root */
>          qobject_decref(qmp_output_pop(qov));
> +        assert(QTAILQ_EMPTY(&qov->stack));
>          qmp_output_push_obj(qov, value);
>          break;
>      }

Hand me the bucket.

> @@ -205,11 +228,14 @@ static void qmp_output_type_any(Visitor *v, QObject **obj, const char *name,
>      qmp_output_add_obj(qov, name, *obj);
>  }
>
> +/* Finish building, and return the root object. Will not be NULL. */
>  QObject *qmp_output_get_qobject(QmpOutputVisitor *qov)
>  {
>      QObject *obj = qmp_output_first(qov);
>      if (obj) {

Non-empty stack.

>          qobject_incref(obj);
> +    } else {

Empty stack.

> +        obj = qnull();
>      }
>      return obj;
>  }

Thanks for your comments.  They help a lot with understanding the code,
and they also demonstrate that it's muddled crap %-}

So, how does this contraption work?

A visitor cab encounter NULL only when it visits pointers (d'oh!).
Searching qapi-visit-core.c for **obj finds start_struct(),
start_implicit_struct(), type_str(), type_any().

As far as I can tell, start_implicit_struct() is for unboxed structs, so
NULL must not happen there.  This visitor doesn't implement it anyway.

qmp_output_type_str() substitutes "" for NULL.  Ooookay.

qmp_output_type_any() crashes on NULL.  Can this happen?

qmp_output_start_struct() ignores its obj argument.  My best guess is
that this substitutes an empty dictionary for NULL.

Okay, I give up: how do we get an empty stack and return qnull()?

> diff --git a/tests/test-qmp-output-visitor.c b/tests/test-qmp-output-visitor.c
> index 3078442..8e6fc33 100644
> --- a/tests/test-qmp-output-visitor.c
> +++ b/tests/test-qmp-output-visitor.c
> @@ -461,6 +461,8 @@ static void test_visitor_out_empty(TestOutputVisitorData *data,
>
>      arg = qmp_output_get_qobject(data->qov);
>      g_assert(qobject_type(arg) == QTYPE_QNULL);
> +    /* Check that qnull reference counting is sane */
> +    g_assert(arg->refcnt == 2);
>      qobject_decref(arg);
>  }
Eric Blake Dec. 2, 2015, 11:10 p.m. UTC | #2
On 11/27/2015 06:06 AM, Markus Armbruster wrote:
> Eric Blake <eblake@redhat.com> writes:
> 
>> Commit 6c2f9a15 ensured that we would not return NULL when the
>> caller used an output visitor but had nothing to visit. But
>> in doing so, it added a FIXME about a reference count leak
>> that could abort qemu in the (unlikely) case of SIZE_MAX such
>> visits (more plausible on 32-bit).
> 
> The commit message says "We'll want to fix it for real before the
> release."  Is this patch for 2.5?  For what it's worth, it applies to
> master.

It's a bit late for 2.5 now, and I don't think we'll run into 2^32 uses
of qnull(), so I can live with this waiting until 2.6 and additionally
adding qemu-stable in cc.

> 
>> This fixes things by documenting the internal contracts, and
>> explaining why the internal function can return NULL and only
>> the public facing interface needs to worry about qnull(),
>> thus avoiding over-referencing the qnull_ global object.
>>
>> It does not, however, fix the stupidity of the stack mixing
>> up two separate pieces of information; add a FIXME to explain
>> that issue.
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>
>>
>> ---
>> v6: no change
>> ---
>>  qapi/qmp-output-visitor.c       | 30 ++++++++++++++++++++++++++++--
>>  tests/test-qmp-output-visitor.c |  2 ++
>>  2 files changed, 30 insertions(+), 2 deletions(-)
>>
>> diff --git a/qapi/qmp-output-visitor.c b/qapi/qmp-output-visitor.c
>> index e66ab3c..9d0f9d1 100644
>> --- a/qapi/qmp-output-visitor.c
>> +++ b/qapi/qmp-output-visitor.c
>> @@ -29,6 +29,12 @@ typedef QTAILQ_HEAD(QStack, QStackEntry) QStack;
>>  struct QmpOutputVisitor
>>  {
>>      Visitor visitor;
>> +    /* FIXME: we are abusing stack to hold two separate pieces of
>> +     * information: the current root object, and the stack of objects
>> +     * still being built.  Worse, our behavior is inconsistent:
>> +     * visiting two top-level scalars in a row discards the first in
>> +     * favor of the second, but visiting two top-level objects in a
>> +     * row tries to append the second object into the first.  */
> 
> Uhh, I'll simply take your word for it.

I clean it up in 6/23, if reviewing that one helps understand the
comment here.

> 
>>      QStack stack;
>>  };
>>
>> @@ -41,10 +47,12 @@ static QmpOutputVisitor *to_qov(Visitor *v)
>>      return container_of(v, QmpOutputVisitor, visitor);
>>  }
>>
>> +/* Push @value onto the stack of current QObjects being built */
>>  static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
>>  {
>>      QStackEntry *e = g_malloc0(sizeof(*e));
>>
>> +    assert(value);
>>      e->value = value;
> 
> Keep in mind for later: we never push a null value on the stack, and
> there is no other way to grow the stack.  Therefore, the stack holds
> only non-null values.
> 
>>      if (qobject_type(e->value) == QTYPE_QLIST) {
>>          e->is_list_head = true;
>> @@ -52,16 +60,20 @@ static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
>>      QTAILQ_INSERT_HEAD(&qov->stack, e, node);
>>  }
>>
>> +/* Grab and remove the most recent QObject from the stack */
>>  static QObject *qmp_output_pop(QmpOutputVisitor *qov)
>>  {
>>      QStackEntry *e = QTAILQ_FIRST(&qov->stack);
>>      QObject *value;
>> +
>> +    assert(e);
> 
> I figure this assert the stack isn't empty.  Correct?

Yes.

> 
>>      QTAILQ_REMOVE(&qov->stack, e, node);
>>      value = e->value;
>>      g_free(e);
>>      return value;
>>  }
>>
>> +/* Grab the root QObject, if any, in preparation to empty the stack */
>>  static QObject *qmp_output_first(QmpOutputVisitor *qov)
>>  {
>>      QStackEntry *e = QTAILQ_LAST(&qov->stack, QStack);
>> @@ -72,24 +84,32 @@ static QObject *qmp_output_first(QmpOutputVisitor *qov)
>        /*
>         * FIXME Wrong, because qmp_output_get_qobject() will increment
>         * the refcnt *again*.  We need to think through how visitors
>>       * handle null.
>>       */
> 
> Sure you don't want to drop the FIXME?

Oops. I didn't rebase my patch quite correctly (when I first wrote it,
commit 6c2f9a15 wasn't yet in final form).  Yes, the comment is dead now.

> 
>>      if (!e) {
> 
> I figure this means the stack is empty.  Now I wish I understood how we
> use the stack.

The stack is either empty (visit just started) or the head contains a
root element (see qmp_output_add_obj calling qmp_output_push_obj).
Additionally, if the head is a QDict or QList, then the 2nd through
(N+1)th elements are the 1st through Nth incomplete containers still
collecting members.

>     
>> -        return qnull();
>> +        /* No root */
>> +        return NULL;
>>      }
>> -
>> +    assert(e->value);
> 
> Safe because the stack holds only non-null values.
> 
>>      return e->value;
>>  }
> 
> We return null exactly when the stack is empty (whatever that may mean).
> 
>>
>> +/* Grab the most recent QObject from the stack, which must exist */
>>  static QObject *qmp_output_last(QmpOutputVisitor *qov)
>>  {
>>      QStackEntry *e = QTAILQ_FIRST(&qov->stack);
>> +
>> +    assert(e);
>>      return e->value;
>>  }
> 
> Can't return null, because the stack holds only non-null values.
> 
> assert(e && e->value) to match qmp_output_first()?

Can't hurt.

> 
>>
>> +/* Add @value to the current QObject being built.
>> + * If the stack is visiting a dictionary or list, @value is now owned
>> + * by that container. Otherwise, @value is now the root.  */
>>  static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,
>>                                 QObject *value)
>>  {
>>      QObject *cur;
>>
>>      if (QTAILQ_EMPTY(&qov->stack)) {
>> +        /* Stack was empty, track this object as root */
>>          qmp_output_push_obj(qov, value);
>>          return;
>>      }
>> @@ -98,13 +118,16 @@ static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,
>>
>>      switch (qobject_type(cur)) {
>>      case QTYPE_QDICT:
>> +        assert(name);
> 
> Okay, but it really only converts one kind of crash into another one.
> 
>>          qdict_put_obj(qobject_to_qdict(cur), name, value);
>>          break;
>>      case QTYPE_QLIST:
>>          qlist_append_obj(qobject_to_qlist(cur), value);
>>          break;
>>      default:
>> +        /* The previous root was a scalar, replace it with a new root */
>>          qobject_decref(qmp_output_pop(qov));
>> +        assert(QTAILQ_EMPTY(&qov->stack));
>>          qmp_output_push_obj(qov, value);
>>          break;
>>      }
> 
> Hand me the bucket.

Yep, this was the gross abuse of the stack that the FIXME above tries to
point out, and which 6/23 tries to clean up.

> 
>> @@ -205,11 +228,14 @@ static void qmp_output_type_any(Visitor *v, QObject **obj, const char *name,
>>      qmp_output_add_obj(qov, name, *obj);
>>  }
>>
>> +/* Finish building, and return the root object. Will not be NULL. */
>>  QObject *qmp_output_get_qobject(QmpOutputVisitor *qov)
>>  {
>>      QObject *obj = qmp_output_first(qov);
>>      if (obj) {
> 
> Non-empty stack.
> 
>>          qobject_incref(obj);
>> +    } else {
> 
> Empty stack.
> 
>> +        obj = qnull();
>>      }
>>      return obj;
>>  }
> 
> Thanks for your comments.  They help a lot with understanding the code,
> and they also demonstrate that it's muddled crap %-}

Indeed.

> 
> So, how does this contraption work?
> 
> A visitor cab encounter NULL only when it visits pointers (d'oh!).
> Searching qapi-visit-core.c for **obj finds start_struct(),
> start_implicit_struct(), type_str(), type_any().
> 
> As far as I can tell, start_implicit_struct() is for unboxed structs, so
> NULL must not happen there.

You are correct that start_implicit_struct is for unboxed substructs
(the branch of a flat union).  And we should never get here with it
being NULL (although until commit 20/23 of this series, it was because
we are abusing the 'data' member of the union during visit_start_union()).

>  This visitor doesn't implement it anyway.

Well, not yet (but it IS on queue as a patch by Zoltán:
http://repo.or.cz/qemu/ericb.git/commitdiff/9ee9f4ad)

> 
> qmp_output_type_str() substitutes "" for NULL.  Ooookay.

Not the smartest thing we've done, but don't know how hard it would be
to fix (we DO have clients abusing this aspect of qapi that would have
to be cleaned up to quit passing in incomplete qapi structs).  Someday
we may want to actually use qnull() instead of qstring("") (so that the
JSON on the wire would read "name":null instead of "name":"").  But if
we make that change, we would still be inserting a non-NULL QObject onto
the stack.

> 
> qmp_output_type_any() crashes on NULL.  Can this happen?

Again, if the QObject is trying to represent NULL, it does so with
qnull() (a non-null QObject), so we should never pass in NULL.  We
aren't using 'any' very heavily, so I doubt we have any broken clients.

> 
> qmp_output_start_struct() ignores its obj argument.  My best guess is
> that this substitutes an empty dictionary for NULL.

No one should ever be passing in NULL for a QDict, but yes, the code
would create an empty QDict on output if that happened.  Maybe worth
adding an assert; but not in this patch, because I don't know if we have
broken clients.

> 
> Okay, I give up: how do we get an empty stack and return qnull()?

The only way to get an empty stack is to never visit anything in the
first place.  See test-qmp-output-visitor.c:test_visitor_out_empty(),
which creates a visitor with qmp_output_get_qobject() then immediately
asks for the root object even though it didn't call any visit_type_FOO()
to populate a root object.  And my recollection is that back in
September you had an example use of qom-get that could trigger a failure
early enough to run into the empty stack scenario, which is why you
added qnull() in the first place.

> 
>> diff --git a/tests/test-qmp-output-visitor.c b/tests/test-qmp-output-visitor.c
>> index 3078442..8e6fc33 100644
>> --- a/tests/test-qmp-output-visitor.c
>> +++ b/tests/test-qmp-output-visitor.c
>> @@ -461,6 +461,8 @@ static void test_visitor_out_empty(TestOutputVisitorData *data,
>>
>>      arg = qmp_output_get_qobject(data->qov);
>>      g_assert(qobject_type(arg) == QTYPE_QNULL);
>> +    /* Check that qnull reference counting is sane */
>> +    g_assert(arg->refcnt == 2);
>>      qobject_decref(arg);
>>  }
>
Markus Armbruster Dec. 3, 2015, 5:50 p.m. UTC | #3
Eric Blake <eblake@redhat.com> writes:

> On 11/27/2015 06:06 AM, Markus Armbruster wrote:
>> Eric Blake <eblake@redhat.com> writes:
>> 
>>> Commit 6c2f9a15 ensured that we would not return NULL when the
>>> caller used an output visitor but had nothing to visit. But
>>> in doing so, it added a FIXME about a reference count leak
>>> that could abort qemu in the (unlikely) case of SIZE_MAX such
>>> visits (more plausible on 32-bit).
>> 
>> The commit message says "We'll want to fix it for real before the
>> release."  Is this patch for 2.5?  For what it's worth, it applies to
>> master.
>
> It's a bit late for 2.5 now, and I don't think we'll run into 2^32 uses
> of qnull(), so I can live with this waiting until 2.6 and additionally
> adding qemu-stable in cc.

If this patch was simple, I'd try to get it into 2.5.  But my questions
demonstrate it isn't simple, so we'll have to break our promise to "fix
it for real before the release".  *Sigh*

>>> This fixes things by documenting the internal contracts, and
>>> explaining why the internal function can return NULL and only
>>> the public facing interface needs to worry about qnull(),
>>> thus avoiding over-referencing the qnull_ global object.
>>>
>>> It does not, however, fix the stupidity of the stack mixing
>>> up two separate pieces of information; add a FIXME to explain
>>> that issue.
>>>
>>> Signed-off-by: Eric Blake <eblake@redhat.com>
>>>
>>> ---
>>> v6: no change
>>> ---
>>>  qapi/qmp-output-visitor.c       | 30 ++++++++++++++++++++++++++++--
>>>  tests/test-qmp-output-visitor.c |  2 ++
>>>  2 files changed, 30 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/qapi/qmp-output-visitor.c b/qapi/qmp-output-visitor.c
>>> index e66ab3c..9d0f9d1 100644
>>> --- a/qapi/qmp-output-visitor.c
>>> +++ b/qapi/qmp-output-visitor.c
>>> @@ -29,6 +29,12 @@ typedef QTAILQ_HEAD(QStack, QStackEntry) QStack;
>>>  struct QmpOutputVisitor
>>>  {
>>>      Visitor visitor;
>>> +    /* FIXME: we are abusing stack to hold two separate pieces of
>>> +     * information: the current root object, and the stack of objects
>>> +     * still being built.  Worse, our behavior is inconsistent:
>>> +     * visiting two top-level scalars in a row discards the first in
>>> +     * favor of the second, but visiting two top-level objects in a
>>> +     * row tries to append the second object into the first.  */
>> 
>> Uhh, I'll simply take your word for it.
>
> I clean it up in 6/23, if reviewing that one helps understand the
> comment here.
>
>> 
>>>      QStack stack;
>>>  };
>>>
>>> @@ -41,10 +47,12 @@ static QmpOutputVisitor *to_qov(Visitor *v)
>>>      return container_of(v, QmpOutputVisitor, visitor);
>>>  }
>>>
>>> +/* Push @value onto the stack of current QObjects being built */
>>>  static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
>>>  {
>>>      QStackEntry *e = g_malloc0(sizeof(*e));
>>>
>>> +    assert(value);
>>>      e->value = value;
>> 
>> Keep in mind for later: we never push a null value on the stack, and
>> there is no other way to grow the stack.  Therefore, the stack holds
>> only non-null values.
>> 
>>>      if (qobject_type(e->value) == QTYPE_QLIST) {
>>>          e->is_list_head = true;
>>> @@ -52,16 +60,20 @@ static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
>>>      QTAILQ_INSERT_HEAD(&qov->stack, e, node);
>>>  }
>>>
>>> +/* Grab and remove the most recent QObject from the stack */
>>>  static QObject *qmp_output_pop(QmpOutputVisitor *qov)
>>>  {
>>>      QStackEntry *e = QTAILQ_FIRST(&qov->stack);
>>>      QObject *value;
>>> +
>>> +    assert(e);
>> 
>> I figure this assert the stack isn't empty.  Correct?
>
> Yes.
>
>> 
>>>      QTAILQ_REMOVE(&qov->stack, e, node);
>>>      value = e->value;
>>>      g_free(e);
>>>      return value;
>>>  }
>>>
>>> +/* Grab the root QObject, if any, in preparation to empty the stack */
>>>  static QObject *qmp_output_first(QmpOutputVisitor *qov)
>>>  {
>>>      QStackEntry *e = QTAILQ_LAST(&qov->stack, QStack);
>>> @@ -72,24 +84,32 @@ static QObject *qmp_output_first(QmpOutputVisitor *qov)
>>        /*
>>         * FIXME Wrong, because qmp_output_get_qobject() will increment
>>         * the refcnt *again*.  We need to think through how visitors
>>>       * handle null.
>>>       */
>> 
>> Sure you don't want to drop the FIXME?
>
> Oops. I didn't rebase my patch quite correctly (when I first wrote it,
> commit 6c2f9a15 wasn't yet in final form).  Yes, the comment is dead now.
>
>> 
>>>      if (!e) {
>> 
>> I figure this means the stack is empty.  Now I wish I understood how we
>> use the stack.
>
> The stack is either empty (visit just started) or the head contains a
> root element (see qmp_output_add_obj calling qmp_output_push_obj).
> Additionally, if the head is a QDict or QList, then the 2nd through
> (N+1)th elements are the 1st through Nth incomplete containers still
> collecting members.

Explanation should go into a comment.  It's okay to add it only after
you rework the use of the stack if the current use defies description.

>>> -        return qnull();
>>> +        /* No root */
>>> +        return NULL;
>>>      }
>>> -
>>> +    assert(e->value);
>> 
>> Safe because the stack holds only non-null values.
>> 
>>>      return e->value;
>>>  }
>> 
>> We return null exactly when the stack is empty (whatever that may mean).

According to your explanation above, it means "nothing has been visited,
yet".

>>> +/* Grab the most recent QObject from the stack, which must exist */
>>>  static QObject *qmp_output_last(QmpOutputVisitor *qov)
>>>  {
>>>      QStackEntry *e = QTAILQ_FIRST(&qov->stack);
>>> +
>>> +    assert(e);
>>>      return e->value;
>>>  }
>> 
>> Can't return null, because the stack holds only non-null values.
>> 
>> assert(e && e->value) to match qmp_output_first()?
>
> Can't hurt.
>
>> 
>>>
>>> +/* Add @value to the current QObject being built.
>>> + * If the stack is visiting a dictionary or list, @value is now owned
>>> + * by that container. Otherwise, @value is now the root.  */
>>>  static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,
>>>                                 QObject *value)
>>>  {
>>>      QObject *cur;
>>>
>>>      if (QTAILQ_EMPTY(&qov->stack)) {
>>> +        /* Stack was empty, track this object as root */
>>>          qmp_output_push_obj(qov, value);
>>>          return;
>>>      }
>>> @@ -98,13 +118,16 @@ static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,
>>>
>>>      switch (qobject_type(cur)) {
>>>      case QTYPE_QDICT:
>>> +        assert(name);
>> 
>> Okay, but it really only converts one kind of crash into another one.
>> 
>>>          qdict_put_obj(qobject_to_qdict(cur), name, value);
>>>          break;
>>>      case QTYPE_QLIST:
>>>          qlist_append_obj(qobject_to_qlist(cur), value);
>>>          break;
>>>      default:
>>> +        /* The previous root was a scalar, replace it with a new root */
>>>          qobject_decref(qmp_output_pop(qov));
>>> +        assert(QTAILQ_EMPTY(&qov->stack));
>>>          qmp_output_push_obj(qov, value);
>>>          break;
>>>      }
>> 
>> Hand me the bucket.
>
> Yep, this was the gross abuse of the stack that the FIXME above tries to
> point out, and which 6/23 tries to clean up.
>
>> 
>>> @@ -205,11 +228,14 @@ static void qmp_output_type_any(Visitor *v, QObject **obj, const char *name,
>>>      qmp_output_add_obj(qov, name, *obj);
>>>  }
>>>
>>> +/* Finish building, and return the root object. Will not be NULL. */
>>>  QObject *qmp_output_get_qobject(QmpOutputVisitor *qov)
>>>  {
>>>      QObject *obj = qmp_output_first(qov);
>>>      if (obj) {
>> 
>> Non-empty stack.
>> 
>>>          qobject_incref(obj);
>>> +    } else {
>> 
>> Empty stack.
>> 
>>> +        obj = qnull();
>>>      }
>>>      return obj;
>>>  }
>> 
>> Thanks for your comments.  They help a lot with understanding the code,
>> and they also demonstrate that it's muddled crap %-}
>
> Indeed.
>
>> 
>> So, how does this contraption work?
>> 
>> A visitor cab encounter NULL only when it visits pointers (d'oh!).
>> Searching qapi-visit-core.c for **obj finds start_struct(),
>> start_implicit_struct(), type_str(), type_any().
>> 
>> As far as I can tell, start_implicit_struct() is for unboxed structs, so
>> NULL must not happen there.
>
> You are correct that start_implicit_struct is for unboxed substructs
> (the branch of a flat union).  And we should never get here with it
> being NULL (although until commit 20/23 of this series, it was because
> we are abusing the 'data' member of the union during visit_start_union()).

We should spell out "*obj isn't null" in visit_start_implicit_struct()'s
contract.  I'd assert it for good measure.

>>  This visitor doesn't implement it anyway.
>
> Well, not yet (but it IS on queue as a patch by Zoltán:
> http://repo.or.cz/qemu/ericb.git/commitdiff/9ee9f4ad)

That's opts-visitor.c.  This is qmp-output-visitor.c.

>> qmp_output_type_str() substitutes "" for NULL.  Ooookay.
>
> Not the smartest thing we've done, but don't know how hard it would be
> to fix (we DO have clients abusing this aspect of qapi that would have
> to be cleaned up to quit passing in incomplete qapi structs).

Disgusting :)

>                                                                Someday
> we may want to actually use qnull() instead of qstring("") (so that the
> JSON on the wire would read "name":null instead of "name":"").

QAPI's treatment of null is unsatisfactory.  Initially, QAPI ignored
null at the surface, and hacked around it below the surface (mapping C
NULL to "" is such a hack).  Since then, we added partial support for
null, and we still have all the hacks.  Needs a thorough rethink, but
not in this thread.

>                                                                 But if
> we make that change, we would still be inserting a non-NULL QObject onto
> the stack.

Yes.

We created qnull() instead of representing null as a null pointer
because we were afraid of violating unstated assumptions in crap code
like this.

>> qmp_output_type_any() crashes on NULL.  Can this happen?
>
> Again, if the QObject is trying to represent NULL, it does so with
> qnull() (a non-null QObject), so we should never pass in NULL.  We
> aren't using 'any' very heavily, so I doubt we have any broken clients.

A visit_type_FOO() visits a variable whose type is the C representation
of a FOOish QAPI type, and its obj parameter is a pointer to the C
representation.

With an input visitor, it writes to the visited variable, and with an
output visitor, it reads from it.

Example: visit_type_int64() visits an int64_t, and the obj parameter is
int64_t *.  Input visitors can write any int64_t value, and output
visitors can cope with any int64_t value.

Example: visit_type_any() visits a QObject *, and the obj parameter is
Object **.  Input visitors must not write a null pointer, and output
visitors need not cope with a null pointer.  In fact,
qmp_output_type_any() crashes on null pointer:

    #0  0x0000555555b44294 in qobject_type (obj=0x0)
        at /work/armbru/qemu/include/qapi/qmp/qobject.h:106
    #1  0x0000555555b4432d in qmp_output_push_obj (qov=0x555557495470, value=0x0)
        at /work/armbru/qemu/qapi/qmp-output-visitor.c:49
    #2  0x0000555555b444c8 in qmp_output_add_obj (qov=
        0x555557495470, name=0x555555b9e772 "unused", value=0x0)
        at /work/armbru/qemu/qapi/qmp-output-visitor.c:94
    #3  0x0000555555b448e1 in qmp_output_type_any (v=
        0x555557495470, obj=0x7fffffffc878, name=0x555555b9e772 "unused", errp=0x7fffffffc888) at /work/armbru/qemu/qapi/qmp-output-visitor.c:199

Feeding data that was built with an input visitor to an output visitor
is safe: no null pointers.

Feeding arbitrary data isn't: we crash on null.

For now, I think we should simply spell out the restrictions in
visit_type_any()'s contract.  We should revisit it (haha) when we
rethink null in QAPI.

>> qmp_output_start_struct() ignores its obj argument.  My best guess is
>> that this substitutes an empty dictionary for NULL.
>
> No one should ever be passing in NULL for a QDict, but yes, the code
> would create an empty QDict on output if that happened.  Maybe worth
> adding an assert; but not in this patch, because I don't know if we have
> broken clients.

qmp_output_start_struct() starts the visit of a variable whose type is
the C representation of a structured QAPI type, i.e. a pointer to the
generated C type.  The obj parameter is a pointer to this pointer, thus
void **.  Input visitors must not write a null pointer, and output
visitors need not cope with a null pointer.  qmp_output_start_struct()
chooses to cope: it substitutes an empty QDict.  Doesn't crash, but
violates the schema instead, hurrah.

Again, let's simply spell out the mess in the contract for now.

>> Okay, I give up: how do we get an empty stack and return qnull()?
>
> The only way to get an empty stack is to never visit anything in the
> first place.  See test-qmp-output-visitor.c:test_visitor_out_empty(),
> which creates a visitor with qmp_output_get_qobject() then immediately
> asks for the root object even though it didn't call any visit_type_FOO()
> to populate a root object.  And my recollection is that back in
> September you had an example use of qom-get that could trigger a failure
> early enough to run into the empty stack scenario, which is why you
> added qnull() in the first place.

Why would anyone create a QMP output visitor, visit nothing, then expect
the visitor to yield a value?  Smells funny, so let's find out more.

Running make check with suitably instrumented code finds exactly one
place: the getter for "spapr-dr-connector" property "fdt"
prop_get_fdt().  It gets exercised with QMP commands like

    { "execute": "qom-get",
      "arguments": {
          "path": "/machine/unattached/device[6]/dr-connector[1073741934]",
          "property": "fdt" } }

Let's find out how prop_get_fdt() works.

If sPAPRDRConnector member fdt is null, it returns without doing
anything.  The output visitor pulls a null out of its hat then.

Else, we iterate over the flattened device tree until the nesting level
goes back to zero:

* On FDT_BEGIN_NODE, visit_start_struct(), increment nesting level

* On FDT_END_NODE, visit_end_struct(), decrement nesting level

* On FDT_PROP, visit a list of uint8_t.  I guess the list could be
  empty.

The loop can produce a recursive structure

    T = list of uint8_t
      | struct where all members are T

Whether "just a list" can happen I can't say; the code lacks assertions
or comments.  Likewise, I can't say whether the structs can ever be
empty.

Taken together, prop_get_fdt() returns null | T.

Note that the value's structure is *dynamic*.  The best a statically
defined QAPI schema can do to describe it is 'any'.

qom-get is of course declared to return 'any', but so far I had thought
it was because declaring a strongly typed qom-get was impractical.  This
property pushes it from impractical to impossible.  Not sure that's a
good idea.  But I digress.

Is null the appropriate value of qom-get when there is no fdt?

It looks accidental:

* Initially, attempting to qmp_output_get_qobject() without having
  visited anything was *wrong* and crashed.

* Commit 1d10b44 (v2.1.0) papered over the crash by making it return
  NULL for empty stacks.  I'm not sure why Marcel did that back then.  I
  guess he somehow ran into the crash, but I can't see how.

  Returning NULL is problematic, because QObject * generally isn't
  supposed to be NULL.  Some places cope with it anyway.  The QMP core
  is such a place: it arbitrarily substitutes an empty dictionary for
  NULL.

* When this device was created in commit bbf5c87 (v2.4.0),
  prop_get_fdt() returning without doing anything should've made qom-get
  return the empty dictionary.  "Should've", because my attempts to
  create the device fail.  I tried with commit 6c2f9a15^ instead, and
  can see it return {} there.

* Commit 6c2f9a1 (not yet released) exchanges the paper we use to paper
  over the crash: return qnull() for empty stacks.  The commit message
  explains my reasons back then.  What we didn't see back then is that
  it changes this property's value.  I'm trying to get that change
  reverted for 2.5, by fixing the visitor abuse, i.e. do a proper empty
  struct visit.

Back to the QMP output visitor: is asking the QMP output visitor for the
visit's value when you haven't visited anything a sane thing to do?  I
doubt it.  I think we should go back to the initial behavior, and find
and fix the code that misuses visitors that way.

>>> diff --git a/tests/test-qmp-output-visitor.c b/tests/test-qmp-output-visitor.c
>>> index 3078442..8e6fc33 100644
>>> --- a/tests/test-qmp-output-visitor.c
>>> +++ b/tests/test-qmp-output-visitor.c
>>> @@ -461,6 +461,8 @@ static void test_visitor_out_empty(TestOutputVisitorData *data,
>>>
>>>      arg = qmp_output_get_qobject(data->qov);
>>>      g_assert(qobject_type(arg) == QTYPE_QNULL);
>>> +    /* Check that qnull reference counting is sane */
>>> +    g_assert(arg->refcnt == 2);
>>>      qobject_decref(arg);
>>>  }
>>
Eric Blake Dec. 4, 2015, 3:01 a.m. UTC | #4
On 12/03/2015 10:50 AM, Markus Armbruster wrote:

>>> So, how does this contraption work?
>>>
>>> A visitor cab encounter NULL only when it visits pointers (d'oh!).
>>> Searching qapi-visit-core.c for **obj finds start_struct(),
>>> start_implicit_struct(), type_str(), type_any().
>>>
>>> As far as I can tell, start_implicit_struct() is for unboxed structs, so
>>> NULL must not happen there.
>>
>> You are correct that start_implicit_struct is for unboxed substructs
>> (the branch of a flat union).  And we should never get here with it
>> being NULL (although until commit 20/23 of this series, it was because
>> we are abusing the 'data' member of the union during visit_start_union()).
> 
> We should spell out "*obj isn't null" in visit_start_implicit_struct()'s
> contract.  I'd assert it for good measure.

I've spent the better part of my afternoon playing with asserts to see
what works and what doesn't.  So far, I've learned:

We have input visitors that allow visit_start_struct(v, NULL, ...) (see
vl.c's use of opts-visitor).  This is done when we want to parse from
some other format into what qapi would accept by hierarchical
visit_type_FOO(), but don't actually have a qapi type to parse into.

We have output visitors that allow visit_start_struct(v, NULL, ...)
(most uses of visit_start outside of generated qapi-visit).  This is
done when we want to create certain output but don't have a qapi type,
so we instead are visiting types by hand.

We are inconsistent on how we pass things.  I think that ALL callers of
visit_start_struct() should pass one of two sets:
 valid type name, sizeof(that type), non-NULL obj (where input visitors
will do *obj = g_new0(size) if no other error is present; and where
output visitors should have an already-complete *obj)
or:
 NULL type name, size 0, NULL obj
But we aren't there yet - several clients have to be tweaked.

I'm also considering changing the signature of visit_start_struct() to
bundle the type name next to the type size (the current code inserts the
dictionary key name in between the two, making life a bit more awkward).

>>> qmp_output_type_any() crashes on NULL.  Can this happen?
>>
>> Again, if the QObject is trying to represent NULL, it does so with
>> qnull() (a non-null QObject), so we should never pass in NULL.  We
>> aren't using 'any' very heavily, so I doubt we have any broken clients.

And I argued in the other thread that the spapr prop_get_fdt() abuse
should probably explicitly use visit_type_any() with qnull() as its
subject, instead of relying on no visit at all, if it turns out that we
like 'null' for missing fdt as distinct from an fdt that is present but
empty.

> 
> A visit_type_FOO() visits a variable whose type is the C representation
> of a FOOish QAPI type, and its obj parameter is a pointer to the C
> representation.

Or NULL when there is no qapi type, and we are just using the
visit_type_* to parse or output things manually without the intermediate
qapi representation.


> For now, I think we should simply spell out the restrictions in
> visit_type_any()'s contract.  We should revisit it (haha) when we
> rethink null in QAPI.

Yes, this thread has given me ideas on how to improve my 7/23 patch
documentation of the visitor interfaces.

> Again, let's simply spell out the mess in the contract for now.

Or since we're (soon) at the start of 2.6 development, add some asserts,
with the ability to fix bugs or revert the asserts closer to that
release as needed.


> Taken together, prop_get_fdt() returns null | T.
> 
> Note that the value's structure is *dynamic*.  The best a statically
> defined QAPI schema can do to describe it is 'any'.

Or possibly an alternate.

> Back to the QMP output visitor: is asking the QMP output visitor for the
> visit's value when you haven't visited anything a sane thing to do?  I
> doubt it.  I think we should go back to the initial behavior, and find
> and fix the code that misuses visitors that way.

Sounds like adding assertions and fixing up fallout is worth attempting,
then.
diff mbox

Patch

diff --git a/qapi/qmp-output-visitor.c b/qapi/qmp-output-visitor.c
index e66ab3c..9d0f9d1 100644
--- a/qapi/qmp-output-visitor.c
+++ b/qapi/qmp-output-visitor.c
@@ -29,6 +29,12 @@  typedef QTAILQ_HEAD(QStack, QStackEntry) QStack;
 struct QmpOutputVisitor
 {
     Visitor visitor;
+    /* FIXME: we are abusing stack to hold two separate pieces of
+     * information: the current root object, and the stack of objects
+     * still being built.  Worse, our behavior is inconsistent:
+     * visiting two top-level scalars in a row discards the first in
+     * favor of the second, but visiting two top-level objects in a
+     * row tries to append the second object into the first.  */
     QStack stack;
 };

@@ -41,10 +47,12 @@  static QmpOutputVisitor *to_qov(Visitor *v)
     return container_of(v, QmpOutputVisitor, visitor);
 }

+/* Push @value onto the stack of current QObjects being built */
 static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
 {
     QStackEntry *e = g_malloc0(sizeof(*e));

+    assert(value);
     e->value = value;
     if (qobject_type(e->value) == QTYPE_QLIST) {
         e->is_list_head = true;
@@ -52,16 +60,20 @@  static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value)
     QTAILQ_INSERT_HEAD(&qov->stack, e, node);
 }

+/* Grab and remove the most recent QObject from the stack */
 static QObject *qmp_output_pop(QmpOutputVisitor *qov)
 {
     QStackEntry *e = QTAILQ_FIRST(&qov->stack);
     QObject *value;
+
+    assert(e);
     QTAILQ_REMOVE(&qov->stack, e, node);
     value = e->value;
     g_free(e);
     return value;
 }

+/* Grab the root QObject, if any, in preparation to empty the stack */
 static QObject *qmp_output_first(QmpOutputVisitor *qov)
 {
     QStackEntry *e = QTAILQ_LAST(&qov->stack, QStack);
@@ -72,24 +84,32 @@  static QObject *qmp_output_first(QmpOutputVisitor *qov)
      * handle null.
      */
     if (!e) {
-        return qnull();
+        /* No root */
+        return NULL;
     }
-
+    assert(e->value);
     return e->value;
 }

+/* Grab the most recent QObject from the stack, which must exist */
 static QObject *qmp_output_last(QmpOutputVisitor *qov)
 {
     QStackEntry *e = QTAILQ_FIRST(&qov->stack);
+
+    assert(e);
     return e->value;
 }

+/* Add @value to the current QObject being built.
+ * If the stack is visiting a dictionary or list, @value is now owned
+ * by that container. Otherwise, @value is now the root.  */
 static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,
                                QObject *value)
 {
     QObject *cur;

     if (QTAILQ_EMPTY(&qov->stack)) {
+        /* Stack was empty, track this object as root */
         qmp_output_push_obj(qov, value);
         return;
     }
@@ -98,13 +118,16 @@  static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name,

     switch (qobject_type(cur)) {
     case QTYPE_QDICT:
+        assert(name);
         qdict_put_obj(qobject_to_qdict(cur), name, value);
         break;
     case QTYPE_QLIST:
         qlist_append_obj(qobject_to_qlist(cur), value);
         break;
     default:
+        /* The previous root was a scalar, replace it with a new root */
         qobject_decref(qmp_output_pop(qov));
+        assert(QTAILQ_EMPTY(&qov->stack));
         qmp_output_push_obj(qov, value);
         break;
     }
@@ -205,11 +228,14 @@  static void qmp_output_type_any(Visitor *v, QObject **obj, const char *name,
     qmp_output_add_obj(qov, name, *obj);
 }

+/* Finish building, and return the root object. Will not be NULL. */
 QObject *qmp_output_get_qobject(QmpOutputVisitor *qov)
 {
     QObject *obj = qmp_output_first(qov);
     if (obj) {
         qobject_incref(obj);
+    } else {
+        obj = qnull();
     }
     return obj;
 }
diff --git a/tests/test-qmp-output-visitor.c b/tests/test-qmp-output-visitor.c
index 3078442..8e6fc33 100644
--- a/tests/test-qmp-output-visitor.c
+++ b/tests/test-qmp-output-visitor.c
@@ -461,6 +461,8 @@  static void test_visitor_out_empty(TestOutputVisitorData *data,

     arg = qmp_output_get_qobject(data->qov);
     g_assert(qobject_type(arg) == QTYPE_QNULL);
+    /* Check that qnull reference counting is sane */
+    g_assert(arg->refcnt == 2);
     qobject_decref(arg);
 }