diff mbox

[3/8] Add QBuffer

Message ID 6e14cbfe3764b46d9bd6d2db61d41fd9c85dd54e.1273843151.git.jan.kiszka@siemens.com
State New
Headers show

Commit Message

Jan Kiszka May 14, 2010, 1:20 p.m. UTC
This introduces a buffer object for use with QMP. As a buffer is not
natively encodable in JSON, we encode it as a base64 string. To decode
this kind of string back to a QBuffer, the receiving side has to be
aware of its semantics, which is normally no problem within QMP.

The first use case of this type is pushing the content of buffers that
are part of a device state into a qdict.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 Makefile        |    3 +-
 Makefile.objs   |    2 +-
 check-qbuffer.c |  172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 configure       |    2 +-
 qbuffer.c       |  116 +++++++++++++++++++++++++++++++++++++
 qbuffer.h       |   33 +++++++++++
 qjson.c         |   16 +++++
 qobject.h       |    1 +
 8 files changed, 342 insertions(+), 3 deletions(-)
 create mode 100644 check-qbuffer.c
 create mode 100644 qbuffer.c
 create mode 100644 qbuffer.h

Comments

Anthony Liguori May 14, 2010, 6:15 p.m. UTC | #1
On 05/14/2010 08:20 AM, Jan Kiszka wrote:
> diff --git a/qjson.c b/qjson.c
> index 483c667..4d1c21a 100644
> --- a/qjson.c
> +++ b/qjson.c
> @@ -19,7 +19,9 @@
>   #include "qlist.h"
>   #include "qbool.h"
>   #include "qfloat.h"
> +#include "qbuffer.h"
>   #include "qdict.h"
> +#include "base64.h"
>
>   typedef struct JSONParsingState
>   {
> @@ -235,6 +237,20 @@ static void to_json(const QObject *obj, QString *str)
>           }
>           break;
>       }
> +    case QTYPE_QBUFFER: {
> +        QBuffer *val = qobject_to_qbuffer(obj);
> +        size_t data_size = qbuffer_get_size(val);
> +        size_t str_len = ((data_size + 2) / 3) * 4;
> +        char *buffer = qemu_malloc(str_len + 3);
> +
> +        buffer[0] = '"';
> +        base64_encode(qbuffer_get_data(val), data_size, buffer + 1);
> +        buffer[str_len + 1] = '"';
> +        buffer[str_len + 2] = 0;
> +        qstring_append(str, buffer);
> +        qemu_free(buffer);
> +        break;
> +    }
>    

Instead of encoding just as a string, it would be a good idea to encode 
it as something like:

{'__class__': 'base64', 'data': ...}

We've discussed using hidden properties to describe special things like 
abstract classes and since we already have this namespace reserved, I 
think it's a good time to use it.

The advantage is that in a dynamic language like Python, the parser can 
convert base64 to binary strings automatically without having to 
understand the QMP protocol.

Regards,

Anthony Liguori

>       case QTYPE_QERROR:
>           /* XXX: should QError be emitted? */
>       case QTYPE_NONE:
> diff --git a/qobject.h b/qobject.h
> index 07de211..45c4fa0 100644
> --- a/qobject.h
> +++ b/qobject.h
> @@ -44,6 +44,7 @@ typedef enum {
>       QTYPE_QFLOAT,
>       QTYPE_QBOOL,
>       QTYPE_QERROR,
> +    QTYPE_QBUFFER,
>   } qtype_code;
>
>   struct QObject;
>
Jan Kiszka May 15, 2010, 8:45 a.m. UTC | #2
Anthony Liguori wrote:
> On 05/14/2010 08:20 AM, Jan Kiszka wrote:
>> diff --git a/qjson.c b/qjson.c
>> index 483c667..4d1c21a 100644
>> --- a/qjson.c
>> +++ b/qjson.c
>> @@ -19,7 +19,9 @@
>>   #include "qlist.h"
>>   #include "qbool.h"
>>   #include "qfloat.h"
>> +#include "qbuffer.h"
>>   #include "qdict.h"
>> +#include "base64.h"
>>
>>   typedef struct JSONParsingState
>>   {
>> @@ -235,6 +237,20 @@ static void to_json(const QObject *obj, QString
>> *str)
>>           }
>>           break;
>>       }
>> +    case QTYPE_QBUFFER: {
>> +        QBuffer *val = qobject_to_qbuffer(obj);
>> +        size_t data_size = qbuffer_get_size(val);
>> +        size_t str_len = ((data_size + 2) / 3) * 4;
>> +        char *buffer = qemu_malloc(str_len + 3);
>> +
>> +        buffer[0] = '"';
>> +        base64_encode(qbuffer_get_data(val), data_size, buffer + 1);
>> +        buffer[str_len + 1] = '"';
>> +        buffer[str_len + 2] = 0;
>> +        qstring_append(str, buffer);
>> +        qemu_free(buffer);
>> +        break;
>> +    }
>>    
> 
> Instead of encoding just as a string, it would be a good idea to encode
> it as something like:
> 
> {'__class__': 'base64', 'data': ...}
> 
> We've discussed using hidden properties to describe special things like
> abstract classes and since we already have this namespace reserved, I
> think it's a good time to use it.
> 
> The advantage is that in a dynamic language like Python, the parser can
> convert base64 to binary strings automatically without having to
> understand the QMP protocol.

Indeed, was amazingly simple to add and works nicely with qmp-shell as
demonstrator. Will repost, also to fix a few remaining glitches I came
across in the meantime.

Jan
Avi Kivity May 15, 2010, 8:49 a.m. UTC | #3
On 05/15/2010 11:45 AM, Jan Kiszka wrote:
>
>> Instead of encoding just as a string, it would be a good idea to encode
>> it as something like:
>>
>> {'__class__': 'base64', 'data': ...}
>>
>> We've discussed using hidden properties to describe special things like
>> abstract classes and since we already have this namespace reserved, I
>> think it's a good time to use it.
>>
>> The advantage is that in a dynamic language like Python, the parser can
>> convert base64 to binary strings automatically without having to
>> understand the QMP protocol.
>>      
> Indeed, was amazingly simple to add and works nicely with qmp-shell as
> demonstrator. Will repost, also to fix a few remaining glitches I came
> across in the meantime.
>
>    

Is this __class__ stuff documented anywhere?
Jan Kiszka May 15, 2010, 8:59 a.m. UTC | #4
Avi Kivity wrote:
> On 05/15/2010 11:45 AM, Jan Kiszka wrote:
>>
>>> Instead of encoding just as a string, it would be a good idea to encode
>>> it as something like:
>>>
>>> {'__class__': 'base64', 'data': ...}
>>>
>>> We've discussed using hidden properties to describe special things like
>>> abstract classes and since we already have this namespace reserved, I
>>> think it's a good time to use it.
>>>
>>> The advantage is that in a dynamic language like Python, the parser can
>>> convert base64 to binary strings automatically without having to
>>> understand the QMP protocol.
>>>      
>> Indeed, was amazingly simple to add and works nicely with qmp-shell as
>> demonstrator. Will repost, also to fix a few remaining glitches I came
>> across in the meantime.
>>
>>    
> 
> Is this __class__ stuff documented anywhere?
> 

Not yet. Also, we should clarify the proposed private extension section
that only "__some_key" is reserved for downstream, not
'__some_other_key__' (i.e. downstream names must not end with '__').

Jan
Avi Kivity May 15, 2010, 5:31 p.m. UTC | #5
On 05/15/2010 11:59 AM, Jan Kiszka wrote:
>
>> Is this __class__ stuff documented anywhere?
>>
>>      
> Not yet. Also, we should clarify the proposed private extension section
> that only "__some_key" is reserved for downstream, not
> '__some_other_key__' (i.e. downstream names must not end with '__').
>
>    

Why use such weird names at all?  What's wrong with 'class'?
Paolo Bonzini May 16, 2010, 9:37 a.m. UTC | #6
On 05/15/2010 07:31 PM, Avi Kivity wrote:
> On 05/15/2010 11:59 AM, Jan Kiszka wrote:
>>
>>> Is this __class__ stuff documented anywhere?
>>>
>> Not yet. Also, we should clarify the proposed private extension section
>> that only "__some_key" is reserved for downstream, not
>> '__some_other_key__' (i.e. downstream names must not end with '__').
>>
>
> Why use such weird names at all? What's wrong with 'class'?

That it conflicts with e.g. PCI classes?

Paolo
Avi Kivity May 16, 2010, 9:50 a.m. UTC | #7
On 05/16/2010 12:37 PM, Paolo Bonzini wrote:
> On 05/15/2010 07:31 PM, Avi Kivity wrote:
>> On 05/15/2010 11:59 AM, Jan Kiszka wrote:
>>>
>>>> Is this __class__ stuff documented anywhere?
>>>>
>>> Not yet. Also, we should clarify the proposed private extension section
>>> that only "__some_key" is reserved for downstream, not
>>> '__some_other_key__' (i.e. downstream names must not end with '__').
>>>
>>
>> Why use such weird names at all? What's wrong with 'class'?
>
> That it conflicts with e.g. PCI classes?

Won't the context tell it apart?  When you expect a pci function, 
'class': 'video' means one thing, when you read a buffer it means another.

The only reason to use a special name is if it's a protocol level 
feature that can happen in all contexts.  But in that case we're better 
off with a schema, we don't want to push class descriptors everywhere.
Jan Kiszka May 16, 2010, 10:04 a.m. UTC | #8
Avi Kivity wrote:
> On 05/15/2010 11:59 AM, Jan Kiszka wrote:
>>
>>> Is this __class__ stuff documented anywhere?
>>>
>>>      
>> Not yet. Also, we should clarify the proposed private extension section
>> that only "__some_key" is reserved for downstream, not
>> '__some_other_key__' (i.e. downstream names must not end with '__').
>>
>>    
> 
> Why use such weird names at all?  What's wrong with 'class'?

It's too generic and we may too easily create dict keys that will be
misinterpreted.

Jan
Jan Kiszka May 16, 2010, 10:15 a.m. UTC | #9
Avi Kivity wrote:
> On 05/16/2010 12:37 PM, Paolo Bonzini wrote:
>> On 05/15/2010 07:31 PM, Avi Kivity wrote:
>>> On 05/15/2010 11:59 AM, Jan Kiszka wrote:
>>>>
>>>>> Is this __class__ stuff documented anywhere?
>>>>>
>>>> Not yet. Also, we should clarify the proposed private extension section
>>>> that only "__some_key" is reserved for downstream, not
>>>> '__some_other_key__' (i.e. downstream names must not end with '__').
>>>>
>>>
>>> Why use such weird names at all? What's wrong with 'class'?
>>
>> That it conflicts with e.g. PCI classes?
> 
> Won't the context tell it apart?  When you expect a pci function,
> 'class': 'video' means one thing, when you read a buffer it means another.

The point is to make this notation context-independent so that you can do

    def __json_obj_hook(self, dct):
        if '__class__' in dct:
            if dct['__class__'] == 'buffer':
                return base64.b64decode(dct['data'])
            else:
                return
        return dct

and

        line = json.loads(self.sockfile.readline(),
                          object_hook=self.__json_obj_hook)

i.e. parse the QMP stream into a proper representation without knowing
QMP at all.

> 
> The only reason to use a special name is if it's a protocol level
> feature that can happen in all contexts.  But in that case we're better
> off with a schema, we don't want to push class descriptors everywhere.
> 

Not everywhere, just into those nodes that aren't expressible with
native JSON types.

Jan
Paolo Bonzini May 16, 2010, 10:16 a.m. UTC | #10
On 05/16/2010 11:50 AM, Avi Kivity wrote:
> On 05/16/2010 12:37 PM, Paolo Bonzini wrote:
>> On 05/15/2010 07:31 PM, Avi Kivity wrote:
>>> On 05/15/2010 11:59 AM, Jan Kiszka wrote:
>>>>
>>>>> Is this __class__ stuff documented anywhere?
>>>>>
>>>> Not yet. Also, we should clarify the proposed private extension section
>>>> that only "__some_key" is reserved for downstream, not
>>>> '__some_other_key__' (i.e. downstream names must not end with '__').
>>>>
>>>
>>> Why use such weird names at all? What's wrong with 'class'?
>>
>> That it conflicts with e.g. PCI classes?
>
> Won't the context tell it apart?

Yes, of course, but you need to know the schema.  If you don't know the 
schema you don't know the context.

This QBuffer thing is something that a client QMP library could create 
automatically.  Keys in a separate namespace (like '__class__') have the 
advantage of being easily picked up automatically by a wrapper of the 
JSON parser; if you used simply 'class', such a layer would need to know 
a schema, or it wouldn't know that "context".

(BTW I'd prefer something like '__encoding__'; the word "class" suggests 
much more than what it is in reality).

Paolo
Avi Kivity May 16, 2010, 10:49 a.m. UTC | #11
On 05/16/2010 01:16 PM, Paolo Bonzini wrote:
> On 05/16/2010 11:50 AM, Avi Kivity wrote:
>> On 05/16/2010 12:37 PM, Paolo Bonzini wrote:
>>> On 05/15/2010 07:31 PM, Avi Kivity wrote:
>>>> On 05/15/2010 11:59 AM, Jan Kiszka wrote:
>>>>>
>>>>>> Is this __class__ stuff documented anywhere?
>>>>>>
>>>>> Not yet. Also, we should clarify the proposed private extension 
>>>>> section
>>>>> that only "__some_key" is reserved for downstream, not
>>>>> '__some_other_key__' (i.e. downstream names must not end with '__').
>>>>>
>>>>
>>>> Why use such weird names at all? What's wrong with 'class'?
>>>
>>> That it conflicts with e.g. PCI classes?
>>
>> Won't the context tell it apart?
>
> Yes, of course, it you need to know the schema.  If you don't know the 
> schema you don't know the context.
>
> This QBuffer thing is something that a client QMP library could create 
> automatically.  Keys in a separate namespace (like '__class__') have 
> the advantage of being easily picked up automatically by a wrapper of 
> the JSON parser; if you used simply 'class' such as layer would need 
> to know a schema, or it wouldn't know that "context".

Makes sense.  So this is a protocol feature and needs to be documented 
as such.

> (BTW I'd prefer something like '__encoding__'; the word "class" 
> suggests much more than what it is in reality).

Agreed.
Jamie Lokier May 16, 2010, 5:38 p.m. UTC | #12
Anthony Liguori wrote:
> Instead of encoding just as a string, it would be a good idea to encode 
> it as something like:
> 
> {'__class__': 'base64', 'data': ...}

Is there a benefit to the class indirection, over simply a keyword?:

{'__base64__': ...}

__class__ seems to suggest much more than it's being used for here.

-- Jamie
Jan Kiszka May 16, 2010, 6:03 p.m. UTC | #13
Jamie Lokier wrote:
> Anthony Liguori wrote:
>> Instead of encoding just as a string, it would be a good idea to encode 
>> it as something like:
>>
>> {'__class__': 'base64', 'data': ...}
> 
> Is there a benefit to the class indirection, over simply a keyword?:
> 
> {'__base64__': ...}
> 
> __class__ seems to suggest much more than it's being used for here.
> 

Depending on how sophisticated your parser is, you could directly push
the result into an object of the proper type. And we can add more
complex objects in the future that do not only consist of a single data
key. Note that this extension is not just about encoding, it is about
typecasting (dict -> custom type).

Jan
Anthony Liguori May 17, 2010, 12:12 a.m. UTC | #14
On Sun, May 16, 2010 at 12:38 PM, Jamie Lokier <jamie@shareable.org> wrote:
> Anthony Liguori wrote:
>> Instead of encoding just as a string, it would be a good idea to encode
>> it as something like:
>>
>> {'__class__': 'base64', 'data': ...}
>
> Is there a benefit to the class indirection, over simply a keyword?:
>
> {'__base64__': ...}
>
> __class__ seems to suggest much more than it's being used for here.

Yes.  The problem with JSON is that it's based on JavaScript and
JavaScript is goofy :-)

JavaScript's object mechanism doesn't map well to most other languages
since it's prototype based.  What we're calling QDict's are really
objects in JavaScript and they carry mostly no type information.  With
JS, it's very simple to treat a generic object as a specialized class
after instantiation which means objects don't need type information.

For non-prototype languages, which is the vast majority of clients,
it's necessary to have type information at instantiation time since
monkey patching is awkward at best.  That's why we need a special,
reserved, object member to carry type information.  The remainder of
the object members represent the serialized state of the object.

Another way to think of it, is that we're already transmitting objects
so we really just need a way to say, no, this isn't just a Dictionary,
it's really an instance of the following class.

Regards,

Anthony Liguori

> -- Jamie
>
Avi Kivity May 17, 2010, 6:48 a.m. UTC | #15
On 05/17/2010 03:12 AM, Anthony Liguori wrote:
> On Sun, May 16, 2010 at 12:38 PM, Jamie Lokier<jamie@shareable.org>  wrote:
>    
>> Anthony Liguori wrote:
>>      
>>> Instead of encoding just as a string, it would be a good idea to encode
>>> it as something like:
>>>
>>> {'__class__': 'base64', 'data': ...}
>>>        
>> Is there a benefit to the class indirection, over simply a keyword?:
>>
>> {'__base64__': ...}
>>
>> __class__ seems to suggest much more than it's being used for here.
>>      
> Yes.  The problem with JSON is that it's based on JavaScript and
> JavaScript is goofy :-)
>
>    

I suggest completely ignoring JavaScript.  JSON is simply an encoding 
for numbers, strings, arrays, and key-value stores.  Where's the goofiness?

> JavaScript's object mechanism doesn't map well to most other languages
> since it's prototype based.  What we're calling QDict's are really
> objects in JavaScript and they carry mostly no type information.  With
> JS, it's very simple to treat a generic object as a specialized class
> after instantiation which means objects don't need type information.
>
> For non-prototype languages, which is the vast majority of clients,
> it's necessary to have type information at instantiation time since
> monkey patching is awkward at best.  That's why we need a special,
> reserved, object member to carry type information.  The remainder of
> the object members represent the serialized state of the object.
>    

The alternative is to have a schema.  Sun RPC/XDR doesn't carry any type 
information (you can't even distinguish between a number and text) yet C 
clients have no problem extracting typed information from it.

Having __class__ everywhere means we're carrying the schema in every 
message instead of just once.

> Another way to think of it, is that we're already transmitting objects
> so we really just need a way to say, no, this isn't just a Dictionary,
> it's really an instance of the following class.
>    

Are there cases where the receiver cannot infer this from the context?

As I see it, dynamic type information is easiest for dynamically typed 
languages.  You just have a dict of class names -> object constructor 
and call the constructors at runtime.  Statically typed languages will 
need a schema to use objects, since the field types have to be known at 
compile time, not just run time.

Another wart is arrays: statically typed languages usually contain only 
objects of the same type, but here this isn't known until we process the 
first member.
Jan Kiszka May 17, 2010, 7:40 a.m. UTC | #16
Avi Kivity wrote:
> On 05/17/2010 03:12 AM, Anthony Liguori wrote:
>> On Sun, May 16, 2010 at 12:38 PM, Jamie Lokier<jamie@shareable.org> 
>> wrote:
>>   
>>> Anthony Liguori wrote:
>>>     
>>>> Instead of encoding just as a string, it would be a good idea to encode
>>>> it as something like:
>>>>
>>>> {'__class__': 'base64', 'data': ...}
>>>>        
>>> Is there a benefit to the class indirection, over simply a keyword?:
>>>
>>> {'__base64__': ...}
>>>
>>> __class__ seems to suggest much more than it's being used for here.
>>>      
>> Yes.  The problem with JSON is that it's based on JavaScript and
>> JavaScript is goofy :-)
>>
>>    
> 
> I suggest completely ignoring JavaScript.  JSON is simply an encoding
> for numbers, strings, arrays, and key-value stores.  Where's the goofiness?
> 
>> JavaScript's object mechanism doesn't map well to most other languages
>> since it's prototype based.  What we're calling QDict's are really
>> objects in JavaScript and they carry mostly no type information.  With
>> JS, it's very simple to treat a generic object as a specialized class
>> after instantiation which means objects don't need type information.
>>
>> For non-prototype languages, which is the vast majority of clients,
>> it's necessary to have type information at instantiation time since
>> monkey patching is awkward at best.  That's why we need a special,
>> reserved, object member to carry type information.  The remainder of
>> the object members represent the serialized state of the object.
>>    
> 
> The alternative is to have a schema.  Sun RPC/XDR doesn't carry any type
> information (you can't even distinguish between a number and text) yet C
> clients have to problem extracting typed information from it.
> 
> Having __class__ everywhere means we're carrying the schema in every
> message instead of just once.

The device_show command is already an example where you don't have a
predefined schema. It is derived from the data stream that encodes the
vmstate fields. So far we have no collision between base64-encoded
buffers and real strings, but this may actually change when we start
annotating the fields with symbolic constants.

I really don't see the problem with __class__. Being a text protocol,
JSON is already fairly verbose.

Jan
Avi Kivity May 17, 2010, 7:45 a.m. UTC | #17
On 05/17/2010 10:40 AM, Jan Kiszka wrote:
>
>> The alternative is to have a schema.  Sun RPC/XDR doesn't carry any type
>> information (you can't even distinguish between a number and text) yet C
>> clients have to problem extracting typed information from it.
>>
>> Having __class__ everywhere means we're carrying the schema in every
>> message instead of just once.
>>      
> The device_show command is already an example where you don't have a
> predefined schema. It is derived from the data stream the encodes the
> vmstate fields. So far we have no collision between base64-encoded
> buffers and real strings, but this may actually change when we start
> annotating the fields with symbolic constants.
>    

What is the receiver to do with it?

If it doesn't know the schema (and there is no schema), then all it can 
do is display the key/values.  If it does know the schema, then 
__class__ is unnecessary.

My worry is that __class__ will make the protocol more ad-hoc.

> I really don't see the problem with __class__. Being a text protocol,
> JSON is already fairly verbose.
>    

The problem is not the verbosity, it's that information is carried too 
late.  Many clients want to know this information at compile time or 
initialization time, and we are providing it at object instantiation time.
Jan Kiszka May 17, 2010, 7:57 a.m. UTC | #18
Avi Kivity wrote:
> On 05/17/2010 10:40 AM, Jan Kiszka wrote:
>>> The alternative is to have a schema.  Sun RPC/XDR doesn't carry any type
>>> information (you can't even distinguish between a number and text) yet C
>>> clients have to problem extracting typed information from it.
>>>
>>> Having __class__ everywhere means we're carrying the schema in every
>>> message instead of just once.
>>>      
>> The device_show command is already an example where you don't have a
>> predefined schema. It is derived from the data stream the encodes the
>> vmstate fields. So far we have no collision between base64-encoded
>> buffers and real strings, but this may actually change when we start
>> annotating the fields with symbolic constants.
>>    
> 
> What is the receiver to do with it?
> 
> If it doesn't know the schema (and there is no schema), then all it can 
> do is display the key/values.  If it does know the schema, then 
> __class__ is unnecessary.

There is a schema describing the fields (name, size, number of
elements), but their types (int, buffer, sub-field, array of X) are
derived from the JSON objects (ie. the JSON parser does this job).

> 
> My worry is that __class__ will make the protocol more ad-hoc.
> 
>> I really don't see the problem with __class__. Being a text protocol,
>> JSON is already fairly verbose.
>>    
> 
> The problem is not the verbosity, it's that information is carried too 
> late.  Many clients want to know this information at compile time or 
> initialization time, and we are providing it at object instantiating time.

What clients do you have in mind?

Jan
Avi Kivity May 17, 2010, 8:10 a.m. UTC | #19
On 05/17/2010 10:57 AM, Jan Kiszka wrote:
> Avi Kivity wrote:
>    
>> On 05/17/2010 10:40 AM, Jan Kiszka wrote:
>>      
>>>> The alternative is to have a schema.  Sun RPC/XDR doesn't carry any type
>>>> information (you can't even distinguish between a number and text) yet C
>>>> clients have to problem extracting typed information from it.
>>>>
>>>> Having __class__ everywhere means we're carrying the schema in every
>>>> message instead of just once.
>>>>
>>>>          
>>> The device_show command is already an example where you don't have a
>>> predefined schema. It is derived from the data stream the encodes the
>>> vmstate fields. So far we have no collision between base64-encoded
>>> buffers and real strings, but this may actually change when we start
>>> annotating the fields with symbolic constants.
>>>
>>>        
>> What is the receiver to do with it?
>>
>> If it doesn't know the schema (and there is no schema), then all it can
>> do is display the key/values.  If it does know the schema, then
>> __class__ is unnecessary.
>>      
> There is a schema describing the fields (name, size, number of
> elements),

Surely the schema has to describe the type as well?  If it does, you can 
use the schema to generate a classes at compile time.

>   but their types (int, buffer, sub-field, array of X) are
> derived from the JSON objects (ie. the JSON parser does this job).
>    

The names of fields are also type information.


>>> I really don't see the problem with __class__. Being a text protocol,
>>> JSON is already fairly verbose.
>>>
>>>        
>> The problem is not the verbosity, it's that information is carried too
>> late.  Many clients want to know this information at compile time or
>> initialization time, and we are providing it at object instantiating time.
>>      
> What clients do you have in mind?
>
>    

Any client that doesn't allow object types to be created dynamically; C, 
C++, Java, and the like could all benefit from a schema and wouldn't be 
able to do much with __class__ unless all classes were predefined.  
Python, JavaScript, and the like wouldn't care.

Another way of looking at it: if the client sees { __class__: foo, f1: 
10, f2: 9 }, it cannot derive any information from __class__ unless it 
was aware of foo beforehand.  If that's the case, let's make it part of 
the schema so it is available at compile time instead of runtime.
Avi Kivity May 17, 2010, 8:13 a.m. UTC | #20
On 05/17/2010 11:10 AM, Avi Kivity wrote:
>
> Another way of looking at it: if the client sees { __class__: foo, f1: 
> 10, f2: 9 }, it cannot derive any information from __class__ unless it 
> was aware of foo beforehand.  If that's the case, let's make it part 
> of the schema so it is available at compile time instead of runtime.
>

Or both.  That's similar to a non-object field's type being described 
both in the schema and during runtime.  A static parser can verify 
__class__ matches the expectations, a dynamic parser can look up 
__class__ and call the appropriate constructor.
Jan Kiszka May 17, 2010, 8:55 a.m. UTC | #21
Avi Kivity wrote:
> On 05/17/2010 10:57 AM, Jan Kiszka wrote:
>> Avi Kivity wrote:
>>    
>>> On 05/17/2010 10:40 AM, Jan Kiszka wrote:
>>>      
>>>>> The alternative is to have a schema.  Sun RPC/XDR doesn't carry any type
>>>>> information (you can't even distinguish between a number and text) yet C
>>>>> clients have to problem extracting typed information from it.
>>>>>
>>>>> Having __class__ everywhere means we're carrying the schema in every
>>>>> message instead of just once.
>>>>>
>>>>>          
>>>> The device_show command is already an example where you don't have a
>>>> predefined schema. It is derived from the data stream the encodes the
>>>> vmstate fields. So far we have no collision between base64-encoded
>>>> buffers and real strings, but this may actually change when we start
>>>> annotating the fields with symbolic constants.
>>>>
>>>>        
>>> What is the receiver to do with it?
>>>
>>> If it doesn't know the schema (and there is no schema), then all it can
>>> do is display the key/values.  If it does know the schema, then
>>> __class__ is unnecessary.
>>>      
>> There is a schema describing the fields (name, size, number of
>> elements),
> 
> Surely the schema has to describe the type as well?  If it does, you can 
> use the schema to generate a classes at compile time.
> 
>>   but their types (int, buffer, sub-field, array of X) are
>> derived from the JSON objects (ie. the JSON parser does this job).
>>    
> 
> The names of fields are also type information.

Not in the case of device_show. The clients have no idea of the vmstate
structures before they were transfered. Granted, that will likely remain
a special case in the QMP command set.

> 
>>>> I really don't see the problem with __class__. Being a text protocol,
>>>> JSON is already fairly verbose.
>>>>
>>>>        
>>> The problem is not the verbosity, it's that information is carried too
>>> late.  Many clients want to know this information at compile time or
>>> initialization time, and we are providing it at object instantiating time.
>>>      
>> What clients do you have in mind?
>>
>>    
> 
> Any client that doesn't allow object types to be created dynamically; C, 
> C++, Java, and the like could all benefit from a schema and wouldn't be 
> able to do much with __class__ unless all classes were predefined.  
> Python, JavaScript, and the like wouldn't care.
> 
> Another way of looking at it: if the client sees { __class__: foo, f1: 
> 10, f2: 9 }, it cannot derive any information from __class__ unless it 
> was aware of foo beforehand.  If that's the case, let's make it part of 
> the schema so it is available at compile time instead of runtime.
> 

Maybe a misunderstanding on my side: I'm not arguing against predefining
what __class__ values exist and how dicts that carry such keys are encoded.

Jan
Avi Kivity May 17, 2010, 8:59 a.m. UTC | #22
On 05/17/2010 11:55 AM, Jan Kiszka wrote:
>
>> The names of fields are also type information.
>>      
> Not in the case of device_show. The clients have no idea of the vmstate
> structures before they were transfered. Granted, that will likely remain
> a special case in the QMP command set.
>    

For that use case, I agree.  Maybe we should send both the parsed and 
unparsed information.

But if the client isn't going to interpret the object and only display 
it, then there is no need for __class__?
Jan Kiszka May 17, 2010, 9:17 a.m. UTC | #23
Avi Kivity wrote:
> On 05/17/2010 11:55 AM, Jan Kiszka wrote:
>>> The names of fields are also type information.
>>>      
>> Not in the case of device_show. The clients have no idea of the vmstate
>> structures before they were transfered. Granted, that will likely remain
>> a special case in the QMP command set.
>>    
> 
> For that use case, I agree.  Maybe we should send both the parsed and 
> unparsed information.

Now I can't parse what you mean.

> 
> But if the client isn't going to interpret the object and only display 
> it, then there is no need for __class__?

For that uncommon case, yes. But the common one is to perform a bit more
than raw JSON dictionary printing.

Jan
Avi Kivity May 17, 2010, 9:29 a.m. UTC | #24
On 05/17/2010 12:17 PM, Jan Kiszka wrote:
> Avi Kivity wrote:
>    
>> On 05/17/2010 11:55 AM, Jan Kiszka wrote:
>>      
>>>> The names of fields are also type information.
>>>>
>>>>          
>>> Not in the case of device_show. The clients have no idea of the vmstate
>>> structures before they were transfered. Granted, that will likely remain
>>> a special case in the QMP command set.
>>>
>>>        
>> For that use case, I agree.  Maybe we should send both the parsed and
>> unparsed information.
>>      
> Now I can't parse what you mean.
>    


Sorry.  I meant that if we have a raw buffer that we can decode (like 
pci config space and its fields) maybe it makes sense to send both 
formats.  The raw buffer is something likely to be stable, and the 
decoded fields are more readable.  But I realize now that doesn't make 
sense in the context of base64 encoding which started all this.


>> But if the client isn't going to interpret the object and only display
>> it, then there is no need for __class__?
>>      
> For that uncommon case, yes. But the common one is to perform a bit more
> than raw JSON dictionary printing.
>    

But the common one is likely to know the type of the object beforehand 
(or it can't do much beyond printing, since it has no idea what fields 
to expect).
Anthony Liguori May 17, 2010, 1:05 p.m. UTC | #25
On 05/17/2010 02:45 AM, Avi Kivity wrote:
> On 05/17/2010 10:40 AM, Jan Kiszka wrote:
>>
>>> The alternative is to have a schema.  Sun RPC/XDR doesn't carry any 
>>> type
>>> information (you can't even distinguish between a number and text) 
>>> yet C
>>> clients have to problem extracting typed information from it.
>>>
>>> Having __class__ everywhere means we're carrying the schema in every
>>> message instead of just once.
>> The device_show command is already an example where you don't have a
>> predefined schema. It is derived from the data stream the encodes the
>> vmstate fields. So far we have no collision between base64-encoded
>> buffers and real strings, but this may actually change when we start
>> annotating the fields with symbolic constants.
>
> What is the receiver to do with it?
>
> If it doesn't know the schema (and there is no schema), then all it 
> can do is display the key/values.  If it does know the schema, then 
> __class__ is unnecessary.
>
> My worry is that __class__ will make the protocol more ad-hoc.
>
>> I really don't see the problem with __class__. Being a text protocol,
>> JSON is already fairly verbose.
>
> The problem is not the verbosity, it's that information is carried too 
> late.  Many clients want to know this information at compile time or 
> initialization time, and we are providing it at object instantiating 
> time.

Whether a protocol is self-describing is orthogonal to whether it's well 
defined (ala a schema).  A self-describing protocol is very convenient 
for dynamic languages like Python.  We should also provide a formal 
schema though for languages that require IDL to generate bindings (like C).

Regards,

Anthony Liguori
Jamie Lokier May 17, 2010, 8:20 p.m. UTC | #26
Jan Kiszka wrote:
> Jamie Lokier wrote:
> > Anthony Liguori wrote:
> >> Instead of encoding just as a string, it would be a good idea to encode 
> >> it as something like:
> >>
> >> {'__class__': 'base64', 'data': ...}
> > 
> > Is there a benefit to the class indirection, over simply a keyword?:
> > 
> > {'__base64__': ...}
> > 
> > __class__ seems to suggest much more than it's being used for here.
> > 
> 
> Depending on how sophisticated your parser is, you could directly push
> the result into an object of the proper type. And we can add more
> complex objects in the future that do not only consists of a single data
> key. Note that this extension is not just about encoding, it is about
> typecasting (dict -> custom type).

Sure, if that's the plan.

Does it make sense to combine encoding and custom types in this way?
It looks like mixing syntax and semantics, which has consequences for
code using generic parsers with separate semantic layer, but I realise
there's no "correct" answer.

Back to the syntax: I'm under the impression from earlier discussion
that the '__*__' keyspace is reserved, so even types could use the
compact syntax?

Or is there something Javascript-ish (and not merely JSON-ish) about
'__class__' in particular which makes it appropriate?

-- Jamie
Markus Armbruster May 18, 2010, 12:27 p.m. UTC | #27
Jan Kiszka <jan.kiszka@siemens.com> writes:

> Avi Kivity wrote:
>> On 05/17/2010 10:57 AM, Jan Kiszka wrote:
>>> Avi Kivity wrote:
>>>    
>>>> On 05/17/2010 10:40 AM, Jan Kiszka wrote:
>>>>      
>>>>>> The alternative is to have a schema.  Sun RPC/XDR doesn't carry any type
>>>>>> information (you can't even distinguish between a number and text) yet C
>>>>>> clients have to problem extracting typed information from it.
>>>>>>
>>>>>> Having __class__ everywhere means we're carrying the schema in every
>>>>>> message instead of just once.
>>>>>>
>>>>>>          
>>>>> The device_show command is already an example where you don't have a
>>>>> predefined schema. It is derived from the data stream the encodes the
>>>>> vmstate fields. So far we have no collision between base64-encoded
>>>>> buffers and real strings, but this may actually change when we start
>>>>> annotating the fields with symbolic constants.
>>>>>
>>>>>        
>>>> What is the receiver to do with it?
>>>>
>>>> If it doesn't know the schema (and there is no schema), then all it can
>>>> do is display the key/values.  If it does know the schema, then
>>>> __class__ is unnecessary.
>>>>      
>>> There is a schema describing the fields (name, size, number of
>>> elements),
>> 
>> Surely the schema has to describe the type as well?  If it does, you can 
>> use the schema to generate a classes at compile time.

Doesn't that tie you to a specific version of QMP at compile-time?

>>>   but their types (int, buffer, sub-field, array of X) are
>>> derived from the JSON objects (ie. the JSON parser does this job).
>>>    
>> 
>> The names of fields are also type information.
>
> Not in the case of device_show. The clients have no idea of the vmstate
> structures before they were transfered. Granted, that will likely remain
> a special case in the QMP command set.

qdev device properties are similar.  Right now, they occur only as
arguments of device_add.  When do_info_qtree() gets converted, they'll
appear in results.

[...]
Markus Armbruster May 18, 2010, 12:28 p.m. UTC | #28
Anthony Liguori <anthony@codemonkey.ws> writes:

> On 05/17/2010 02:45 AM, Avi Kivity wrote:
>> On 05/17/2010 10:40 AM, Jan Kiszka wrote:
>>>
>>>> The alternative is to have a schema.  Sun RPC/XDR doesn't carry
>>>> any type
>>>> information (you can't even distinguish between a number and text)
>>>> yet C
>>>> clients have to problem extracting typed information from it.
>>>>
>>>> Having __class__ everywhere means we're carrying the schema in every
>>>> message instead of just once.
>>> The device_show command is already an example where you don't have a
>>> predefined schema. It is derived from the data stream the encodes the
>>> vmstate fields. So far we have no collision between base64-encoded
>>> buffers and real strings, but this may actually change when we start
>>> annotating the fields with symbolic constants.
>>
>> What is the receiver to do with it?
>>
>> If it doesn't know the schema (and there is no schema), then all it
>> can do is display the key/values.  If it does know the schema, then
>> __class__ is unnecessary.
>>
>> My worry is that __class__ will make the protocol more ad-hoc.
>>
>>> I really don't see the problem with __class__. Being a text protocol,
>>> JSON is already fairly verbose.
>>
>> The problem is not the verbosity, it's that information is carried
>> too late.  Many clients want to know this information at compile
>> time or initialization time, and we are providing it at object
>> instantiating time.
>
> Whether a protocol is self-describing is orthogonal to whether it's
> well defined (ala a schema).  A self-describing protocol is very
> convenient for dynamic languages like Python.  We should also provide
> a formal schema though for languages that require IDL to generate
> bindings (like C).

And that schema should be available over QMP.
Avi Kivity May 18, 2010, 5:24 p.m. UTC | #29
On 05/18/2010 03:27 PM, Markus Armbruster wrote:
>
>>> Surely the schema has to describe the type as well?  If it does, you can
>>> use the schema to generate a classes at compile time.
>>>        
> Doesn't that tie you to a specific version of QMP at compile-time?
>    

The client needs to ignore anything not provided by the schema to be 
forward compatible.

(alternatively we make sure anything new is explicitly enabled by the 
client, so it can parse strictly according to the schema).

>    
>>>>    but their types (int, buffer, sub-field, array of X) are
>>>> derived from the JSON objects (ie. the JSON parser does this job).
>>>>
>>>>          
>>> The names of fields are also type information.
>>>        
>> Not in the case of device_show. The clients have no idea of the vmstate
>> structures before they were transfered. Granted, that will likely remain
>> a special case in the QMP command set.
>>      
> qdev device properties are similar.  Right now, they occur only as
> arguments of device_add.  When do_info_qtree() gets converted, they'll
> appear in results.
>    

This sounds like a horror movie in the making, "sysfs: the return".  The 
qdev tree is completely undocumented, so once again the code dictates 
the protocol.  Any bug in the qdev hierarchy will be hardcoded forever 
and ever, or we have to add a new layer of indirection to have a 
separate internal qdev and an external qdev-for-qmp tree(s).
diff mbox

Patch

diff --git a/Makefile b/Makefile
index eb9e02b..065c1a5 100644
--- a/Makefile
+++ b/Makefile
@@ -149,7 +149,8 @@  check-qstring: check-qstring.o qstring.o qemu-malloc.o
 check-qdict: check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qemu-malloc.o qlist.o
 check-qlist: check-qlist.o qlist.o qint.o qemu-malloc.o
 check-qfloat: check-qfloat.o qfloat.o qemu-malloc.o
-check-qjson: check-qjson.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o qjson.o json-streamer.o json-lexer.o json-parser.o qemu-malloc.o
+check-qjson: check-qjson.o qfloat.o qint.o qdict.o qstring.o qlist.o qbool.o qbuffer.o base64.o qjson.o json-streamer.o json-lexer.o json-parser.o qemu-malloc.o
+check-qbuffer: check-qbuffer.o qbuffer.o base64.o qstring.o qemu-malloc.o
 
 clean:
 # avoid old build problems by removing potentially incorrect old files
diff --git a/Makefile.objs b/Makefile.objs
index 3d2a27a..52c8ec7 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,6 +1,6 @@ 
 #######################################################################
 # QObject
-qobject-obj-y = qint.o qstring.o qdict.o qlist.o qfloat.o qbool.o
+qobject-obj-y = qint.o qstring.o qdict.o qlist.o qfloat.o qbool.o qbuffer.o
 qobject-obj-y += qjson.o json-lexer.o json-streamer.o json-parser.o
 qobject-obj-y += qerror.o base64.o
 
diff --git a/check-qbuffer.c b/check-qbuffer.c
new file mode 100644
index 0000000..b490230
--- /dev/null
+++ b/check-qbuffer.c
@@ -0,0 +1,172 @@ 
+/*
+ * QBuffer unit-tests.
+ *
+ * Copyright (C) 2010 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include <check.h>
+
+#include "qbuffer.h"
+#include "qemu-common.h"
+
+const char data[] = "some data";
+
+START_TEST(qbuffer_from_data_test)
+{
+    QBuffer *qbuffer;
+
+    qbuffer = qbuffer_from_data(data, sizeof(data));
+    fail_unless(qbuffer != NULL);
+    fail_unless(qbuffer->base.refcnt == 1);
+    fail_unless(memcmp(data, qbuffer->data, sizeof(data)) == 0);
+    fail_unless(qbuffer->size == sizeof(data));
+    fail_unless(qobject_type(QOBJECT(qbuffer)) == QTYPE_QBUFFER);
+
+    /* destroy doesn't exist yet */
+    qemu_free(qbuffer->data);
+    qemu_free(qbuffer);
+}
+END_TEST
+
+START_TEST(qbuffer_destroy_test)
+{
+    QBuffer *qbuffer = qbuffer_from_data(data, sizeof(data));
+
+    QDECREF(qbuffer);
+}
+END_TEST
+
+START_TEST(qbuffer_get_data_test)
+{
+    QBuffer *qbuffer;
+    const void *ret_data;
+
+    qbuffer = qbuffer_from_data(data, sizeof(data));
+    ret_data = qbuffer_get_data(qbuffer);
+    fail_unless(memcmp(ret_data, data, sizeof(data)) == 0);
+
+    QDECREF(qbuffer);
+}
+END_TEST
+
+START_TEST(qbuffer_get_size_test)
+{
+    QBuffer *qbuffer;
+
+    qbuffer = qbuffer_from_data(data, sizeof(data));
+    fail_unless(qbuffer_get_size(qbuffer) == sizeof(data));
+
+    QDECREF(qbuffer);
+}
+END_TEST
+
+START_TEST(qbuffer_from_qstring_test)
+{
+    const struct {
+        const char *encoded;
+        const char *decoded;
+    } pattern[3] = {
+        {
+            .encoded = "SGVsbG8sIFFCdWZmZXIhCg==",
+            .decoded = "Hello, QBuffer!",
+        },
+        {
+            .encoded = "SGVsbG8gUUJ1ZmZlcgo=",
+            .decoded = "Hello QBuffer",
+        },
+        {
+            .encoded = "SGVsbG8gUUJ1ZmZlciEK===",
+            .decoded = "Hello QBuffer!",
+        },
+    };
+    QBuffer *qbuffer;
+    QString *qstring;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+        qstring = qstring_from_str(pattern[i].encoded);
+        qbuffer = qbuffer_from_qstring(qstring);
+        QDECREF(qstring);
+        /* Decoding must succeed and yield the complete expected text. */
+        fail_unless(qbuffer != NULL);
+        fail_unless(memcmp(qbuffer_get_data(qbuffer), pattern[i].decoded,
+                    strlen(pattern[i].decoded)) == 0);
+
+        QDECREF(qbuffer);
+    }
+}
+END_TEST
+
+START_TEST(qbuffer_from_invalid_qstring_test)
+{
+    const char *pattern[] = {
+        "SGVsbG8sIFFCdWZmZXIhC",
+        "SGVsbG8gU=UJ1ZmZlcgo",
+        "SGVsbG8gUUJ1*ZmZlciEK",
+    };
+    QBuffer *qbuffer;
+    QString *qstring;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+        qstring = qstring_from_str(pattern[i]);
+        qbuffer = qbuffer_from_qstring(qstring);
+        QDECREF(qstring);
+
+        fail_unless(qbuffer == NULL);
+    }
+}
+END_TEST
+
+START_TEST(qobject_to_qbuffer_test)
+{
+    QBuffer *qbuffer;
+
+    qbuffer = qbuffer_from_data(data, sizeof(data));
+    fail_unless(qobject_to_qbuffer(QOBJECT(qbuffer)) == qbuffer);
+
+    QDECREF(qbuffer);
+}
+END_TEST
+
+static Suite *qbuffer_suite(void)
+{
+    Suite *s;
+    TCase *qbuffer_public_tcase;
+
+    s = suite_create("QBuffer test-suite");
+
+    qbuffer_public_tcase = tcase_create("Public Interface");
+    suite_add_tcase(s, qbuffer_public_tcase);
+    tcase_add_test(qbuffer_public_tcase, qbuffer_from_data_test);
+    tcase_add_test(qbuffer_public_tcase, qbuffer_destroy_test);
+    tcase_add_test(qbuffer_public_tcase, qbuffer_get_data_test);
+    tcase_add_test(qbuffer_public_tcase, qbuffer_get_size_test);
+    tcase_add_test(qbuffer_public_tcase, qbuffer_from_qstring_test);
+    tcase_add_test(qbuffer_public_tcase, qbuffer_from_invalid_qstring_test);
+    tcase_add_test(qbuffer_public_tcase, qobject_to_qbuffer_test);
+
+    return s;
+}
+
+int main(void)
+{
+    int nf;
+    Suite *s;
+    SRunner *sr;
+
+    s = qbuffer_suite();
+    sr = srunner_create(s);
+
+    srunner_run_all(sr, CK_NORMAL);
+    nf = srunner_ntests_failed(sr);
+    srunner_free(sr);
+
+    return (nf == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/configure b/configure
index 36d028f..9ff9308 100755
--- a/configure
+++ b/configure
@@ -2280,7 +2280,7 @@  if test `expr "$target_list" : ".*softmmu.*"` != 0 ; then
   if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
       tools="qemu-nbd\$(EXESUF) $tools"
     if [ "$check_utests" = "yes" ]; then
-      tools="check-qint check-qstring check-qdict check-qlist $tools"
+      tools="check-qint check-qstring check-qdict check-qlist check-qbuffer $tools"
       tools="check-qfloat check-qjson $tools"
     fi
   fi
diff --git a/qbuffer.c b/qbuffer.c
new file mode 100644
index 0000000..704d170
--- /dev/null
+++ b/qbuffer.c
@@ -0,0 +1,116 @@ 
+/*
+ * QBuffer Module
+ *
+ * Copyright (C) 2010 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qbuffer.h"
+#include "qobject.h"
+#include "qemu-common.h"
+#include "base64.h"
+
+static void qbuffer_destroy_obj(QObject *obj);
+
+static const QType qbuffer_type = {
+    .code = QTYPE_QBUFFER,
+    .destroy = qbuffer_destroy_obj,
+};
+
+/**
+ * qbuffer_from_data(): Create a new QBuffer from an existing data blob
+ *
+ * Returns strong reference.
+ */
+QBuffer *qbuffer_from_data(const void *data, size_t size)
+{
+    QBuffer *qb;
+
+    qb = qemu_malloc(sizeof(*qb));
+    qb->data = qemu_malloc(size);
+    memcpy(qb->data, data, size);
+    qb->size = size;
+    QOBJECT_INIT(qb, &qbuffer_type);
+
+    return qb;
+}
+
+/**
+ * qbuffer_from_qstring(): Create a new QBuffer from a QString object that
+ * contains the data as a stream of base64-encoded bytes
+ *
+ * Returns strong reference.
+ */
+QBuffer *qbuffer_from_qstring(const QString *string)
+{
+    const char *str = qstring_get_str(string);
+    size_t str_len;
+    QBuffer *qb;
+
+    qb = qemu_malloc(sizeof(*qb));
+
+    str_len = strlen(str);
+    while (str_len > 0 && str[str_len - 1] == '=') {
+        str_len--;
+    }
+    qb->size = (str_len / 4) * 3 + ((str_len % 4) * 3) / 4;
+    qb->data = qemu_malloc(qb->size);
+
+    QOBJECT_INIT(qb, &qbuffer_type);
+
+    if (base64_decode(str, str_len, qb->data) < 0) {
+        qbuffer_destroy_obj(QOBJECT(qb));
+        return NULL;
+    }
+
+    return qb;
+}
+
+/**
+ * qbuffer_get_data(): Return pointer to stored data
+ *
+ * NOTE: Should be used with caution, if the object is deallocated
+ * this pointer becomes invalid.
+ */
+const void *qbuffer_get_data(const QBuffer *qb)
+{
+    return qb->data;
+}
+
+/**
+ * qbuffer_get_size(): Return size of stored data
+ */
+size_t qbuffer_get_size(const QBuffer *qb)
+{
+    return qb->size;
+}
+
+/**
+ * qobject_to_qbuffer(): Convert a QObject into a QBuffer
+ */
+QBuffer *qobject_to_qbuffer(const QObject *obj)
+{
+    if (qobject_type(obj) != QTYPE_QBUFFER)
+        return NULL;
+
+    return container_of(obj, QBuffer, base);
+}
+
+/**
+ * qbuffer_destroy_obj(): Free all memory allocated by a QBuffer object
+ */
+static void qbuffer_destroy_obj(QObject *obj)
+{
+    QBuffer *qb;
+
+    assert(obj != NULL);
+    qb = qobject_to_qbuffer(obj);
+    qemu_free(qb->data);
+    qemu_free(qb);
+}
diff --git a/qbuffer.h b/qbuffer.h
new file mode 100644
index 0000000..2e01078
--- /dev/null
+++ b/qbuffer.h
@@ -0,0 +1,33 @@ 
+/*
+ * QBuffer Module
+ *
+ * Copyright (C) 2010 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QBUFFER_H
+#define QBUFFER_H
+
+#include <stdint.h>
+#include "qobject.h"
+#include "qstring.h"
+
+typedef struct QBuffer {
+    QObject_HEAD;
+    void *data;
+    size_t size;
+} QBuffer;
+
+QBuffer *qbuffer_from_data(const void *data, size_t size);
+QBuffer *qbuffer_from_qstring(const QString *string);
+const void *qbuffer_get_data(const QBuffer *qb);
+size_t qbuffer_get_size(const QBuffer *qb);
+QBuffer *qobject_to_qbuffer(const QObject *obj);
+
+#endif /* QBUFFER_H */
diff --git a/qjson.c b/qjson.c
index 483c667..4d1c21a 100644
--- a/qjson.c
+++ b/qjson.c
@@ -19,7 +19,9 @@ 
 #include "qlist.h"
 #include "qbool.h"
 #include "qfloat.h"
+#include "qbuffer.h"
 #include "qdict.h"
+#include "base64.h"
 
 typedef struct JSONParsingState
 {
@@ -235,6 +237,20 @@  static void to_json(const QObject *obj, QString *str)
         }
         break;
     }
+    case QTYPE_QBUFFER: {
+        QBuffer *val = qobject_to_qbuffer(obj);
+        size_t data_size = qbuffer_get_size(val);
+        size_t str_len = ((data_size + 2) / 3) * 4;
+        char *buffer = qemu_malloc(str_len + 3);
+
+        buffer[0] = '"';
+        base64_encode(qbuffer_get_data(val), data_size, buffer + 1);
+        buffer[str_len + 1] = '"';
+        buffer[str_len + 2] = 0;
+        qstring_append(str, buffer);
+        qemu_free(buffer);
+        break;
+    }
     case QTYPE_QERROR:
         /* XXX: should QError be emitted? */
     case QTYPE_NONE:
diff --git a/qobject.h b/qobject.h
index 07de211..45c4fa0 100644
--- a/qobject.h
+++ b/qobject.h
@@ -44,6 +44,7 @@  typedef enum {
     QTYPE_QFLOAT,
     QTYPE_QBOOL,
     QTYPE_QERROR,
+    QTYPE_QBUFFER,
 } qtype_code;
 
 struct QObject;