Patchwork [4/5] backdoor: [softmmu] Add QEMU-side proxy to "libbackdoor.a"

login
register
mail settings
Submitter Lluís Vilanova
Date Sept. 29, 2011, 1:47 p.m.
Message ID <20110929134749.19559.26774.stgit@ginnungagap.bsc.es>
Download mbox | patch
Permalink /patch/116957/
State New
Headers show

Comments

Lluís Vilanova - Sept. 29, 2011, 1:47 p.m.
Uses a virtual device to proxy uses of the backdoor communication channel to the
user-provided code.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 Makefile.objs           |    1 
 backdoor/qemu/softmmu.c |  124 +++++++++++++++++++++++++++++++++++++++++++++++
 hw/pci.h                |    1 
 3 files changed, 126 insertions(+), 0 deletions(-)
 create mode 100644 backdoor/qemu/softmmu.c
Blue Swirl - Sept. 29, 2011, 8:42 p.m.
2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
> Uses a virtual device to proxy uses of the backdoor communication channel to the
> user-provided code.
>
> Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
> ---
>  Makefile.objs           |    1
>  backdoor/qemu/softmmu.c |  124 +++++++++++++++++++++++++++++++++++++++++++++++
>  hw/pci.h                |    1
>  3 files changed, 126 insertions(+), 0 deletions(-)
>  create mode 100644 backdoor/qemu/softmmu.c
>
> diff --git a/Makefile.objs b/Makefile.objs
> index d39074d..5f54d10 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -398,6 +398,7 @@ $(trace-obj-y): $(GENERATED_HEADERS)
>  # backdoor
>
>  backdoor-nested-$(CONFIG_USER_ONLY) += user.o
> +backdoor-nested-$(CONFIG_SOFTMMU) += softmmu.o
>
>  backdoor-obj-y += $(addprefix backdoor/qemu/, $(backdoor-nested-y))
>
> diff --git a/backdoor/qemu/softmmu.c b/backdoor/qemu/softmmu.c
> new file mode 100644
> index 0000000..fdd3a25
> --- /dev/null
> +++ b/backdoor/qemu/softmmu.c
> @@ -0,0 +1,124 @@
> +/*
> + * QEMU-side management of backdoor channels in softmmu emulation.
> + *
> + * Copyright (C) 2011 Lluís Vilanova <vilanova@ac.upc.edu>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "hw/pci.h"
> +#include "backdoor/qemu/qemu-backdoor.h"
> +
> +
> +#define PAGE_SIZE TARGET_PAGE_SIZE
> +#define CTRL_BYTES sizeof(uint64_t)
> +
> +
> +typedef struct State
> +{
> +    PCIDevice dev;
> +
> +    uint8_t pages;
> +    uint64_t size;
> +
> +    uint64_t cmd;
> +
> +    void *data_ptr;
> +    MemoryRegion data;
> +    MemoryRegion control;
> +} State;

Please use BackdoorState.

> +
> +
> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
> +{
> +    State *s = opaque;
> +
> +    uint64_t res = ldq_p(&s->size);
> +    uint8_t *resb = (uint8_t*)&res;
> +    return resb[addr % CTRL_BYTES];

I don't think these lines do what you mean, but I'm also not sure what
it is supposed to mean.

> +}
> +
> +static void control_io_write(void *opaque, target_phys_addr_t addr, uint64_t data, unsigned size)
> +{
> +    State *s = opaque;
> +
> +    uint8_t *cmdb = (uint8_t*)&s->cmd;
> +    cmdb[addr % CTRL_BYTES] = (uint8_t)data;
> +
> +    if ((addr + size) % CTRL_BYTES == 0) {
> +        qemu_backdoor(ldq_p(&s->cmd), s->data_ptr);
> +    }

Same here.

> +}
> +
> +static const MemoryRegionOps control_ops = {
> +    .read = control_io_read,
> +    .write = control_io_write,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .impl = {
> +        .min_access_size = 1,
> +        .max_access_size = 1,
> +    },
> +};
> +
> +
> +static int init(PCIDevice *dev)
> +{
> +    State *s = DO_UPCAST(State, dev, dev);
> +
> +    if (s->pages < 1) {
> +        fprintf(stderr, "error: backdoor: "
> +                "the data channel must have one or more pages\n");
> +        return -1;
> +    }
> +    s->size = s->pages * PAGE_SIZE;
> +
> +    pci_set_word(s->dev.config + PCI_COMMAND,
> +                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
> +
> +    memory_region_init_io(&s->control, &control_ops, s, "backdoor.control",
> +                          PAGE_SIZE);
> +    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->control);
> +
> +    memory_region_init_ram(&s->data, &s->dev.qdev, "backdoor.data",
> +                           s->size);
> +    pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->data);
> +    s->data_ptr = qemu_get_ram_ptr(s->data.ram_addr);
> +
> +    qemu_backdoor_init(s->size);
> +
> +    return 0;
> +}
> +
> +static int fini(PCIDevice *dev)
> +{
> +    State *s = DO_UPCAST(State, dev, dev);
> +
> +    memory_region_destroy(&s->data);
> +    memory_region_destroy(&s->control);
> +
> +    return 0;
> +}
> +
> +
> +static PCIDeviceInfo info = {
> +    .qdev.name  = "backdoor",
> +    .qdev.desc  = "Backdoor communication channel",
> +    .qdev.size  = sizeof(State),
> +    .init       = init,
> +    .exit       = fini,
> +    .vendor_id  = PCI_VENDOR_ID_REDHAT_QUMRANET,
> +    .device_id  = PCI_DEVICE_ID_BACKDOOR,
> +    .class_id   = PCI_CLASS_MEMORY_RAM,
> +    .qdev.props = (Property[]) {
> +        DEFINE_PROP_UINT8("pages", State, pages, 1),
> +        DEFINE_PROP_END_OF_LIST(),
> +    }
> +};
> +
> +static void register_device(void)
> +{
> +    pci_qdev_register(&info);
> +}
> +
> +device_init(register_device)
> diff --git a/hw/pci.h b/hw/pci.h
> index 86a81c8..4d7d161 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -75,6 +75,7 @@
>  #define PCI_DEVICE_ID_VIRTIO_BLOCK       0x1001
>  #define PCI_DEVICE_ID_VIRTIO_BALLOON     0x1002
>  #define PCI_DEVICE_ID_VIRTIO_CONSOLE     0x1003
> +#define PCI_DEVICE_ID_BACKDOOR           0x1004
>
>  #define FMT_PCIBUS                      PRIx64
>
>
>
>
Lluís Vilanova - Sept. 29, 2011, 9:49 p.m.
Blue Swirl writes:

> 2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
>> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>> +{
>> +    State *s = opaque;
>> +
>> +    uint64_t res = ldq_p(&s->size);
>> +    uint8_t *resb = (uint8_t*)&res;
>> +    return resb[addr % CTRL_BYTES];

> I don't think these lines do what you mean, but I'm also not sure what
> it is supposed to mean.

Precondition: the device can only be read one byte at a time (as stated in
control_ops.impl). I did this just because the code looks less ugly, and host
performance should not be an issue here.

The device is treated as a circular buffer of length CTRL_BYTES.

Reads are only used to get the size of the data channel.

The first line should handle guest/host endianness swapping, although I'm not
sure if that's the API I'm supposed to use.

Then it returns the N'th byte of the uint64_t variable holding the
(endianness-aware) result.

>> +}
>> +
>> +static void control_io_write(void *opaque, target_phys_addr_t addr, uint64_t data, unsigned size)
>> +{
>> +    State *s = opaque;
>> +
>> +    uint8_t *cmdb = (uint8_t*)&s->cmd;
>> +    cmdb[addr % CTRL_BYTES] = (uint8_t)data;
>> +
>> +    if ((addr + size) % CTRL_BYTES == 0) {
>> +        qemu_backdoor(ldq_p(&s->cmd), s->data_ptr);
>> +    }

> Same here.

Precondition: same as for reads.

The function accumulates writes into s->cmd to build the command the guest is
sending us (in guest endianness).

When CTRL_BYTES bytes have been written into the device, it converts the
command value to host endianness and invokes the user-provided backdoor
callback.

This assumes that when executing in KVM, the device handling infrastructure will
get a lock and only one CPU will be sending a backdoor command until completion.


I'll add some comments there and prefix all structs and functions with
"backdoor_", as otherwise debugging could get harder if everyone started
avoiding the prefixes.


Lluis
Frans de Boer - Sept. 29, 2011, 10:13 p.m.
On 09/29/2011 11:49 PM, Lluís Vilanova wrote:
> Blue Swirl writes:
>
>> 2011/9/29 Lluís Vilanova<vilanova@ac.upc.edu>:
>>> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>>> +{
>>> +    State *s = opaque;
>>> +
>>> +    uint64_t res = ldq_p(&s->size);
>>> +    uint8_t *resb = (uint8_t*)&res;
>>> +    return resb[addr % CTRL_BYTES];
>
>> I don't think these lines do what you mean, but I'm also not sure what
>> it is supposed to mean.
>
> Pre: only can read on a byte-per-byte basis (as stated in control_ops.impl),
> just because the code looks less ugly, and host performance should not be an
> issue here.
>
> The device is treated as a circular buffer of length CTRL_BYTES
>
> Reads are only used to get the size of the data channel.
>
> First line should handle guest/host endianess swapping, although I'm not sure if
> that's the API I'm supposed to use.
>
> Then return the N'th byte of the uint64_t variable holding the (endianess-aware)
> result.
>
>>> +}
>>> +
>>> +static void control_io_write(void *opaque, target_phys_addr_t addr, uint64_t data, unsigned size)
>>> +{
>>> +    State *s = opaque;
>>> +
>>> +    uint8_t *cmdb = (uint8_t*)&s->cmd;
>>> +    cmdb[addr % CTRL_BYTES] = (uint8_t)data;
>>> +
>>> +    if ((addr + size) % CTRL_BYTES == 0) {
>>> +        qemu_backdoor(ldq_p(&s->cmd), s->data_ptr);
>>> +    }
>
>> Same here.
>
> Pre: same as during reads.
>
> Accumulates writes into s->cmd to build the command the guest is sending us (in
> guest endianess).
>
> When CTRL_BYTES bytes have been written into the device, get the command value
> into host endianess and invoke the user-provided backdoor callback.
>
> This assumes that when executing in KVM, the device handling infrastructure will
> get a lock and only one CPU will be sending a backdoor command until completion.
>
>
> I'll add some comments there and prefix all structs and functions with
> "backdoor_", as otherwise debugging could get harder if everyone started
> avoiding the prefixes.
>
>
> Lluis
>
I have not read the whole thread, but as an advisor in matters of
information risk management, the very word "backdoor" sends shivers down
my spine. Why use a backdoor at all? The word alone gives us bad vibes.

Using such words will discourage professional use of QEMU, since they
suggest that a possible risk might be introduced (even if it seems to be
documented). So, please clarify the word "backdoor" or change it to
something less threatening.

regards,
Frans de Boer.
Frans de Boer - Sept. 29, 2011, 10:35 p.m.
On 09/29/2011 11:49 PM, Lluís Vilanova wrote:
> Blue Swirl writes:
>
>> 2011/9/29 Lluís Vilanova<vilanova@ac.upc.edu>:
>>> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>>> +{
>>> +    State *s = opaque;
>>> +
>>> +    uint64_t res = ldq_p(&s->size);
>>> +    uint8_t *resb = (uint8_t*)&res;
>>> +    return resb[addr % CTRL_BYTES];
>
>> I don't think these lines do what you mean, but I'm also not sure what
>> it is supposed to mean.
>
> Pre: only can read on a byte-per-byte basis (as stated in control_ops.impl),
> just because the code looks less ugly, and host performance should not be an
> issue here.
>
> The device is treated as a circular buffer of length CTRL_BYTES
>
> Reads are only used to get the size of the data channel.
>
> First line should handle guest/host endianess swapping, although I'm not sure if
> that's the API I'm supposed to use.
>
> Then return the N'th byte of the uint64_t variable holding the (endianess-aware)
> result.
>
>>> +}
>>> +
>>> +static void control_io_write(void *opaque, target_phys_addr_t addr, uint64_t data, unsigned size)
>>> +{
>>> +    State *s = opaque;
>>> +
>>> +    uint8_t *cmdb = (uint8_t*)&s->cmd;
>>> +    cmdb[addr % CTRL_BYTES] = (uint8_t)data;
>>> +
>>> +    if ((addr + size) % CTRL_BYTES == 0) {
>>> +        qemu_backdoor(ldq_p(&s->cmd), s->data_ptr);
>>> +    }
>
>> Same here.
>
> Pre: same as during reads.
>
> Accumulates writes into s->cmd to build the command the guest is sending us (in
> guest endianess).
>
> When CTRL_BYTES bytes have been written into the device, get the command value
> into host endianess and invoke the user-provided backdoor callback.
>
> This assumes that when executing in KVM, the device handling infrastructure will
> get a lock and only one CPU will be sending a backdoor command until completion.
>
>
> I'll add some comments there and prefix all structs and functions with
> "backdoor_", as otherwise debugging could get harder if everyone started
> avoiding the prefixes.
>
>
> Lluis
>
I have not read the whole thread, but as an advisor in matters of
information risk management, the very word "backdoor" sends shivers down
my spine. Why use a backdoor at all? The word alone gives us bad vibes.

Using such words will discourage professional use of QEMU, since they
suggest that a possible risk might be introduced (even if it seems to be
documented). So, please clarify the word "backdoor" or change it to
something less threatening.

regards,
Frans de Boer.
Blue Swirl - Sept. 30, 2011, 8:07 p.m.
2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
> Blue Swirl writes:
>
>> 2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
>>> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>>> +{
>>> +    State *s = opaque;
>>> +
>>> +    uint64_t res = ldq_p(&s->size);
>>> +    uint8_t *resb = (uint8_t*)&res;
>>> +    return resb[addr % CTRL_BYTES];
>
>> I don't think these lines do what you mean, but I'm also not sure what
>> it is supposed to mean.
>
> Pre: only can read on a byte-per-byte basis (as stated in control_ops.impl),
> just because the code looks less ugly, and host performance should not be an
> issue here.
>
> The device is treated as a circular buffer of length CTRL_BYTES
>
> Reads are only used to get the size of the data channel.
>
> First line should handle guest/host endianess swapping, although I'm not sure if
> that's the API I'm supposed to use.
>
> Then return the N'th byte of the uint64_t variable holding the (endianess-aware)
> result.

That may be the intention, but the first line will load res from guest
memory using an address (&s->size) in host memory. I think the next
two lines are equal to
return res >> (addr % CTRL_BYTES);
but with some obfuscation.

It would be much clearer if the registers were byte arrays so you
could read and write the data directly without pointer arithmetic.

Byte accesses will be slower than larger word size accesses, I thought
performance was one of the goals with this?

>>> +}
>>> +
>>> +static void control_io_write(void *opaque, target_phys_addr_t addr, uint64_t data, unsigned size)
>>> +{
>>> +    State *s = opaque;
>>> +
>>> +    uint8_t *cmdb = (uint8_t*)&s->cmd;
>>> +    cmdb[addr % CTRL_BYTES] = (uint8_t)data;
>>> +
>>> +    if ((addr + size) % CTRL_BYTES == 0) {
>>> +        qemu_backdoor(ldq_p(&s->cmd), s->data_ptr);
>>> +    }
>
>> Same here.
>
> Pre: same as during reads.
>
> Accumulates writes into s->cmd to build the command the guest is sending us (in
> guest endianess).
>
> When CTRL_BYTES bytes have been written into the device, get the command value
> into host endianess and invoke the user-provided backdoor callback.
>
> This assumes that when executing in KVM, the device handling infrastructure will
> get a lock and only one CPU will be sending a backdoor command until completion.
>
>
> I'll add some comments there and prefix all structs and functions with
> "backdoor_", as otherwise debugging could get harder if everyone started
> avoiding the prefixes.
>
>
> Lluis
>
> --
>  "And it's much the same thing with knowledge, for whenever you learn
>  something new, the whole world becomes that much richer."
>  -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
>  Tollbooth
>
Blue Swirl - Sept. 30, 2011, 8:29 p.m.
On Thu, Sep 29, 2011 at 10:35 PM, Frans de Boer <frans@fransdb.nl> wrote:
> On 09/29/2011 11:49 PM, Lluís Vilanova wrote:
>>
>> Blue Swirl writes:
>>
>>> 2011/9/29 Lluís Vilanova<vilanova@ac.upc.edu>:
>>>>
>>>> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr,
>>>> unsigned size)
>>>> +{
>>>> +    State *s = opaque;
>>>> +
>>>> +    uint64_t res = ldq_p(&s->size);
>>>> +    uint8_t *resb = (uint8_t*)&res;
>>>> +    return resb[addr % CTRL_BYTES];
>>
>>> I don't think these lines do what you mean, but I'm also not sure what
>>> it is supposed to mean.
>>
>> Pre: only can read on a byte-per-byte basis (as stated in
>> control_ops.impl),
>> just because the code looks less ugly, and host performance should not be
>> an
>> issue here.
>>
>> The device is treated as a circular buffer of length CTRL_BYTES
>>
>> Reads are only used to get the size of the data channel.
>>
>> First line should handle guest/host endianess swapping, although I'm not
>> sure if
>> that's the API I'm supposed to use.
>>
>> Then return the N'th byte of the uint64_t variable holding the
>> (endianess-aware)
>> result.
>>
>>>> +}
>>>> +
>>>> +static void control_io_write(void *opaque, target_phys_addr_t addr,
>>>> uint64_t data, unsigned size)
>>>> +{
>>>> +    State *s = opaque;
>>>> +
>>>> +    uint8_t *cmdb = (uint8_t*)&s->cmd;
>>>> +    cmdb[addr % CTRL_BYTES] = (uint8_t)data;
>>>> +
>>>> +    if ((addr + size) % CTRL_BYTES == 0) {
>>>> +        qemu_backdoor(ldq_p(&s->cmd), s->data_ptr);
>>>> +    }
>>
>>> Same here.
>>
>> Pre: same as during reads.
>>
>> Accumulates writes into s->cmd to build the command the guest is sending
>> us (in
>> guest endianess).
>>
>> When CTRL_BYTES bytes have been written into the device, get the command
>> value
>> into host endianess and invoke the user-provided backdoor callback.
>>
>> This assumes that when executing in KVM, the device handling
>> infrastructure will
>> get a lock and only one CPU will be sending a backdoor command until
>> completion.
>>
>>
>> I'll add some comments there and prefix all structs and functions with
>> "backdoor_", as otherwise debugging could get harder if everyone started
>> avoiding the prefixes.
>>
>>
>> Lluis
>>
> I have not read the whole thread, but being an advisor in matters of
> information risk management, the very word "backdoor" send quivers along my
> spine. Why use a backdoor at all? The word alone gives us bad vibes.

Are the other methods for the guest to interact with the host, like virtio
services, OK then just because of their virtuous names? What about KVM:
isn't it horrible that untrusted guest code is executed by an ultimately
trusted kernel module?

Jokes aside, the name could be improved.

> Using these words will disseminate QEMU from professional use since a
> possible risk might be introduced (even if it seems documented). So, please
> clarify the word "backdoor" or change it into something less threatening.

Obviously this device should never be enabled in any untrusted
environment, maybe not even compiled by default unless configured
during build. The same should apply to all debugging devices.
Lluís Vilanova - Sept. 30, 2011, 8:49 p.m.
Blue Swirl writes:

> 2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
>> Blue Swirl writes:
>> 
>>> 2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
>>>> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>>>> +{
>>>> +    State *s = opaque;
>>>> +
>>>> +    uint64_t res = ldq_p(&s->size);
>>>> +    uint8_t *resb = (uint8_t*)&res;
>>>> +    return resb[addr % CTRL_BYTES];
>> 
>>> I don't think these lines do what you mean, but I'm also not sure what
>>> it is supposed to mean.
>> 
>> Pre: only can read on a byte-per-byte basis (as stated in control_ops.impl),
>> just because the code looks less ugly, and host performance should not be an
>> issue here.
>> 
>> The device is treated as a circular buffer of length CTRL_BYTES
>> 
>> Reads are only used to get the size of the data channel.
>> 
>> First line should handle guest/host endianess swapping, although I'm not sure if
>> that's the API I'm supposed to use.
>> 
>> Then return the N'th byte of the uint64_t variable holding the (endianess-aware)
>> result.

> That may be the intention, but the first line will load res from guest
> memory using an address (&s->size) in host memory.

Ok, I think I found what I really wanted: tswap64


> I think the next two lines are equal to
> return res >> (addr % CTRL_BYTES);
> but with some obfuscation.

But I cannot assume any particular endianness on either host or guest. The only
thing I can assume is that the generic device code handling the reads will read
from lower to higher addresses.

In any case, take what I say with a grain of salt; endianness often confuses me.


> It would be much clearer if the registers were byte arrays so you
> could read and write the data directly without pointer arithmetic.

Is that something present in the device API? Sorry, I don't know what you mean
by byte array... for me 'resb' already is a byte array :)


> Byte accesses will be slower than larger word size accesses, I thought
> performance was one of the goals with this?

They will be slower on host time, but will not waste "guest time".

BTW, will the current scheme in KVM provoke one VM exit for each byte or only
one for the whole 64bits?

But yes, I was just too lazy to add code for all the supported sizes from 1 to
8, and let the generic device code pick the best.


Lluis
Blue Swirl - Sept. 30, 2011, 8:59 p.m.
2011/9/30 Lluís Vilanova <vilanova@ac.upc.edu>:
> Blue Swirl writes:
>
>> 2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
>>> Blue Swirl writes:
>>>
>>>> 2011/9/29 Lluís Vilanova <vilanova@ac.upc.edu>:
>>>>> +static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
>>>>> +{
>>>>> +    State *s = opaque;
>>>>> +
>>>>> +    uint64_t res = ldq_p(&s->size);
>>>>> +    uint8_t *resb = (uint8_t*)&res;
>>>>> +    return resb[addr % CTRL_BYTES];
>>>
>>>> I don't think these lines do what you mean, but I'm also not sure what
>>>> it is supposed to mean.
>>>
>>> Pre: only can read on a byte-per-byte basis (as stated in control_ops.impl),
>>> just because the code looks less ugly, and host performance should not be an
>>> issue here.
>>>
>>> The device is treated as a circular buffer of length CTRL_BYTES
>>>
>>> Reads are only used to get the size of the data channel.
>>>
>>> First line should handle guest/host endianess swapping, although I'm not sure if
>>> that's the API I'm supposed to use.
>>>
>>> Then return the N'th byte of the uint64_t variable holding the (endianess-aware)
>>> result.
>
>> That may be the intention, but the first line will load res from guest
>> memory using an address (&s->size) in host memory.
>
> Ok, I think I found what I really wanted: tswap64
>
>
>> I think the next two lines are equal to
>> return res >> (addr % CTRL_BYTES);
>> but with some obfuscation.
>
> But I cannot assume any endianess on neither host or guest. The only thing I can
> assume is that the generic device code handling the reads will read from lower
> to higher addresses.

Since this is your device, you can specify that the device works only
in little endian, like most if not all PCI devices. Then you can use
le64_to_cpu().

> In any case, take me with a grain of salt, endianess often confuses me.
>
>
>> It would be much clearer if the registers were byte arrays so you
>> could read and write the data directly without pointer arithmetic.
>
> Is that something present on the device API? Sorry I don't know what you mean by
> byte array... for me 'resb' already is a byte array :)

I meant that instead of
    uint64_t size;
    uint64_t cmd;

you'd have
    uint8_t size[8];
    uint8_t cmd[8];

>> Byte accesses will be slower than larger word size accesses, I thought
>> performance was one of the goals with this?
>
> They will be slower on host time, but will not waste "guest time".
>
> BTW, will the current scheme in KVM provoke one VM exit for each byte or only
> one for the whole 64bits?
>
> But yes, I was just too lazy to add code for all the supported sizes from 1 to
> 8, and let the generic device code pick the best.
>
>
> Lluis
>
> --
>  "And it's much the same thing with knowledge, for whenever you learn
>  something new, the whole world becomes that much richer."
>  -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
>  Tollbooth
>

Patch

diff --git a/Makefile.objs b/Makefile.objs
index d39074d..5f54d10 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -398,6 +398,7 @@  $(trace-obj-y): $(GENERATED_HEADERS)
 # backdoor
 
 backdoor-nested-$(CONFIG_USER_ONLY) += user.o
+backdoor-nested-$(CONFIG_SOFTMMU) += softmmu.o
 
 backdoor-obj-y += $(addprefix backdoor/qemu/, $(backdoor-nested-y))
 
diff --git a/backdoor/qemu/softmmu.c b/backdoor/qemu/softmmu.c
new file mode 100644
index 0000000..fdd3a25
--- /dev/null
+++ b/backdoor/qemu/softmmu.c
@@ -0,0 +1,124 @@ 
+/*
+ * QEMU-side management of backdoor channels in softmmu emulation.
+ *
+ * Copyright (C) 2011 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hw/pci.h"
+#include "backdoor/qemu/qemu-backdoor.h"
+
+
+#define PAGE_SIZE TARGET_PAGE_SIZE
+#define CTRL_BYTES sizeof(uint64_t)
+
+
+typedef struct State
+{
+    PCIDevice dev;
+
+    uint8_t pages;
+    uint64_t size;
+
+    uint64_t cmd;
+
+    void *data_ptr;
+    MemoryRegion data;
+    MemoryRegion control;
+} State;
+
+
+static uint64_t control_io_read(void *opaque, target_phys_addr_t addr, unsigned size)
+{
+    State *s = opaque;
+
+    uint64_t res = ldq_p(&s->size);
+    uint8_t *resb = (uint8_t*)&res;
+    return resb[addr % CTRL_BYTES];
+}
+
+static void control_io_write(void *opaque, target_phys_addr_t addr, uint64_t data, unsigned size)
+{
+    State *s = opaque;
+
+    uint8_t *cmdb = (uint8_t*)&s->cmd;
+    cmdb[addr % CTRL_BYTES] = (uint8_t)data;
+
+    if ((addr + size) % CTRL_BYTES == 0) {
+        qemu_backdoor(ldq_p(&s->cmd), s->data_ptr);
+    }
+}
+
+static const MemoryRegionOps control_ops = {
+    .read = control_io_read,
+    .write = control_io_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+};
+
+
+static int init(PCIDevice *dev)
+{
+    State *s = DO_UPCAST(State, dev, dev);
+
+    if (s->pages < 1) {
+        fprintf(stderr, "error: backdoor: "
+                "the data channel must have one or more pages\n");
+        return -1;
+    }
+    s->size = s->pages * PAGE_SIZE;
+
+    pci_set_word(s->dev.config + PCI_COMMAND,
+                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+
+    memory_region_init_io(&s->control, &control_ops, s, "backdoor.control",
+                          PAGE_SIZE);
+    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->control);
+
+    memory_region_init_ram(&s->data, &s->dev.qdev, "backdoor.data",
+                           s->size);
+    pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->data);
+    s->data_ptr = qemu_get_ram_ptr(s->data.ram_addr);
+
+    qemu_backdoor_init(s->size);
+
+    return 0;
+}
+
+static int fini(PCIDevice *dev)
+{
+    State *s = DO_UPCAST(State, dev, dev);
+
+    memory_region_destroy(&s->data);
+    memory_region_destroy(&s->control);
+
+    return 0;
+}
+
+
+static PCIDeviceInfo info = {
+    .qdev.name  = "backdoor",
+    .qdev.desc  = "Backdoor communication channel",
+    .qdev.size  = sizeof(State),
+    .init       = init,
+    .exit       = fini,
+    .vendor_id  = PCI_VENDOR_ID_REDHAT_QUMRANET,
+    .device_id  = PCI_DEVICE_ID_BACKDOOR,
+    .class_id   = PCI_CLASS_MEMORY_RAM,
+    .qdev.props = (Property[]) {
+        DEFINE_PROP_UINT8("pages", State, pages, 1),
+        DEFINE_PROP_END_OF_LIST(),
+    }
+};
+
+static void register_device(void)
+{
+    pci_qdev_register(&info);
+}
+
+device_init(register_device)
diff --git a/hw/pci.h b/hw/pci.h
index 86a81c8..4d7d161 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -75,6 +75,7 @@ 
 #define PCI_DEVICE_ID_VIRTIO_BLOCK       0x1001
 #define PCI_DEVICE_ID_VIRTIO_BALLOON     0x1002
 #define PCI_DEVICE_ID_VIRTIO_CONSOLE     0x1003
+#define PCI_DEVICE_ID_BACKDOOR           0x1004
 
 #define FMT_PCIBUS                      PRIx64