diff mbox

[v4,03/11] dataplane: add host memory mapping code

Message ID 1353597412-12232-4-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Nov. 22, 2012, 3:16 p.m. UTC
The data plane thread needs to map guest physical addresses to host
pointers.  Normally this is done with cpu_physical_memory_map() but the
function assumes the global mutex is held.  The data plane thread does
not touch the global mutex and therefore needs a thread-safe memory
mapping mechanism.

Hostmem registers a MemoryListener similar to how vhost collects and
pushes memory region information into the kernel.  There is a
fine-grained lock on the regions list which is held during lookup and
when installing a new regions list.

When the physical memory map changes the MemoryListener callbacks are
invoked.  They build up a new list of memory regions which is finally
installed when the list has been completed.

Note that this approach is not safe across memory hotplug because mapped
pointers may still be in use across memory unplug.  However, this is
currently a problem for QEMU in general and needs to be addressed in the
future.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/dataplane/Makefile.objs |   3 +
 hw/dataplane/hostmem.c     | 165 +++++++++++++++++++++++++++++++++++++++++++++
 hw/dataplane/hostmem.h     |  52 ++++++++++++++
 3 files changed, 220 insertions(+)
 create mode 100644 hw/dataplane/Makefile.objs
 create mode 100644 hw/dataplane/hostmem.c
 create mode 100644 hw/dataplane/hostmem.h

Comments

Michael S. Tsirkin Nov. 29, 2012, 12:33 p.m. UTC | #1
On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> The data plane thread needs to map guest physical addresses to host
> pointers.  Normally this is done with cpu_physical_memory_map() but the
> function assumes the global mutex is held.  The data plane thread does
> not touch the global mutex and therefore needs a thread-safe memory
> mapping mechanism.
> 
> Hostmem registers a MemoryListener similar to how vhost collects and
> pushes memory region information into the kernel.  There is a
> fine-grained lock on the regions list which is held during lookup and
> when installing a new regions list.
> 
> When the physical memory map changes the MemoryListener callbacks are
> invoked.  They build up a new list of memory regions which is finally
> installed when the list has been completed.
> 
> Note that this approach is not safe across memory hotplug because mapped
> pointers may still be in used across memory unplug.  However, this is
> currently a problem for QEMU in general and needs to be addressed in the
> future.

Sounds like a serious problem.
I'm not sure I understand - are you saying this is currently a problem for
QEMU virtio? Could you give an example please?

> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

> ---
>  hw/dataplane/Makefile.objs |   3 +
>  hw/dataplane/hostmem.c     | 165 +++++++++++++++++++++++++++++++++++++++++++++
>  hw/dataplane/hostmem.h     |  52 ++++++++++++++
>  3 files changed, 220 insertions(+)
>  create mode 100644 hw/dataplane/Makefile.objs
>  create mode 100644 hw/dataplane/hostmem.c
>  create mode 100644 hw/dataplane/hostmem.h
> 
> diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
> new file mode 100644
> index 0000000..8c8dea1
> --- /dev/null
> +++ b/hw/dataplane/Makefile.objs
> @@ -0,0 +1,3 @@
> +ifeq ($(CONFIG_VIRTIO), y)
> +common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o
> +endif
> diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c
> new file mode 100644
> index 0000000..48aabf0
> --- /dev/null
> +++ b/hw/dataplane/hostmem.c
> @@ -0,0 +1,165 @@
> +/*
> + * Thread-safe guest to host memory mapping
> + *
> + * Copyright 2012 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *   Stefan Hajnoczi <stefanha@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "exec-memory.h"
> +#include "hostmem.h"
> +
> +static int hostmem_lookup_cmp(const void *phys_, const void *region_)
> +{
> +    hwaddr phys = *(const hwaddr *)phys_;
> +    const HostmemRegion *region = region_;
> +
> +    if (phys < region->guest_addr) {
> +        return -1;
> +    } else if (phys >= region->guest_addr + region->size) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +/**
> + * Map guest physical address to host pointer
> + */
> +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write)
> +{
> +    HostmemRegion *region;
> +    void *host_addr = NULL;
> +    hwaddr offset_within_region;
> +
> +    qemu_mutex_lock(&hostmem->current_regions_lock);
> +    region = bsearch(&phys, hostmem->current_regions,
> +                     hostmem->num_current_regions,
> +                     sizeof(hostmem->current_regions[0]),
> +                     hostmem_lookup_cmp);
> +    if (!region) {
> +        goto out;
> +    }
> +    if (is_write && region->readonly) {
> +        goto out;
> +    }
> +    offset_within_region = phys - region->guest_addr;
> +    if (offset_within_region + len <= region->size) {
> +        host_addr = region->host_addr + offset_within_region;
> +    }
> +out:
> +    qemu_mutex_unlock(&hostmem->current_regions_lock);
> +
> +    return host_addr;
> +}
> +
> +/**
> + * Install new regions list
> + */
> +static void hostmem_listener_commit(MemoryListener *listener)
> +{
> +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> +
> +    qemu_mutex_lock(&hostmem->current_regions_lock);
> +    g_free(hostmem->current_regions);
> +    hostmem->current_regions = hostmem->new_regions;
> +    hostmem->num_current_regions = hostmem->num_new_regions;
> +    qemu_mutex_unlock(&hostmem->current_regions_lock);
> +
> +    /* Reset new regions list */
> +    hostmem->new_regions = NULL;
> +    hostmem->num_new_regions = 0;
> +}
> +
> +/**
> + * Add a MemoryRegionSection to the new regions list
> + */
> +static void hostmem_append_new_region(Hostmem *hostmem,
> +                                      MemoryRegionSection *section)
> +{
> +    void *ram_ptr = memory_region_get_ram_ptr(section->mr);
> +    size_t num = hostmem->num_new_regions;
> +    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
> +
> +    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
> +    hostmem->new_regions[num] = (HostmemRegion){
> +        .host_addr = ram_ptr + section->offset_within_region,
> +        .guest_addr = section->offset_within_address_space,
> +        .size = section->size,
> +        .readonly = section->readonly,
> +    };
> +    hostmem->num_new_regions++;
> +}
> +
> +static void hostmem_listener_append_region(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> +
> +    if (memory_region_is_ram(section->mr)) {
> +        hostmem_append_new_region(hostmem, section);
> +    }
> +}
> +
> +/* We don't implement most MemoryListener callbacks, use these nop stubs */
> +static void hostmem_listener_dummy(MemoryListener *listener)
> +{
> +}
> +
> +static void hostmem_listener_section_dummy(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +}
> +
> +static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
> +                                           MemoryRegionSection *section,
> +                                           bool match_data, uint64_t data,
> +                                           EventNotifier *e)
> +{
> +}
> +
> +static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
> +                                                  MemoryRegionSection *section,
> +                                                  hwaddr addr, hwaddr len)
> +{
> +}
> +
> +void hostmem_init(Hostmem *hostmem)
> +{
> +    memset(hostmem, 0, sizeof(*hostmem));
> +
> +    hostmem->listener = (MemoryListener){
> +        .begin = hostmem_listener_dummy,
> +        .commit = hostmem_listener_commit,
> +        .region_add = hostmem_listener_append_region,
> +        .region_del = hostmem_listener_section_dummy,
> +        .region_nop = hostmem_listener_append_region,
> +        .log_start = hostmem_listener_section_dummy,
> +        .log_stop = hostmem_listener_section_dummy,
> +        .log_sync = hostmem_listener_section_dummy,
> +        .log_global_start = hostmem_listener_dummy,
> +        .log_global_stop = hostmem_listener_dummy,
> +        .eventfd_add = hostmem_listener_eventfd_dummy,
> +        .eventfd_del = hostmem_listener_eventfd_dummy,
> +        .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
> +        .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
> +        .priority = 10,
> +    };
> +
> +    memory_listener_register(&hostmem->listener, &address_space_memory);
> +    if (hostmem->num_new_regions > 0) {
> +        hostmem_listener_commit(&hostmem->listener);
> +    }
> +}
> +
> +void hostmem_finalize(Hostmem *hostmem)
> +{
> +    memory_listener_unregister(&hostmem->listener);
> +    g_free(hostmem->new_regions);
> +    g_free(hostmem->current_regions);
> +}
> diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h
> new file mode 100644
> index 0000000..a833b74
> --- /dev/null
> +++ b/hw/dataplane/hostmem.h
> @@ -0,0 +1,52 @@
> +/*
> + * Thread-safe guest to host memory mapping
> + *
> + * Copyright 2012 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *   Stefan Hajnoczi <stefanha@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef HOSTMEM_H
> +#define HOSTMEM_H
> +
> +#include "memory.h"
> +#include "qemu-thread.h"
> +
> +typedef struct {
> +    void *host_addr;
> +    hwaddr guest_addr;
> +    uint64_t size;
> +    bool readonly;
> +} HostmemRegion;
> +
> +typedef struct {
> +    /* The listener is invoked when regions change and a new list of regions is
> +     * built up completely before they are installed.
> +     */
> +    MemoryListener listener;
> +    HostmemRegion *new_regions;
> +    size_t num_new_regions;
> +
> +    /* Current regions are accessed from multiple threads either to lookup
> +     * addresses or to install a new list of regions.  The lock protects the
> +     * pointer and the regions.
> +     */
> +    QemuMutex current_regions_lock;
> +    HostmemRegion *current_regions;
> +    size_t num_current_regions;
> +} Hostmem;
> +
> +void hostmem_init(Hostmem *hostmem);
> +void hostmem_finalize(Hostmem *hostmem);
> +
> +/**
> + * Map a guest physical address to a pointer
> + */
> +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write);
> +
> +#endif /* HOSTMEM_H */
> -- 
> 1.8.0
Stefan Hajnoczi Nov. 29, 2012, 12:45 p.m. UTC | #2
On Thu, Nov 29, 2012 at 02:33:11PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > The data plane thread needs to map guest physical addresses to host
> > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > function assumes the global mutex is held.  The data plane thread does
> > not touch the global mutex and therefore needs a thread-safe memory
> > mapping mechanism.
> > 
> > Hostmem registers a MemoryListener similar to how vhost collects and
> > pushes memory region information into the kernel.  There is a
> > fine-grained lock on the regions list which is held during lookup and
> > when installing a new regions list.
> > 
> > When the physical memory map changes the MemoryListener callbacks are
> > invoked.  They build up a new list of memory regions which is finally
> > installed when the list has been completed.
> > 
> > Note that this approach is not safe across memory hotplug because mapped
> > pointers may still be in used across memory unplug.  However, this is
> > currently a problem for QEMU in general and needs to be addressed in the
> > future.
> 
> Sounds like a serious problem.
> I'm not sure I understand - do you say this currently a problem for QEMU
> virtio? Coul you give an example please?

This is a limitation of the memory API but cannot be triggered by users
today since we don't support memory hot unplug.  I'm just explaining
that virtio-blk-data-plane has the same issue as hw/virtio-blk.c or any
other device emulation code here.

Some more detail:

The issue is that hw/virtio-blk.c submits an asynchronous I/O request on
the host with the guest buffer.  Then virtio-blk emulation returns back
to the caller and continues QEMU execution.

It is unsafe to unplug memory while the I/O request is pending since
there's no mechanism (e.g. refcount) to wait until the guest memory is
no longer in use.

This is a known issue.  There's no way to trigger a problem today but we
need to eventually enhance QEMU's memory API to handle this case.

Stefan
Michael S. Tsirkin Nov. 29, 2012, 12:54 p.m. UTC | #3
On Thu, Nov 29, 2012 at 01:45:19PM +0100, Stefan Hajnoczi wrote:
> On Thu, Nov 29, 2012 at 02:33:11PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > > The data plane thread needs to map guest physical addresses to host
> > > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > > function assumes the global mutex is held.  The data plane thread does
> > > not touch the global mutex and therefore needs a thread-safe memory
> > > mapping mechanism.
> > > 
> > > Hostmem registers a MemoryListener similar to how vhost collects and
> > > pushes memory region information into the kernel.  There is a
> > > fine-grained lock on the regions list which is held during lookup and
> > > when installing a new regions list.
> > > 
> > > When the physical memory map changes the MemoryListener callbacks are
> > > invoked.  They build up a new list of memory regions which is finally
> > > installed when the list has been completed.
> > > 
> > > Note that this approach is not safe across memory hotplug because mapped
> > > pointers may still be in used across memory unplug.  However, this is
> > > currently a problem for QEMU in general and needs to be addressed in the
> > > future.
> > 
> > Sounds like a serious problem.
> > I'm not sure I understand - do you say this currently a problem for QEMU
> > virtio? Coul you give an example please?
> 
> This is a limitation of the memory API but cannot be triggered by users
> today since we don't support memory hot unplug.  I'm just explaining
> that virtio-blk-data-plane has the same issue as hw/virtio-blk.c or any
> other device emulation code here.
> 
> Some more detail:
> 
> The issue is that hw/virtio-blk.c submits an asynchronous I/O request on
> the host with the guest buffer.  Then virtio-blk emulation returns back
> to the caller and continues QEMU execution.
> 
> It is unsafe to unplug memory while the I/O request is pending since
> there's no mechanism (e.g. refcount) to wait until the guest memory is
> no longer in use.
> 
> This is a known issue.  There's no way to trigger a problem today but we
> need to eventually enhance QEMU's memory API to handle this case.
> 
> Stefan

For this problem we would simply need to flush outstanding aio
before freeing memory for unplug, no refcount necessary.

This patch however introduces the issue in the frontend
and it looks like there won't be any way to solve
it without changing the API.
Michael S. Tsirkin Nov. 29, 2012, 12:57 p.m. UTC | #4
On Thu, Nov 29, 2012 at 02:54:26PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 29, 2012 at 01:45:19PM +0100, Stefan Hajnoczi wrote:
> > On Thu, Nov 29, 2012 at 02:33:11PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > > > The data plane thread needs to map guest physical addresses to host
> > > > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > > > function assumes the global mutex is held.  The data plane thread does
> > > > not touch the global mutex and therefore needs a thread-safe memory
> > > > mapping mechanism.
> > > > 
> > > > Hostmem registers a MemoryListener similar to how vhost collects and
> > > > pushes memory region information into the kernel.  There is a
> > > > fine-grained lock on the regions list which is held during lookup and
> > > > when installing a new regions list.
> > > > 
> > > > When the physical memory map changes the MemoryListener callbacks are
> > > > invoked.  They build up a new list of memory regions which is finally
> > > > installed when the list has been completed.
> > > > 
> > > > Note that this approach is not safe across memory hotplug because mapped
> > > > pointers may still be in used across memory unplug.  However, this is
> > > > currently a problem for QEMU in general and needs to be addressed in the
> > > > future.
> > > 
> > > Sounds like a serious problem.
> > > I'm not sure I understand - do you say this currently a problem for QEMU
> > > virtio? Coul you give an example please?
> > 
> > This is a limitation of the memory API but cannot be triggered by users
> > today since we don't support memory hot unplug.  I'm just explaining
> > that virtio-blk-data-plane has the same issue as hw/virtio-blk.c or any
> > other device emulation code here.
> > 
> > Some more detail:
> > 
> > The issue is that hw/virtio-blk.c submits an asynchronous I/O request on
> > the host with the guest buffer.  Then virtio-blk emulation returns back
> > to the caller and continues QEMU execution.
> > 
> > It is unsafe to unplug memory while the I/O request is pending since
> > there's no mechanism (e.g. refcount) to wait until the guest memory is
> > no longer in use.
> > 
> > This is a known issue.  There's no way to trigger a problem today but we
> > need to eventually enhance QEMU's memory API to handle this case.
> > 
> > Stefan
> 
> For this problem we would simply need to flush outstanding aio
> before freeing memory for unplug, no refcount necessary.
> 
> This patch however introduces the issue in the frontend
> and it looks like there won't be any way to solve
> it without changing the API.

To clarify: as you say, it is not triggerable,
so I don't think it is strictly required to address
this at this point, though it should not be too hard:
just register a callback that flushes the frontend processing.

But if you can't code it at this point, please add
a TODO comment in the code.

> -- 
> MST
Michael S. Tsirkin Nov. 29, 2012, 1:54 p.m. UTC | #5
On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> The data plane thread needs to map guest physical addresses to host
> pointers.  Normally this is done with cpu_physical_memory_map() but the
> function assumes the global mutex is held.  The data plane thread does
> not touch the global mutex and therefore needs a thread-safe memory
> mapping mechanism.
> 
> Hostmem registers a MemoryListener similar to how vhost collects and
> pushes memory region information into the kernel.  There is a
> fine-grained lock on the regions list which is held during lookup and
> when installing a new regions list.
> 
> When the physical memory map changes the MemoryListener callbacks are
> invoked.  They build up a new list of memory regions which is finally
> installed when the list has been completed.
> 
> Note that this approach is not safe across memory hotplug because mapped
> pointers may still be in used across memory unplug.  However, this is
> currently a problem for QEMU in general and needs to be addressed in the
> future.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Is it worth bothering with binary search?
vhost does a linear search over regions because
the number of RAM regions is very small.

> ---
>  hw/dataplane/Makefile.objs |   3 +
>  hw/dataplane/hostmem.c     | 165 +++++++++++++++++++++++++++++++++++++++++++++
>  hw/dataplane/hostmem.h     |  52 ++++++++++++++
>  3 files changed, 220 insertions(+)
>  create mode 100644 hw/dataplane/Makefile.objs
>  create mode 100644 hw/dataplane/hostmem.c
>  create mode 100644 hw/dataplane/hostmem.h
> 
> diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
> new file mode 100644
> index 0000000..8c8dea1
> --- /dev/null
> +++ b/hw/dataplane/Makefile.objs
> @@ -0,0 +1,3 @@
> +ifeq ($(CONFIG_VIRTIO), y)
> +common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o
> +endif
> diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c
> new file mode 100644
> index 0000000..48aabf0
> --- /dev/null
> +++ b/hw/dataplane/hostmem.c
> @@ -0,0 +1,165 @@
> +/*
> + * Thread-safe guest to host memory mapping
> + *
> + * Copyright 2012 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *   Stefan Hajnoczi <stefanha@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "exec-memory.h"
> +#include "hostmem.h"
> +
> +static int hostmem_lookup_cmp(const void *phys_, const void *region_)
> +{
> +    hwaddr phys = *(const hwaddr *)phys_;
> +    const HostmemRegion *region = region_;
> +
> +    if (phys < region->guest_addr) {
> +        return -1;
> +    } else if (phys >= region->guest_addr + region->size) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +/**
> + * Map guest physical address to host pointer
> + */
> +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write)
> +{
> +    HostmemRegion *region;
> +    void *host_addr = NULL;
> +    hwaddr offset_within_region;
> +
> +    qemu_mutex_lock(&hostmem->current_regions_lock);
> +    region = bsearch(&phys, hostmem->current_regions,
> +                     hostmem->num_current_regions,
> +                     sizeof(hostmem->current_regions[0]),
> +                     hostmem_lookup_cmp);
> +    if (!region) {
> +        goto out;
> +    }
> +    if (is_write && region->readonly) {
> +        goto out;
> +    }
> +    offset_within_region = phys - region->guest_addr;
> +    if (offset_within_region + len <= region->size) {
> +        host_addr = region->host_addr + offset_within_region;
> +    }
> +out:
> +    qemu_mutex_unlock(&hostmem->current_regions_lock);
> +
> +    return host_addr;
> +}
> +
> +/**
> + * Install new regions list
> + */
> +static void hostmem_listener_commit(MemoryListener *listener)
> +{
> +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> +
> +    qemu_mutex_lock(&hostmem->current_regions_lock);
> +    g_free(hostmem->current_regions);
> +    hostmem->current_regions = hostmem->new_regions;
> +    hostmem->num_current_regions = hostmem->num_new_regions;
> +    qemu_mutex_unlock(&hostmem->current_regions_lock);
> +
> +    /* Reset new regions list */
> +    hostmem->new_regions = NULL;
> +    hostmem->num_new_regions = 0;
> +}
> +
> +/**
> + * Add a MemoryRegionSection to the new regions list
> + */
> +static void hostmem_append_new_region(Hostmem *hostmem,
> +                                      MemoryRegionSection *section)
> +{
> +    void *ram_ptr = memory_region_get_ram_ptr(section->mr);
> +    size_t num = hostmem->num_new_regions;
> +    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
> +
> +    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
> +    hostmem->new_regions[num] = (HostmemRegion){
> +        .host_addr = ram_ptr + section->offset_within_region,
> +        .guest_addr = section->offset_within_address_space,
> +        .size = section->size,
> +        .readonly = section->readonly,
> +    };
> +    hostmem->num_new_regions++;
> +}
> +
> +static void hostmem_listener_append_region(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> +
> +    if (memory_region_is_ram(section->mr)) {
> +        hostmem_append_new_region(hostmem, section);
> +    }

I think you also need to exclude the VGA region: since you
don't mark these pages as dirty, access there won't work.

> +}
> +
> +/* We don't implement most MemoryListener callbacks, use these nop stubs */
> +static void hostmem_listener_dummy(MemoryListener *listener)
> +{
> +}
> +
> +static void hostmem_listener_section_dummy(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +}
> +
> +static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
> +                                           MemoryRegionSection *section,
> +                                           bool match_data, uint64_t data,
> +                                           EventNotifier *e)
> +{
> +}
> +
> +static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
> +                                                  MemoryRegionSection *section,
> +                                                  hwaddr addr, hwaddr len)
> +{
> +}
> +
> +void hostmem_init(Hostmem *hostmem)
> +{
> +    memset(hostmem, 0, sizeof(*hostmem));
> +
> +    hostmem->listener = (MemoryListener){
> +        .begin = hostmem_listener_dummy,
> +        .commit = hostmem_listener_commit,
> +        .region_add = hostmem_listener_append_region,
> +        .region_del = hostmem_listener_section_dummy,
> +        .region_nop = hostmem_listener_append_region,
> +        .log_start = hostmem_listener_section_dummy,
> +        .log_stop = hostmem_listener_section_dummy,
> +        .log_sync = hostmem_listener_section_dummy,
> +        .log_global_start = hostmem_listener_dummy,
> +        .log_global_stop = hostmem_listener_dummy,
> +        .eventfd_add = hostmem_listener_eventfd_dummy,
> +        .eventfd_del = hostmem_listener_eventfd_dummy,
> +        .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
> +        .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
> +        .priority = 10,
> +    };
> +
> +    memory_listener_register(&hostmem->listener, &address_space_memory);
> +    if (hostmem->num_new_regions > 0) {
> +        hostmem_listener_commit(&hostmem->listener);
> +    }
> +}
> +
> +void hostmem_finalize(Hostmem *hostmem)
> +{
> +    memory_listener_unregister(&hostmem->listener);
> +    g_free(hostmem->new_regions);
> +    g_free(hostmem->current_regions);
> +}
> diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h
> new file mode 100644
> index 0000000..a833b74
> --- /dev/null
> +++ b/hw/dataplane/hostmem.h
> @@ -0,0 +1,52 @@
> +/*
> + * Thread-safe guest to host memory mapping
> + *
> + * Copyright 2012 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *   Stefan Hajnoczi <stefanha@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef HOSTMEM_H
> +#define HOSTMEM_H
> +
> +#include "memory.h"
> +#include "qemu-thread.h"
> +
> +typedef struct {
> +    void *host_addr;
> +    hwaddr guest_addr;
> +    uint64_t size;
> +    bool readonly;
> +} HostmemRegion;
> +
> +typedef struct {
> +    /* The listener is invoked when regions change and a new list of regions is
> +     * built up completely before they are installed.
> +     */
> +    MemoryListener listener;
> +    HostmemRegion *new_regions;
> +    size_t num_new_regions;
> +
> +    /* Current regions are accessed from multiple threads either to lookup
> +     * addresses or to install a new list of regions.  The lock protects the
> +     * pointer and the regions.
> +     */
> +    QemuMutex current_regions_lock;
> +    HostmemRegion *current_regions;
> +    size_t num_current_regions;
> +} Hostmem;
> +
> +void hostmem_init(Hostmem *hostmem);
> +void hostmem_finalize(Hostmem *hostmem);
> +
> +/**
> + * Map a guest physical address to a pointer
> + */
> +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write);
> +
> +#endif /* HOSTMEM_H */
> -- 
> 1.8.0
Stefan Hajnoczi Nov. 29, 2012, 2:26 p.m. UTC | #6
On Thu, Nov 29, 2012 at 03:54:25PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > The data plane thread needs to map guest physical addresses to host
> > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > function assumes the global mutex is held.  The data plane thread does
> > not touch the global mutex and therefore needs a thread-safe memory
> > mapping mechanism.
> > 
> > Hostmem registers a MemoryListener similar to how vhost collects and
> > pushes memory region information into the kernel.  There is a
> > fine-grained lock on the regions list which is held during lookup and
> > when installing a new regions list.
> > 
> > When the physical memory map changes the MemoryListener callbacks are
> > invoked.  They build up a new list of memory regions which is finally
> > installed when the list has been completed.
> > 
> > Note that this approach is not safe across memory hotplug because mapped
> > pointers may still be in used across memory unplug.  However, this is
> > currently a problem for QEMU in general and needs to be addressed in the
> > future.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> 
> Worth bothering with binary search?
> vhost does a linear search over regions because
> the number of ram regions is very small.

memory.c does binary search.  I did the same but in practice there are
<20 regions for a simple VM.  It's probably not worth it but without
performance results this is speculation.

I think there's no harm in using binary search to start with.

> > +static void hostmem_listener_append_region(MemoryListener *listener,
> > +                                           MemoryRegionSection *section)
> > +{
> > +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> > +
> > +    if (memory_region_is_ram(section->mr)) {
> > +        hostmem_append_new_region(hostmem, section);
> > +    }
> 
> I think you also need to remove VGA region since you
> don't mark these pages as dirty so access there won't work.

I don't understand.  If memory in the VGA region returns true from
memory_region_is_ram(), why would there be a problem?
Michael S. Tsirkin Nov. 29, 2012, 2:36 p.m. UTC | #7
On Thu, Nov 29, 2012 at 03:26:56PM +0100, Stefan Hajnoczi wrote:
> On Thu, Nov 29, 2012 at 03:54:25PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > > The data plane thread needs to map guest physical addresses to host
> > > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > > function assumes the global mutex is held.  The data plane thread does
> > > not touch the global mutex and therefore needs a thread-safe memory
> > > mapping mechanism.
> > > 
> > > Hostmem registers a MemoryListener similar to how vhost collects and
> > > pushes memory region information into the kernel.  There is a
> > > fine-grained lock on the regions list which is held during lookup and
> > > when installing a new regions list.
> > > 
> > > When the physical memory map changes the MemoryListener callbacks are
> > > invoked.  They build up a new list of memory regions which is finally
> > > installed when the list has been completed.
> > > 
> > > Note that this approach is not safe across memory hotplug because mapped
> > > pointers may still be in used across memory unplug.  However, this is
> > > currently a problem for QEMU in general and needs to be addressed in the
> > > future.
> > > 
> > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > 
> > Worth bothering with binary search?
> > vhost does a linear search over regions because
> > the number of ram regions is very small.
> 
> memory.c does binary search.  I did the same but in practice there are
> <20 regions for a simple VM.  It's probably not worth it but without
> performance results this is speculation.
> 
> I think there's no harm in using binary search to start with.
> 
> > > +static void hostmem_listener_append_region(MemoryListener *listener,
> > > +                                           MemoryRegionSection *section)
> > > +{
> > > +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> > > +
> > > +    if (memory_region_is_ram(section->mr)) {
> > > +        hostmem_append_new_region(hostmem, section);
> > > +    }
> > 
> > I think you also need to remove VGA region since you
> > don't mark these pages as dirty so access there won't work.
> 
> I don't understand.  If memory in the VGA region returns true from
> memory_region_is_ram(), why would there be a problem?

If you change this memory, the display does not get updated.
This never happens with non-buggy guests, but we should catch it and fail if it does.
Paolo Bonzini Nov. 29, 2012, 3:26 p.m. UTC | #8
> > I don't understand.  If memory in the VGA region returns true from
> > memory_region_is_ram(), why would there be a problem?
> 
> If you change this memory but you don't update the display.
> Never happens with non buggy guests but we should catch and fail if
> it does.

Actually it _could_ happen with an MS-DOS guest that does the oh-so-retro

    DEF SEG=&HB800
    BLOAD "file.pic", 0

(I think).

Paolo
Stefan Hajnoczi Dec. 5, 2012, 8:13 a.m. UTC | #9
On Thu, Nov 29, 2012 at 02:57:05PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 29, 2012 at 02:54:26PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 29, 2012 at 01:45:19PM +0100, Stefan Hajnoczi wrote:
> > > On Thu, Nov 29, 2012 at 02:33:11PM +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > > > > The data plane thread needs to map guest physical addresses to host
> > > > > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > > > > function assumes the global mutex is held.  The data plane thread does
> > > > > not touch the global mutex and therefore needs a thread-safe memory
> > > > > mapping mechanism.
> > > > > 
> > > > > Hostmem registers a MemoryListener similar to how vhost collects and
> > > > > pushes memory region information into the kernel.  There is a
> > > > > fine-grained lock on the regions list which is held during lookup and
> > > > > when installing a new regions list.
> > > > > 
> > > > > When the physical memory map changes the MemoryListener callbacks are
> > > > > invoked.  They build up a new list of memory regions which is finally
> > > > > installed when the list has been completed.
> > > > > 
> > > > > Note that this approach is not safe across memory hotplug because mapped
> > > > > pointers may still be in used across memory unplug.  However, this is
> > > > > currently a problem for QEMU in general and needs to be addressed in the
> > > > > future.
> > > > 
> > > > Sounds like a serious problem.
> > > > I'm not sure I understand - do you say this currently a problem for QEMU
> > > > virtio? Coul you give an example please?
> > > 
> > > This is a limitation of the memory API but cannot be triggered by users
> > > today since we don't support memory hot unplug.  I'm just explaining
> > > that virtio-blk-data-plane has the same issue as hw/virtio-blk.c or any
> > > other device emulation code here.
> > > 
> > > Some more detail:
> > > 
> > > The issue is that hw/virtio-blk.c submits an asynchronous I/O request on
> > > the host with the guest buffer.  Then virtio-blk emulation returns back
> > > to the caller and continues QEMU execution.
> > > 
> > > It is unsafe to unplug memory while the I/O request is pending since
> > > there's no mechanism (e.g. refcount) to wait until the guest memory is
> > > no longer in use.
> > > 
> > > This is a known issue.  There's no way to trigger a problem today but we
> > > need to eventually enhance QEMU's memory API to handle this case.
> > > 
> > > Stefan
> > 
> > For this problem we would simply need to flush outstanding aio
> > before freeing memory for unplug, no refcount necessary.
> > 
> > This patch however introduces the issue in the frontend
> > and it looks like there won't be any way to solve
> > it without changing the API.
> 
> To clarify, as you say it is not triggerable
> so I don't think this is strictly required to address
> this at this point though it should not be too hard:
> just register callback that flushes the frontend processing.
> 
> But if you can't code it at this point, please add
> a TODO comment in code.

Yes, I'm adding a TODO and your suggestion to flush the frontend sounds
like a simple solution - we already quiesce at other critical points
like live migration.

Stefan
Stefan Hajnoczi Dec. 5, 2012, 8:31 a.m. UTC | #10
On Thu, Nov 29, 2012 at 04:36:08PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 29, 2012 at 03:26:56PM +0100, Stefan Hajnoczi wrote:
> > On Thu, Nov 29, 2012 at 03:54:25PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > > > The data plane thread needs to map guest physical addresses to host
> > > > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > > > function assumes the global mutex is held.  The data plane thread does
> > > > not touch the global mutex and therefore needs a thread-safe memory
> > > > mapping mechanism.
> > > > 
> > > > Hostmem registers a MemoryListener similar to how vhost collects and
> > > > pushes memory region information into the kernel.  There is a
> > > > fine-grained lock on the regions list which is held during lookup and
> > > > when installing a new regions list.
> > > > 
> > > > When the physical memory map changes the MemoryListener callbacks are
> > > > invoked.  They build up a new list of memory regions which is finally
> > > > installed when the list has been completed.
> > > > 
> > > > Note that this approach is not safe across memory hotplug because mapped
> > > > pointers may still be in used across memory unplug.  However, this is
> > > > currently a problem for QEMU in general and needs to be addressed in the
> > > > future.
> > > > 
> > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > 
> > > Worth bothering with binary search?
> > > vhost does a linear search over regions because
> > > the number of ram regions is very small.
> > 
> > memory.c does binary search.  I did the same but in practice there are
> > <20 regions for a simple VM.  It's probably not worth it but without
> > performance results this is speculation.
> > 
> > I think there's no harm in using binary search to start with.
> > 
> > > > +static void hostmem_listener_append_region(MemoryListener *listener,
> > > > +                                           MemoryRegionSection *section)
> > > > +{
> > > > +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> > > > +
> > > > +    if (memory_region_is_ram(section->mr)) {
> > > > +        hostmem_append_new_region(hostmem, section);
> > > > +    }
> > > 
> > > I think you also need to remove VGA region since you
> > > don't mark these pages as dirty so access there won't work.
> > 
> > I don't understand.  If memory in the VGA region returns true from
> > memory_region_is_ram(), why would there be a problem?
> 
> If you change this memory but you don't update the display.
> Never happens with non buggy guests but we should catch and fail if it does.

Okay, I took a look at the VGA code and I think it makes sense now.  We
have VRAM as a regular RAM region so that writes to it are cheap.  To
avoid scanning or redrawing VRAM on every update we use dirty logging.

Since virtio-blk-data-plane does not mark pages dirty, an I/O buffer in
VRAM would fail to update the display correctly.

I will try to put in a check to omit the VGA region.  It can be dropped
in the future when we use the memory API with dirty logging from the
data plane thread.

Stefan
Michael S. Tsirkin Dec. 5, 2012, 11:22 a.m. UTC | #11
On Wed, Dec 05, 2012 at 09:31:56AM +0100, Stefan Hajnoczi wrote:
> On Thu, Nov 29, 2012 at 04:36:08PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 29, 2012 at 03:26:56PM +0100, Stefan Hajnoczi wrote:
> > > On Thu, Nov 29, 2012 at 03:54:25PM +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Nov 22, 2012 at 04:16:44PM +0100, Stefan Hajnoczi wrote:
> > > > > The data plane thread needs to map guest physical addresses to host
> > > > > pointers.  Normally this is done with cpu_physical_memory_map() but the
> > > > > function assumes the global mutex is held.  The data plane thread does
> > > > > not touch the global mutex and therefore needs a thread-safe memory
> > > > > mapping mechanism.
> > > > > 
> > > > > Hostmem registers a MemoryListener similar to how vhost collects and
> > > > > pushes memory region information into the kernel.  There is a
> > > > > fine-grained lock on the regions list which is held during lookup and
> > > > > when installing a new regions list.
> > > > > 
> > > > > When the physical memory map changes the MemoryListener callbacks are
> > > > > invoked.  They build up a new list of memory regions which is finally
> > > > > installed when the list has been completed.
> > > > > 
> > > > > Note that this approach is not safe across memory hotplug because mapped
> > > > > pointers may still be in used across memory unplug.  However, this is
> > > > > currently a problem for QEMU in general and needs to be addressed in the
> > > > > future.
> > > > > 
> > > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > > 
> > > > Worth bothering with binary search?
> > > > vhost does a linear search over regions because
> > > > the number of ram regions is very small.
> > > 
> > > memory.c does binary search.  I did the same but in practice there are
> > > <20 regions for a simple VM.  It's probably not worth it but without
> > > performance results this is speculation.
> > > 
> > > I think there's no harm in using binary search to start with.
> > > 
> > > > > +static void hostmem_listener_append_region(MemoryListener *listener,
> > > > > +                                           MemoryRegionSection *section)
> > > > > +{
> > > > > +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> > > > > +
> > > > > +    if (memory_region_is_ram(section->mr)) {
> > > > > +        hostmem_append_new_region(hostmem, section);
> > > > > +    }
> > > > 
> > > > I think you also need to remove VGA region since you
> > > > don't mark these pages as dirty so access there won't work.
> > > 
> > > I don't understand.  If memory in the VGA region returns true from
> > > memory_region_is_ram(), why would there be a problem?
> > 
> > If you change this memory but you don't update the display.
> > Never happens with non buggy guests but we should catch and fail if it does.
> 
> Okay, I took a look at the VGA code and I think it makes sense now.  We
> have VRAM as a regular RAM region so that writes to it are cheap.  To
> avoid scanning or redrawing VRAM on every update we use dirty logging.
> 
> Since virtio-blk-data-plane does not mark pages dirty an I/O buffer in
> VRAM would fail to update the display correctly.
> 
> I will try to put in a check to omit the VGA region.

There are many ways to do this but I guess the simplest
is to detect dirty logging and skip that region.

>  It can be dropped
> in the future when we use the memory API with dirty logging from the
> data plane thread.
> 
> Stefan

Or not - there's also the issue that e.g. cirrus does tricks
with memory mapping in the data path. So skipping
that region might help performance.
diff mbox

Patch

diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
new file mode 100644
index 0000000..8c8dea1
--- /dev/null
+++ b/hw/dataplane/Makefile.objs
@@ -0,0 +1,3 @@ 
+ifeq ($(CONFIG_VIRTIO), y)
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o
+endif
diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c
new file mode 100644
index 0000000..48aabf0
--- /dev/null
+++ b/hw/dataplane/hostmem.c
@@ -0,0 +1,165 @@ 
+/*
+ * Thread-safe guest to host memory mapping
+ *
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "exec-memory.h"
+#include "hostmem.h"
+
+/**
+ * bsearch() comparator locating the region that contains a guest address
+ *
+ * @phys_:   points to the hwaddr being looked up (the search key)
+ * @region_: points to the HostmemRegion array element being probed
+ *
+ * Returns -1 if the address lies below the region, 1 if it lies at or beyond
+ * the end of the region, and 0 if the region contains the address.
+ */
+static int hostmem_lookup_cmp(const void *phys_, const void *region_)
+{
+    hwaddr phys = *(const hwaddr *)phys_;
+    const HostmemRegion *region = region_;
+
+    if (phys < region->guest_addr) {
+        return -1;
+    }
+
+    /* Compare offsets instead of computing guest_addr + size: the sum wraps
+     * around for a region that ends at the top of the address space, which
+     * would make every address inside it look out of range.  The subtraction
+     * cannot underflow because phys >= guest_addr here.
+     */
+    if (phys - region->guest_addr >= region->size) {
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * Map guest physical address to host pointer
+ *
+ * @hostmem:  mapping state whose current regions list is searched
+ * @phys:     guest physical address of the first byte
+ * @len:      number of bytes that must be contiguous within one region
+ * @is_write: true if the caller intends to write through the pointer
+ *
+ * The current regions list is searched while current_regions_lock is held.
+ *
+ * Returns the host pointer corresponding to @phys, or NULL if no region
+ * contains @phys, if @is_write is set and the region is read-only, or if
+ * [@phys, @phys + @len) extends past the end of the region.
+ */
+void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write)
+{
+    HostmemRegion *region;
+    void *host_addr = NULL;
+    hwaddr offset_within_region;
+
+    qemu_mutex_lock(&hostmem->current_regions_lock);
+    region = bsearch(&phys, hostmem->current_regions,
+                     hostmem->num_current_regions,
+                     sizeof(hostmem->current_regions[0]),
+                     hostmem_lookup_cmp);
+    if (!region) {
+        goto out;
+    }
+    if (is_write && region->readonly) {
+        goto out;
+    }
+    offset_within_region = phys - region->guest_addr;
+
+    /* Bounds check without overflow: the comparator only matches when phys
+     * lies inside the region, so offset_within_region < region->size and the
+     * subtraction is safe, whereas offset_within_region + len could wrap for
+     * a huge len and wrongly pass the check.
+     */
+    if (len <= region->size - offset_within_region) {
+        host_addr = region->host_addr + offset_within_region;
+    }
+out:
+    qemu_mutex_unlock(&hostmem->current_regions_lock);
+
+    return host_addr;
+}
+
+/**
+ * MemoryListener commit callback: publish the regions list built so far
+ *
+ * While current_regions_lock is held, the previous current list is freed and
+ * the new list (pointer and element count) takes its place.  Afterwards the
+ * new-list fields are cleared so the next update starts from an empty list.
+ */
+static void hostmem_listener_commit(MemoryListener *listener)
+{
+    Hostmem *hm = container_of(listener, Hostmem, listener);
+    QemuMutex *lock = &hm->current_regions_lock;
+
+    qemu_mutex_lock(lock);
+    g_free(hm->current_regions);
+    hm->num_current_regions = hm->num_new_regions;
+    hm->current_regions = hm->new_regions;
+    qemu_mutex_unlock(lock);
+
+    /* The array now belongs to current_regions; begin a fresh list */
+    hm->num_new_regions = 0;
+    hm->new_regions = NULL;
+}
+
+/**
+ * Append one MemoryRegionSection to the regions list under construction
+ *
+ * @hostmem: Hostmem whose new_regions array is grown by one element
+ * @section: section supplying the guest address, size and read-only flag;
+ *           the host address is the RAM pointer of its memory region plus
+ *           the section's offset within that region
+ */
+static void hostmem_append_new_region(Hostmem *hostmem,
+                                      MemoryRegionSection *section)
+{
+    void *mr_base = memory_region_get_ram_ptr(section->mr);
+    size_t idx = hostmem->num_new_regions;
+    HostmemRegion entry = {
+        .host_addr = mr_base + section->offset_within_region,
+        .guest_addr = section->offset_within_address_space,
+        .size = section->size,
+        .readonly = section->readonly,
+    };
+
+    /* Grow the array by exactly one slot and store the entry in it */
+    hostmem->new_regions = g_realloc(hostmem->new_regions,
+                                     (idx + 1) * sizeof(entry));
+    hostmem->new_regions[idx] = entry;
+    hostmem->num_new_regions = idx + 1;
+}
+
+/**
+ * MemoryListener section callback: record a section that is safe to map
+ *
+ * @listener: the listener embedded in a Hostmem
+ * @section:  memory region section reported by the memory API
+ *
+ * Only plain RAM sections without dirty logging are added to the new
+ * regions list; everything else is left out, so hostmem_lookup() returns
+ * NULL for addresses inside it.
+ */
+static void hostmem_listener_append_region(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    Hostmem *hostmem = container_of(listener, Hostmem, listener);
+
+    /* Ignore non-RAM regions, there is no host RAM pointer to hand out */
+    if (!memory_region_is_ram(section->mr)) {
+        return;
+    }
+
+    /* Ignore regions with dirty logging (e.g. VGA VRAM): memory obtained
+     * from hostmem_lookup() is written through a raw host pointer without
+     * marking pages dirty, so such writes would go unnoticed.
+     */
+    if (memory_region_is_logging(section->mr)) {
+        return;
+    }
+
+    hostmem_append_new_region(hostmem, section);
+}
+
+/* We don't implement most MemoryListener callbacks, use these nop stubs.
+ *
+ * There is one stub per callback signature.  hostmem_init() installs them
+ * for the begin, region_del, log_*, eventfd_* and coalesced_mmio_* hooks;
+ * only commit, region_add and region_nop do real work.
+ */
+static void hostmem_listener_dummy(MemoryListener *listener)
+{
+}
+
+static void hostmem_listener_section_dummy(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+}
+
+static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
+                                           MemoryRegionSection *section,
+                                           bool match_data, uint64_t data,
+                                           EventNotifier *e)
+{
+}
+
+static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
+                                                  MemoryRegionSection *section,
+                                                  hwaddr addr, hwaddr len)
+{
+}
+
+/**
+ * Initialize a Hostmem and start tracking guest RAM
+ *
+ * @hostmem: caller-allocated structure; its previous contents are discarded
+ *
+ * Zeroes the structure, initializes the regions lock and registers the
+ * memory listener on address_space_memory.  Any regions collected during
+ * registration that have not been installed yet are committed before
+ * returning.  Pair with hostmem_finalize().
+ */
+void hostmem_init(Hostmem *hostmem)
+{
+    memset(hostmem, 0, sizeof(*hostmem));
+
+    /* The commit callback and hostmem_lookup() take this lock, so it must
+     * be initialized before the listener is registered.
+     */
+    qemu_mutex_init(&hostmem->current_regions_lock);
+
+    hostmem->listener = (MemoryListener){
+        .begin = hostmem_listener_dummy,
+        .commit = hostmem_listener_commit,
+        .region_add = hostmem_listener_append_region,
+        .region_del = hostmem_listener_section_dummy,
+        .region_nop = hostmem_listener_append_region,
+        .log_start = hostmem_listener_section_dummy,
+        .log_stop = hostmem_listener_section_dummy,
+        .log_sync = hostmem_listener_section_dummy,
+        .log_global_start = hostmem_listener_dummy,
+        .log_global_stop = hostmem_listener_dummy,
+        .eventfd_add = hostmem_listener_eventfd_dummy,
+        .eventfd_del = hostmem_listener_eventfd_dummy,
+        .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
+        .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
+        .priority = 10,
+    };
+
+    memory_listener_register(&hostmem->listener, &address_space_memory);
+
+    /* Install regions that were appended but not yet committed */
+    if (hostmem->num_new_regions > 0) {
+        hostmem_listener_commit(&hostmem->listener);
+    }
+}
+
+/**
+ * Stop tracking guest RAM and release everything hostmem_init() set up
+ *
+ * @hostmem: structure previously passed to hostmem_init()
+ *
+ * The listener is unregistered first so no callback can touch the lists or
+ * the lock while they are being torn down.
+ */
+void hostmem_finalize(Hostmem *hostmem)
+{
+    memory_listener_unregister(&hostmem->listener);
+    g_free(hostmem->new_regions);
+    g_free(hostmem->current_regions);
+    qemu_mutex_destroy(&hostmem->current_regions_lock);
+}
diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h
new file mode 100644
index 0000000..a833b74
--- /dev/null
+++ b/hw/dataplane/hostmem.h
@@ -0,0 +1,52 @@ 
+/*
+ * Thread-safe guest to host memory mapping
+ *
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HOSTMEM_H
+#define HOSTMEM_H
+
+#include "memory.h"
+#include "qemu-thread.h"
+
+typedef struct {
+    void *host_addr;    /* host pointer corresponding to guest_addr */
+    hwaddr guest_addr;  /* guest physical address where the region starts */
+    uint64_t size;      /* length of the region in bytes */
+    bool readonly;      /* if set, hostmem_lookup() refuses is_write maps */
+} HostmemRegion;
+
+typedef struct {
+    /* The listener is invoked when regions change and a new list of regions is
+     * built up completely before they are installed.  new_regions is that
+     * list under construction; it is accessed without taking
+     * current_regions_lock.
+     */
+    MemoryListener listener;
+    HostmemRegion *new_regions;
+    size_t num_new_regions;         /* number of elements in new_regions */
+
+    /* Current regions are accessed from multiple threads either to lookup
+     * addresses or to install a new list of regions.  The lock protects the
+     * pointer and the regions.  hostmem_lookup() runs bsearch() over this
+     * array, keyed on guest address.
+     */
+    QemuMutex current_regions_lock;
+    HostmemRegion *current_regions;
+    size_t num_current_regions;     /* number of elements in current_regions */
+} Hostmem;
+
+void hostmem_init(Hostmem *hostmem);      /* zero state, register listener */
+void hostmem_finalize(Hostmem *hostmem);  /* unregister listener, free lists */
+
+/**
+ * Map a guest physical address to a pointer
+ *
+ * Returns the host pointer for @phys, or NULL if @phys is not inside a
+ * known region, if @is_write is set for a read-only region, or if @len
+ * bytes starting at @phys do not fit inside that region.
+ */
+void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write);
+
+#endif /* HOSTMEM_H */