diff mbox

[v7,03/12] dataplane: add host memory mapping code

Message ID 1355508045-28120-4-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Dec. 14, 2012, 6 p.m. UTC
The data plane thread needs to map guest physical addresses to host
pointers.  Normally this is done with cpu_physical_memory_map() but the
function assumes the global mutex is held.  The data plane thread does
not touch the global mutex and therefore needs a thread-safe memory
mapping mechanism.

Hostmem registers a MemoryListener similar to how vhost collects and
pushes memory region information into the kernel.  There is a
fine-grained lock on the regions list which is held during lookup and
when installing a new regions list.

When the physical memory map changes the MemoryListener callbacks are
invoked.  They build up a new list of memory regions which is finally
installed when the list has been completed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/Makefile.objs           |   2 +-
 hw/dataplane/Makefile.objs |   3 +
 hw/dataplane/hostmem.c     | 176 +++++++++++++++++++++++++++++++++++++++++++++
 hw/dataplane/hostmem.h     |  57 +++++++++++++++
 4 files changed, 237 insertions(+), 1 deletion(-)
 create mode 100644 hw/dataplane/Makefile.objs
 create mode 100644 hw/dataplane/hostmem.c
 create mode 100644 hw/dataplane/hostmem.h

Comments

Blue Swirl Dec. 14, 2012, 8:34 p.m. UTC | #1
On Fri, Dec 14, 2012 at 6:00 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> The data plane thread needs to map guest physical addresses to host
> pointers.  Normally this is done with cpu_physical_memory_map() but the
> function assumes the global mutex is held.  The data plane thread does
> not touch the global mutex and therefore needs a thread-safe memory
> mapping mechanism.
>
> Hostmem registers a MemoryListener similar to how vhost collects and
> pushes memory region information into the kernel.  There is a
> fine-grained lock on the regions list which is held during lookup and
> when installing a new regions list.
>
> When the physical memory map changes the MemoryListener callbacks are
> invoked.  They build up a new list of memory regions which is finally
> installed when the list has been completed.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/Makefile.objs           |   2 +-
>  hw/dataplane/Makefile.objs |   3 +
>  hw/dataplane/hostmem.c     | 176 +++++++++++++++++++++++++++++++++++++++++++++
>  hw/dataplane/hostmem.h     |  57 +++++++++++++++
>  4 files changed, 237 insertions(+), 1 deletion(-)
>  create mode 100644 hw/dataplane/Makefile.objs
>  create mode 100644 hw/dataplane/hostmem.c
>  create mode 100644 hw/dataplane/hostmem.h
>
> diff --git a/hw/Makefile.objs b/hw/Makefile.objs
> index d581d8d..cec84bc 100644
> --- a/hw/Makefile.objs
> +++ b/hw/Makefile.objs
> @@ -1,4 +1,4 @@
> -common-obj-y = usb/ ide/
> +common-obj-y = usb/ ide/ dataplane/
>  common-obj-y += loader.o
>  common-obj-$(CONFIG_VIRTIO) += virtio-console.o
>  common-obj-$(CONFIG_VIRTIO) += virtio-rng.o
> diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
> new file mode 100644
> index 0000000..8c8dea1
> --- /dev/null
> +++ b/hw/dataplane/Makefile.objs
> @@ -0,0 +1,3 @@
> +ifeq ($(CONFIG_VIRTIO), y)
> +common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o
> +endif
> diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c
> new file mode 100644
> index 0000000..92f6196
> --- /dev/null
> +++ b/hw/dataplane/hostmem.c
> @@ -0,0 +1,176 @@
> +/*
> + * Thread-safe guest to host memory mapping
> + *
> + * Copyright 2012 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *   Stefan Hajnoczi <stefanha@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "exec-memory.h"
> +#include "hostmem.h"
> +
> +static int hostmem_lookup_cmp(const void *phys_, const void *region_)
> +{
> +    hwaddr phys = *(const hwaddr *)phys_;
> +    const HostmemRegion *region = region_;
> +
> +    if (phys < region->guest_addr) {
> +        return -1;
> +    } else if (phys >= region->guest_addr + region->size) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +/**
> + * Map guest physical address to host pointer
> + */
> +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write)
> +{
> +    HostmemRegion *region;
> +    void *host_addr = NULL;
> +    hwaddr offset_within_region;
> +
> +    qemu_mutex_lock(&hostmem->current_regions_lock);
> +    region = bsearch(&phys, hostmem->current_regions,
> +                     hostmem->num_current_regions,
> +                     sizeof(hostmem->current_regions[0]),
> +                     hostmem_lookup_cmp);
> +    if (!region) {
> +        goto out;
> +    }
> +    if (is_write && region->readonly) {
> +        goto out;
> +    }
> +    offset_within_region = phys - region->guest_addr;
> +    if (len <= region->size - offset_within_region) {
> +        host_addr = region->host_addr + offset_within_region;
> +    }
> +out:
> +    qemu_mutex_unlock(&hostmem->current_regions_lock);
> +
> +    return host_addr;
> +}
> +
> +/**
> + * Install new regions list
> + */
> +static void hostmem_listener_commit(MemoryListener *listener)
> +{
> +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> +
> +    qemu_mutex_lock(&hostmem->current_regions_lock);
> +    g_free(hostmem->current_regions);
> +    hostmem->current_regions = hostmem->new_regions;
> +    hostmem->num_current_regions = hostmem->num_new_regions;
> +    qemu_mutex_unlock(&hostmem->current_regions_lock);
> +
> +    /* Reset new regions list */
> +    hostmem->new_regions = NULL;
> +    hostmem->num_new_regions = 0;
> +}
> +
> +/**
> + * Add a MemoryRegionSection to the new regions list
> + */
> +static void hostmem_append_new_region(Hostmem *hostmem,
> +                                      MemoryRegionSection *section)
> +{
> +    void *ram_ptr = memory_region_get_ram_ptr(section->mr);
> +    size_t num = hostmem->num_new_regions;
> +    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
> +
> +    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
> +    hostmem->new_regions[num] = (HostmemRegion){
> +        .host_addr = ram_ptr + section->offset_within_region,
> +        .guest_addr = section->offset_within_address_space,
> +        .size = section->size,
> +        .readonly = section->readonly,
> +    };
> +    hostmem->num_new_regions++;
> +}
> +
> +static void hostmem_listener_append_region(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    Hostmem *hostmem = container_of(listener, Hostmem, listener);
> +
> +    /* Ignore non-RAM regions, we may not be able to map them */
> +    if (!memory_region_is_ram(section->mr)) {
> +        return;
> +    }
> +
> +    /* Ignore regions with dirty logging, we cannot mark them dirty */
> +    if (memory_region_is_logging(section->mr)) {
> +        return;
> +    }
> +
> +    hostmem_append_new_region(hostmem, section);
> +}
> +
> +/* We don't implement most MemoryListener callbacks, use these nop stubs */
> +static void hostmem_listener_dummy(MemoryListener *listener)
> +{
> +}
> +
> +static void hostmem_listener_section_dummy(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +}
> +
> +static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
> +                                           MemoryRegionSection *section,
> +                                           bool match_data, uint64_t data,
> +                                           EventNotifier *e)
> +{
> +}
> +
> +static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
> +                                                  MemoryRegionSection *section,
> +                                                  hwaddr addr, hwaddr len)
> +{
> +}
> +
> +void hostmem_init(Hostmem *hostmem)
> +{
> +    memset(hostmem, 0, sizeof(*hostmem));
> +
> +    qemu_mutex_init(&hostmem->current_regions_lock);
> +
> +    hostmem->listener = (MemoryListener){
> +        .begin = hostmem_listener_dummy,
> +        .commit = hostmem_listener_commit,
> +        .region_add = hostmem_listener_append_region,
> +        .region_del = hostmem_listener_section_dummy,
> +        .region_nop = hostmem_listener_append_region,
> +        .log_start = hostmem_listener_section_dummy,
> +        .log_stop = hostmem_listener_section_dummy,
> +        .log_sync = hostmem_listener_section_dummy,
> +        .log_global_start = hostmem_listener_dummy,
> +        .log_global_stop = hostmem_listener_dummy,
> +        .eventfd_add = hostmem_listener_eventfd_dummy,
> +        .eventfd_del = hostmem_listener_eventfd_dummy,
> +        .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
> +        .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
> +        .priority = 10,
> +    };
> +
> +    memory_listener_register(&hostmem->listener, &address_space_memory);
> +    if (hostmem->num_new_regions > 0) {
> +        hostmem_listener_commit(&hostmem->listener);
> +    }
> +}
> +
> +void hostmem_finalize(Hostmem *hostmem)
> +{
> +    memory_listener_unregister(&hostmem->listener);
> +    g_free(hostmem->new_regions);
> +    g_free(hostmem->current_regions);
> +    qemu_mutex_destroy(&hostmem->current_regions_lock);
> +}
> diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h
> new file mode 100644
> index 0000000..6d87841
> --- /dev/null
> +++ b/hw/dataplane/hostmem.h
> @@ -0,0 +1,57 @@
> +/*
> + * Thread-safe guest to host memory mapping
> + *
> + * Copyright 2012 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *   Stefan Hajnoczi <stefanha@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef HOSTMEM_H
> +#define HOSTMEM_H
> +
> +#include "memory.h"
> +#include "qemu-thread.h"
> +
> +typedef struct {
> +    void *host_addr;
> +    hwaddr guest_addr;
> +    uint64_t size;
> +    bool readonly;
> +} HostmemRegion;

This should be HostMemRegion.

> +
> +typedef struct {
> +    /* The listener is invoked when regions change and a new list of regions is
> +     * built up completely before they are installed.
> +     */
> +    MemoryListener listener;
> +    HostmemRegion *new_regions;
> +    size_t num_new_regions;
> +
> +    /* Current regions are accessed from multiple threads either to lookup
> +     * addresses or to install a new list of regions.  The lock protects the
> +     * pointer and the regions.
> +     */
> +    QemuMutex current_regions_lock;
> +    HostmemRegion *current_regions;
> +    size_t num_current_regions;
> +} Hostmem;

And this HostMem.

> +
> +void hostmem_init(Hostmem *hostmem);
> +void hostmem_finalize(Hostmem *hostmem);
> +
> +/**
> + * Map a guest physical address to a pointer
> + *
> + * Note that there is map/unmap mechanism here.  The caller must ensure that
> + * mapped memory is no longer used across events like hot memory unplug.  This
> + * can be done with other mechanisms like bdrv_drain_all() that quiesce
> + * in-flight I/O.
> + */
> +void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write);
> +
> +#endif /* HOSTMEM_H */
> --
> 1.8.0.2
>
Stefan Hajnoczi Dec. 17, 2012, 9:10 a.m. UTC | #2
On Fri, Dec 14, 2012 at 9:34 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Fri, Dec 14, 2012 at 6:00 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>> +typedef struct {
>> +    void *host_addr;
>> +    hwaddr guest_addr;
>> +    uint64_t size;
>> +    bool readonly;
>> +} HostmemRegion;
>
> This should be HostMemRegion.
>
>> +
>> +typedef struct {
>> +    /* The listener is invoked when regions change and a new list of regions is
>> +     * built up completely before they are installed.
>> +     */
>> +    MemoryListener listener;
>> +    HostmemRegion *new_regions;
>> +    size_t num_new_regions;
>> +
>> +    /* Current regions are accessed from multiple threads either to lookup
>> +     * addresses or to install a new list of regions.  The lock protects the
>> +     * pointer and the regions.
>> +     */
>> +    QemuMutex current_regions_lock;
>> +    HostmemRegion *current_regions;
>> +    size_t num_current_regions;
>> +} Hostmem;
>
> And this HostMem.

Okay, will fix both.

Stefan
diff mbox

Patch

diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index d581d8d..cec84bc 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -1,4 +1,4 @@ 
-common-obj-y = usb/ ide/
+common-obj-y = usb/ ide/ dataplane/
 common-obj-y += loader.o
 common-obj-$(CONFIG_VIRTIO) += virtio-console.o
 common-obj-$(CONFIG_VIRTIO) += virtio-rng.o
diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
new file mode 100644
index 0000000..8c8dea1
--- /dev/null
+++ b/hw/dataplane/Makefile.objs
@@ -0,0 +1,3 @@ 
+ifeq ($(CONFIG_VIRTIO), y)
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o
+endif
diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c
new file mode 100644
index 0000000..92f6196
--- /dev/null
+++ b/hw/dataplane/hostmem.c
@@ -0,0 +1,176 @@ 
+/*
+ * Thread-safe guest to host memory mapping
+ *
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "exec-memory.h"
+#include "hostmem.h"
+
+static int hostmem_lookup_cmp(const void *phys_, const void *region_)
+{
+    hwaddr phys = *(const hwaddr *)phys_;
+    const HostmemRegion *region = region_;
+
+    if (phys < region->guest_addr) {
+        return -1;
+    } else if (phys >= region->guest_addr + region->size) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/**
+ * Map guest physical address to host pointer
+ */
+void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write)
+{
+    HostmemRegion *region;
+    void *host_addr = NULL;
+    hwaddr offset_within_region;
+
+    qemu_mutex_lock(&hostmem->current_regions_lock);
+    region = bsearch(&phys, hostmem->current_regions,
+                     hostmem->num_current_regions,
+                     sizeof(hostmem->current_regions[0]),
+                     hostmem_lookup_cmp);
+    if (!region) {
+        goto out;
+    }
+    if (is_write && region->readonly) {
+        goto out;
+    }
+    offset_within_region = phys - region->guest_addr;
+    if (len <= region->size - offset_within_region) {
+        host_addr = region->host_addr + offset_within_region;
+    }
+out:
+    qemu_mutex_unlock(&hostmem->current_regions_lock);
+
+    return host_addr;
+}
+
+/**
+ * Install new regions list
+ */
+static void hostmem_listener_commit(MemoryListener *listener)
+{
+    Hostmem *hostmem = container_of(listener, Hostmem, listener);
+
+    qemu_mutex_lock(&hostmem->current_regions_lock);
+    g_free(hostmem->current_regions);
+    hostmem->current_regions = hostmem->new_regions;
+    hostmem->num_current_regions = hostmem->num_new_regions;
+    qemu_mutex_unlock(&hostmem->current_regions_lock);
+
+    /* Reset new regions list */
+    hostmem->new_regions = NULL;
+    hostmem->num_new_regions = 0;
+}
+
+/**
+ * Add a MemoryRegionSection to the new regions list
+ */
+static void hostmem_append_new_region(Hostmem *hostmem,
+                                      MemoryRegionSection *section)
+{
+    void *ram_ptr = memory_region_get_ram_ptr(section->mr);
+    size_t num = hostmem->num_new_regions;
+    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
+
+    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
+    hostmem->new_regions[num] = (HostmemRegion){
+        .host_addr = ram_ptr + section->offset_within_region,
+        .guest_addr = section->offset_within_address_space,
+        .size = section->size,
+        .readonly = section->readonly,
+    };
+    hostmem->num_new_regions++;
+}
+
+static void hostmem_listener_append_region(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    Hostmem *hostmem = container_of(listener, Hostmem, listener);
+
+    /* Ignore non-RAM regions, we may not be able to map them */
+    if (!memory_region_is_ram(section->mr)) {
+        return;
+    }
+
+    /* Ignore regions with dirty logging, we cannot mark them dirty */
+    if (memory_region_is_logging(section->mr)) {
+        return;
+    }
+
+    hostmem_append_new_region(hostmem, section);
+}
+
+/* We don't implement most MemoryListener callbacks, use these nop stubs */
+static void hostmem_listener_dummy(MemoryListener *listener)
+{
+}
+
+static void hostmem_listener_section_dummy(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+}
+
+static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
+                                           MemoryRegionSection *section,
+                                           bool match_data, uint64_t data,
+                                           EventNotifier *e)
+{
+}
+
+static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
+                                                  MemoryRegionSection *section,
+                                                  hwaddr addr, hwaddr len)
+{
+}
+
+void hostmem_init(Hostmem *hostmem)
+{
+    memset(hostmem, 0, sizeof(*hostmem));
+
+    qemu_mutex_init(&hostmem->current_regions_lock);
+
+    hostmem->listener = (MemoryListener){
+        .begin = hostmem_listener_dummy,
+        .commit = hostmem_listener_commit,
+        .region_add = hostmem_listener_append_region,
+        .region_del = hostmem_listener_section_dummy,
+        .region_nop = hostmem_listener_append_region,
+        .log_start = hostmem_listener_section_dummy,
+        .log_stop = hostmem_listener_section_dummy,
+        .log_sync = hostmem_listener_section_dummy,
+        .log_global_start = hostmem_listener_dummy,
+        .log_global_stop = hostmem_listener_dummy,
+        .eventfd_add = hostmem_listener_eventfd_dummy,
+        .eventfd_del = hostmem_listener_eventfd_dummy,
+        .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
+        .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
+        .priority = 10,
+    };
+
+    memory_listener_register(&hostmem->listener, &address_space_memory);
+    if (hostmem->num_new_regions > 0) {
+        hostmem_listener_commit(&hostmem->listener);
+    }
+}
+
+void hostmem_finalize(Hostmem *hostmem)
+{
+    memory_listener_unregister(&hostmem->listener);
+    g_free(hostmem->new_regions);
+    g_free(hostmem->current_regions);
+    qemu_mutex_destroy(&hostmem->current_regions_lock);
+}
diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h
new file mode 100644
index 0000000..6d87841
--- /dev/null
+++ b/hw/dataplane/hostmem.h
@@ -0,0 +1,57 @@ 
+/*
+ * Thread-safe guest to host memory mapping
+ *
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HOSTMEM_H
+#define HOSTMEM_H
+
+#include "memory.h"
+#include "qemu-thread.h"
+
+typedef struct {
+    void *host_addr;
+    hwaddr guest_addr;
+    uint64_t size;
+    bool readonly;
+} HostmemRegion;
+
+typedef struct {
+    /* The listener is invoked when regions change and a new list of regions is
+     * built up completely before they are installed.
+     */
+    MemoryListener listener;
+    HostmemRegion *new_regions;
+    size_t num_new_regions;
+
+    /* Current regions are accessed from multiple threads either to lookup
+     * addresses or to install a new list of regions.  The lock protects the
+     * pointer and the regions.
+     */
+    QemuMutex current_regions_lock;
+    HostmemRegion *current_regions;
+    size_t num_current_regions;
+} Hostmem;
+
+void hostmem_init(Hostmem *hostmem);
+void hostmem_finalize(Hostmem *hostmem);
+
+/**
+ * Map a guest physical address to a pointer
+ *
+ * Note that there is map/unmap mechanism here.  The caller must ensure that
+ * mapped memory is no longer used across events like hot memory unplug.  This
+ * can be done with other mechanisms like bdrv_drain_all() that quiesce
+ * in-flight I/O.
+ */
+void *hostmem_lookup(Hostmem *hostmem, hwaddr phys, hwaddr len, bool is_write);
+
+#endif /* HOSTMEM_H */