diff mbox

[v4,2/7] memory: introduce MemoryRegion container with reserved HVA range

Message ID 1436442444-132020-3-git-send-email-imammedo@redhat.com
State New
Headers show

Commit Message

Igor Mammedov July 9, 2015, 11:47 a.m. UTC
Patch adds
  - memory_region_init_hva_range()
  - memory_region_add_subregion_to_hva()
  - memory_region_find_hva_range()
API to allocate, map into, and look up a reserved HVA MemoryRegion.

A MemoryRegion with a reserved HVA range will be used to
provide a linear 1:1 HVA->GVA mapping for the RAM MemoryRegions
that are added as subregions inside it.

It will be used for memory hotplug and vhost integration,
reducing all hotplugged MemoryRegions down to a single
memory range descriptor, which makes it possible to overcome
vhost's limit on the number of allowed memory ranges.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
v1->v4:
  - fix offset calculation in memory_region_find_hva_range()
  - add memory_region_add_subregion_to_hva()
RFC->v1:
  - rename:
       memory_region_init_rsvd_hva -> memory_region_init_hva_range
       memory_region_find_rsvd_hva -> memory_region_find_hva_range
  - replace using ram_addr with "void *rsvd_hva"
  - guard linux specific calls with ifdef
  - split memory reservation into qemu_ram_reserve_hva()
---
 exec.c                    | 30 ++++++++++++++++++++++
 include/exec/cpu-common.h |  2 ++
 include/exec/memory.h     | 63 +++++++++++++++++++++++++++++++++++++++++++++--
 memory.c                  | 50 +++++++++++++++++++++++++++++++++++++
 4 files changed, 143 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/exec.c b/exec.c
index ca53537..562dae5 100644
--- a/exec.c
+++ b/exec.c
@@ -1339,6 +1339,59 @@  static int memory_try_enable_merging(void *addr, size_t len)
     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
 }
 
+#ifdef __linux__
+/*
+ * Reserve @length bytes of host virtual address space with an anonymous
+ * PROT_NONE, MAP_NORESERVE mapping.
+ *
+ * Returns the start of the reserved range, or NULL if mmap() failed.
+ */
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    void *hva = mmap(0, length, PROT_NONE,
+                     MAP_NORESERVE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+    /* mmap() signals failure with MAP_FAILED, which is not NULL */
+    return hva == MAP_FAILED ? NULL : hva;
+}
+
+/*
+ * Move the host mapping of the RAM block returned by find_ram_block(@addr)
+ * to @new_hva and record the new location in block->host.
+ *
+ * Reports the error and exits if mremap() fails.
+ */
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    RAMBlock *block = find_ram_block(addr);
+    void *hva;
+
+    assert(block);
+    hva = mremap(block->host, block->used_length, block->used_length,
+                 MREMAP_MAYMOVE | MREMAP_FIXED, new_hva);
+    if (hva == MAP_FAILED) {
+        /* never store MAP_FAILED as the block's host pointer */
+        perror("mremap");
+        exit(1);
+    }
+    block->host = hva;
+    memory_try_enable_merging(block->host, block->used_length);
+    qemu_ram_setup_dump(block->host, block->used_length);
+}
+#else
+/* Reserving HVA ranges is not implemented on this host: always NULL. */
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    return NULL;
+}
+
+/* Must not be called: qemu_ram_reserve_hva() never hands out a range here. */
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    assert(0);
+}
+#endif
+
 /* Only legal before guest might have detected the memory size: e.g. on
  * incoming migration, or right after reset.
  *
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 9fb1d54..301f50b 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -62,6 +62,8 @@  typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
 
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void *qemu_ram_reserve_hva(ram_addr_t length);
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva);
 /* This should not be used by devices.  */
 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1394715..1f2cbd1 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -173,6 +173,7 @@  struct MemoryRegion {
     bool terminates;
     bool romd_mode;
     bool ram;
+    void *rsvd_hva;
     bool skip_dump;
     bool readonly; /* For RAM regions */
     bool enabled;
@@ -285,6 +286,26 @@  void memory_region_init(MemoryRegion *mr,
                         uint64_t size);
 
 /**
+ * memory_region_init_hva_range: Initialize a reserved HVA memory region
+ *
+ * The container for RAM memory regions.
+ * When a RAM subregion is added with memory_region_add_subregion_to_hva(),
+ * its backing host memory is remapped into the HVA range reserved by
+ * this region.
+ * Supported only on Linux. If memory reservation and remapping are not
+ * implemented for the platform, this call degrades to memory_region_init().
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region; any subregions beyond this size will be clipped
+ */
+void memory_region_init_hva_range(MemoryRegion *mr,
+                                  struct Object *owner,
+                                  const char *name,
+                                  uint64_t size);
+
+/**
  * memory_region_ref: Add 1 to a memory region's reference count
  *
  * Whenever memory regions are accessed outside the BQL, they need to be
@@ -634,8 +655,8 @@  int memory_region_get_fd(MemoryRegion *mr);
  * memory_region_get_ram_ptr: Get a pointer into a RAM memory region.
  *
  * Returns a host pointer to a RAM memory region (created with
- * memory_region_init_ram() or memory_region_init_ram_ptr()).  Use with
- * care.
+ * memory_region_init_ram() or memory_region_init_ram_ptr()), or to the HVA
+ * range reserved by memory_region_init_hva_range().  Use with care.
  *
  * @mr: the memory region being queried.
  */
@@ -909,6 +930,24 @@  void memory_region_del_eventfd(MemoryRegion *mr,
 void memory_region_add_subregion(MemoryRegion *mr,
                                  hwaddr offset,
                                  MemoryRegion *subregion);
+
+/**
+ * memory_region_add_subregion_to_hva: Add a subregion to a HVA container.
+ *
+ * The same as memory_region_add_subregion(), with the only difference that
+ * it remaps a RAM subregion's backing memory into the HVA range of @mr.
+ * If reserved HVA ranges are not supported by the host, the call behaves
+ * exactly like memory_region_add_subregion().
+ *
+ * @mr: the region to contain the new subregion; must be a container
+ *      initialized with memory_region_init_hva_range().
+ * @offset: the offset relative to @mr where @subregion is added.
+ * @subregion: the subregion to be added.
+ */
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+                                        hwaddr offset,
+                                        MemoryRegion *subregion);
+
 /**
  * memory_region_add_subregion_overlap: Add a subregion to a container
  *                                      with overlap.
@@ -1052,6 +1091,26 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size);
 
 /**
+ * memory_region_find_hva_range: finds a parent MemoryRegion with
+ * reserved HVA and translates it into a #MemoryRegionSection.
+ *
+ * Locates the first parent #MemoryRegion of @mr that is
+ * of reserved HVA type.
+ *
+ * Returns a #MemoryRegionSection that describes a reserved HVA
+ * memory region; its .@mr field is NULL if no such region is found.
+ *    .@offset_within_address_space is the offset of the found memory
+ *      region (in the .@mr field) relative to the address space
+ *      that contains it.
+ *    .@offset_within_region is the offset of @mr relative
+ *      to the returned region (in the .@mr field).
+ *    .@size is the size of the found memory region.
+ *
+ * @mr: a MemoryRegion whose HVA parent is looked up
+ */
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr);
+
+/**
  * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory
  *
  * Synchronizes the dirty page log for an entire address space.
diff --git a/memory.c b/memory.c
index ec07ae8..bf6aa4e 100644
--- a/memory.c
+++ b/memory.c
@@ -929,6 +929,21 @@  void memory_region_init(MemoryRegion *mr,
     }
 }
 
+/*
+ * Set up @mr as a plain container via memory_region_init(), then reserve
+ * a host virtual address range of the container's size for it.
+ * mr->rsvd_hva holds whatever qemu_ram_reserve_hva() returned.
+ */
+void memory_region_init_hva_range(MemoryRegion *mr, Object *owner,
+                                  const char *name, uint64_t size)
+{
+    uint64_t hva_len;
+
+    memory_region_init(mr, owner, name, size);
+    hva_len = memory_region_size(mr);
+    mr->rsvd_hva = qemu_ram_reserve_hva(hva_len);
+}
+
 static void memory_region_get_addr(Object *obj, Visitor *v, void *opaque,
                                    const char *name, Error **errp)
 {
@@ -1517,6 +1526,10 @@  int memory_region_get_fd(MemoryRegion *mr)
 
 void *memory_region_get_ram_ptr(MemoryRegion *mr)
 {
+    if (mr->rsvd_hva) {
+        return mr->rsvd_hva;
+    }
+
     if (mr->alias) {
         return memory_region_get_ram_ptr(mr->alias) + mr->alias_offset;
     }
@@ -1777,6 +1790,31 @@  void memory_region_add_subregion_overlap(MemoryRegion *mr,
     memory_region_add_subregion_common(mr, offset, subregion);
 }
 
+/*
+ * Add @subregion to @mr at @offset.  If @mr has a reserved HVA range and
+ * @subregion is RAM, first remap the subregion's backing memory to
+ * @offset within that range.
+ */
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+                                        hwaddr offset,
+                                        MemoryRegion *subregion)
+{
+    if (mr->rsvd_hva && subregion->ram) {
+        uint64_t hva_size = memory_region_size(mr);
+
+        /*
+         * The remap target must lie entirely inside the range reserved
+         * for @mr; written so that offset + size cannot wrap around.
+         * NOTE(review): assumes the RAM block length equals the
+         * subregion size - confirm for resizable RAM.
+         */
+        assert(offset <= hva_size &&
+               memory_region_size(subregion) <= hva_size - offset);
+        qemu_ram_remap_hva(subregion->ram_addr,
+                           memory_region_get_ram_ptr(mr) + offset);
+    }
+    memory_region_add_subregion(mr, offset, subregion);
+}
+
 void memory_region_del_subregion(MemoryRegion *mr,
                                  MemoryRegion *subregion)
 {
@@ -1897,6 +1921,43 @@  bool memory_region_is_mapped(MemoryRegion *mr)
     return mr->container ? true : false;
 }
 
+/*
+ * Walk from @mr up through its containers; the first region on the way
+ * that has a reserved HVA range becomes the .mr of the returned section.
+ * .mr stays NULL if no such region exists or if
+ * memory_region_to_address_space() finds nothing for the topmost region.
+ */
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr)
+{
+    MemoryRegionSection ret = { .mr = NULL };
+    MemoryRegion *hva_container = NULL;
+    hwaddr addr = 0;
+    MemoryRegion *root;
+
+    for (root = mr; root->container; root = root->container) {
+        if (!hva_container && root->rsvd_hva) {
+            hva_container = root;
+            /* addr is @mr's offset inside hva_container at this point */
+            ret.offset_within_region = addr;
+        }
+        addr += root->addr;
+    }
+
+    ret.address_space = memory_region_to_address_space(root);
+    if (!ret.address_space || !hva_container) {
+        return ret;
+    }
+
+    ret.mr = hva_container;
+    /*
+     * addr is now @mr's offset in the address space; subtract @mr's offset
+     * inside the container to get the container's own offset.
+     */
+    ret.offset_within_address_space = addr - ret.offset_within_region;
+    ret.size = int128_make64(memory_region_size(ret.mr));
+    return ret;
+}
+
 MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size)
 {