@@ -1339,6 +1339,36 @@ static int memory_try_enable_merging(void *addr, size_t len)
return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}
+#ifdef __linux__
+/*
+ * Reserve @length bytes of host virtual address space without backing it.
+ *
+ * The range is mapped PROT_NONE with MAP_NORESERVE, so it is not accessible
+ * as is; RAM blocks are placed into it with qemu_ram_remap_hva().
+ *
+ * Returns the start of the reserved range, or NULL if the reservation
+ * failed.
+ */
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    void *hva = mmap(NULL, length, PROT_NONE,
+                     MAP_NORESERVE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+    /*
+     * mmap() reports failure with MAP_FAILED, not NULL.  Translate it so
+     * that callers testing the result against NULL see "no reservation".
+     */
+    return hva == MAP_FAILED ? NULL : hva;
+}
+
+/*
+ * Move the host mapping of the RAM block found at @addr to @new_hva.
+ *
+ * The block's used_length bytes are relocated with mremap() to exactly
+ * @new_hva, after which merging and dump settings are re-applied to the
+ * new mapping.
+ *
+ * @addr: ram address identifying the block; the block must exist
+ * @new_hva: host virtual address the block must end up at
+ */
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    RAMBlock *block = find_ram_block(addr);
+    void *host;
+
+    assert(block);
+    host = mremap(block->host, block->used_length, block->used_length,
+                  MREMAP_MAYMOVE | MREMAP_FIXED, new_hva);
+    /*
+     * mremap() signals failure with MAP_FAILED; never store that in the
+     * block or hand it to the madvise helpers below.
+     */
+    assert(host != MAP_FAILED);
+    block->host = host;
+    memory_try_enable_merging(block->host, block->used_length);
+    qemu_ram_setup_dump(block->host, block->used_length);
+}
+#else
+/*
+ * HVA reservation is not implemented for this platform: always report
+ * "no reservation" by returning NULL.
+ */
+void *qemu_ram_reserve_hva(ram_addr_t length)
+{
+    (void)length;
+    return NULL;
+}
+
+/*
+ * Remapping into a reserved HVA range is not implemented for this
+ * platform; this function must never be reached.
+ */
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva)
+{
+    (void)addr;
+    (void)new_hva;
+    assert(0);
+}
+#endif
+
/* Only legal before guest might have detected the memory size: e.g. on
* incoming migration, or right after reset.
*
@@ -62,6 +62,8 @@ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void *qemu_ram_reserve_hva(ram_addr_t length);
+void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva);
/* This should not be used by devices. */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
@@ -173,6 +173,7 @@ struct MemoryRegion {
bool terminates;
bool romd_mode;
bool ram;
+ void *rsvd_hva;
bool skip_dump;
bool readonly; /* For RAM regions */
bool enabled;
@@ -285,6 +286,26 @@ void memory_region_init(MemoryRegion *mr,
uint64_t size);
/**
+ * memory_region_init_hva_range: Initialize a reserved HVA memory region
+ *
+ * The container for RAM memory regions; it reserves a host virtual address
+ * (HVA) range. When a RAM subregion is added to it with
+ * memory_region_add_subregion_to_hva(), the subregion's backing host memory
+ * is remapped into the HVA range reserved by this region.
+ * Supported only on Linux. If memory reservation and remapping are not
+ * implemented for the platform, this call degrades to memory_region_init().
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region; any subregions beyond this size will be clipped
+ */
+void memory_region_init_hva_range(MemoryRegion *mr,
+ struct Object *owner,
+ const char *name,
+ uint64_t size);
+
+/**
* memory_region_ref: Add 1 to a memory region's reference count
*
* Whenever memory regions are accessed outside the BQL, they need to be
@@ -634,8 +655,8 @@ int memory_region_get_fd(MemoryRegion *mr);
* memory_region_get_ram_ptr: Get a pointer into a RAM memory region.
*
* Returns a host pointer to a RAM memory region (created with
- * memory_region_init_ram() or memory_region_init_ram_ptr()). Use with
- * care.
+ * memory_region_init_ram(), memory_region_init_ram_ptr() or
+ * memory_region_init_hva_range()). Use with care.
*
* @mr: the memory region being queried.
*/
@@ -909,6 +930,24 @@ void memory_region_del_eventfd(MemoryRegion *mr,
void memory_region_add_subregion(MemoryRegion *mr,
hwaddr offset,
MemoryRegion *subregion);
+
+/**
+ * memory_region_add_subregion_to_hva: Add a subregion to a HVA container.
+ *
+ * The same as memory_region_add_subregion(), with the only difference being
+ * that it first remaps a RAM subregion's backing memory into the HVA range
+ * of @mr. If @mr has no reserved HVA range (e.g. the host does not support
+ * it), the call behaves exactly as memory_region_add_subregion().
+ *
+ * @mr: the region to contain the new subregion; must be a container,
+ * normally one initialized with memory_region_init_hva_range().
+ * @offset: the offset relative to @mr where @subregion is added.
+ * @subregion: the subregion to be added.
+ */
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+ hwaddr offset,
+ MemoryRegion *subregion);
+
/**
* memory_region_add_subregion_overlap: Add a subregion to a container
* with overlap.
@@ -1052,6 +1091,26 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
hwaddr addr, uint64_t size);
/**
+ * memory_region_find_hva_range: finds a parent MemoryRegion with
+ * reserved HVA and translates it into a #MemoryRegionSection.
+ *
+ * Locates the nearest #MemoryRegion with a reserved HVA range, starting
+ * at @mr itself and walking up through its containers.
+ *
+ * Returns a #MemoryRegionSection that describes a reserved HVA
+ * memory region.
+ * .@offset_within_address_space is offset of found
+ * (in the .@mr field) memory region relative to the address
+ * space that contains it.
+ * .@offset_within_region is offset of @mr relative
+ * to the returned region (in the .@mr field).
+ * .@size is size of found memory region
+ *
+ * @mr: a MemoryRegion whose HVA parent is looked up
+ */
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr);
+
+/**
* address_space_sync_dirty_bitmap: synchronize the dirty log for all memory
*
* Synchronizes the dirty page log for an entire address space.
@@ -929,6 +929,15 @@ void memory_region_init(MemoryRegion *mr,
}
}
+/*
+ * Initialize @mr exactly like memory_region_init() and then record a
+ * reserved host virtual address range for it in mr->rsvd_hva.
+ */
+void memory_region_init_hva_range(MemoryRegion *mr,
+                                  Object *owner,
+                                  const char *name,
+                                  uint64_t size)
+{
+    void *reserved;
+
+    memory_region_init(mr, owner, name, size);
+
+    /* The length to reserve is read back from the initialized region. */
+    reserved = qemu_ram_reserve_hva(memory_region_size(mr));
+    mr->rsvd_hva = reserved;
+}
+
static void memory_region_get_addr(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
{
@@ -1517,6 +1526,10 @@ int memory_region_get_fd(MemoryRegion *mr)
void *memory_region_get_ram_ptr(MemoryRegion *mr)
{
+ if (mr->rsvd_hva) {
+ return mr->rsvd_hva;
+ }
+
if (mr->alias) {
return memory_region_get_ram_ptr(mr->alias) + mr->alias_offset;
}
@@ -1777,6 +1790,17 @@ void memory_region_add_subregion_overlap(MemoryRegion *mr,
memory_region_add_subregion_common(mr, offset, subregion);
}
+/*
+ * Add @subregion to @mr at @offset, first moving the subregion's backing
+ * host memory into the HVA range reserved by @mr.
+ *
+ * The remap is only done when @mr has a reserved HVA range and @subregion
+ * is RAM; otherwise this is a plain memory_region_add_subregion().
+ */
+void memory_region_add_subregion_to_hva(MemoryRegion *mr,
+                                        hwaddr offset,
+                                        MemoryRegion *subregion)
+{
+    if (mr->rsvd_hva && subregion->ram) {
+        uint64_t rsvd_size = memory_region_size(mr);
+
+        /*
+         * The remap targets a fixed address, so a subregion that does not
+         * fit into the reservation would replace unrelated host mappings
+         * located after it.  Written to avoid overflow in offset + size.
+         */
+        assert(offset <= rsvd_size &&
+               memory_region_size(subregion) <= rsvd_size - offset);
+        qemu_ram_remap_hva(subregion->ram_addr,
+                           memory_region_get_ram_ptr(mr) + offset);
+    }
+    memory_region_add_subregion(mr, offset, subregion);
+}
+
void memory_region_del_subregion(MemoryRegion *mr,
MemoryRegion *subregion)
{
@@ -1897,6 +1921,32 @@ bool memory_region_is_mapped(MemoryRegion *mr)
return mr->container ? true : false;
}
+/*
+ * Look up the closest region with a reserved HVA range, starting at @mr
+ * and walking up the chain of containers.
+ *
+ * On success the returned section has .mr set to that region,
+ * .offset_within_region set to the offset of @mr inside it,
+ * .offset_within_address_space set to the offset of that region from the
+ * topmost container, and .size set to that region's size.
+ * .mr is NULL if no such region is found or if
+ * memory_region_to_address_space() yields no address space for the
+ * topmost container.
+ */
+MemoryRegionSection memory_region_find_hva_range(MemoryRegion *mr)
+{
+    MemoryRegionSection ret = { .mr = NULL };
+    MemoryRegion *hva_container = NULL;
+    hwaddr addr = 0;
+    MemoryRegion *root;
+
+    /* After the loop, addr is the offset of @mr from the topmost container. */
+    for (root = mr; root->container; root = root->container) {
+        if (!hva_container && root->rsvd_hva) {
+            hva_container = root;
+            /* Everything summed so far lies below the HVA region. */
+            ret.offset_within_region = addr;
+        }
+        addr += root->addr;
+    }
+
+    ret.address_space = memory_region_to_address_space(root);
+    if (!ret.address_space || !hva_container) {
+        return ret;
+    }
+
+    ret.mr = hva_container;
+    /*
+     * Report where the HVA region itself starts, not where @mr does:
+     * drop the part of addr that lies inside the HVA region.
+     */
+    ret.offset_within_address_space = addr - ret.offset_within_region;
+    ret.size = int128_make64(memory_region_size(ret.mr));
+    return ret;
+}
+
MemoryRegionSection memory_region_find(MemoryRegion *mr,
hwaddr addr, uint64_t size)
{
Patch adds the - memory_region_init_hva_range() - memory_region_add_subregion_to_hva() - memory_region_find_hva_range() APIs to allocate, map into and look up a reserved HVA MemoryRegion. A MemoryRegion with a reserved HVA range will be used for providing linear 1:1 HVA->GVA mapping for RAM MemoryRegion-s that are added as subregions inside it. It will be used for memory hotplug and vhost integration, reducing all hotplugged MemoryRegions down to a single memory range descriptor, which overcomes vhost's limitation on the number of allowed memory ranges. Signed-off-by: Igor Mammedov <imammedo@redhat.com> --- v1->v4: - fix offset calculation in memory_region_find_hva_range() - add memory_region_add_subregion_to_hva() RFC->v1: - rename: memory_region_init_rsvd_hva -> memory_region_init_hva_range memory_region_find_rsvd_hva -> memory_region_find_hva_range - replace using ram_addr with "void *rsvd_hva" - guard linux specific calls with ifdef - split memory reservation into qemu_ram_reserve_hva() --- exec.c | 30 ++++++++++++++++++++++ include/exec/cpu-common.h | 2 ++ include/exec/memory.h | 63 +++++++++++++++++++++++++++++++++++++++++++++-- memory.c | 50 +++++++++++++++++++++++++++++++++++++ 4 files changed, 143 insertions(+), 2 deletions(-)