get:
Show a patch.

patch:
Partially update a patch (only the fields supplied are changed).

put:
Update a patch.

GET /api/patches/1839914/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 1839914,
    "url": "http://patchwork.ozlabs.org/api/patches/1839914/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20230926185738.277351-17-david@redhat.com/",
    "project": {
        "id": 14,
        "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api",
        "name": "QEMU Development",
        "link_name": "qemu-devel",
        "list_id": "qemu-devel.nongnu.org",
        "list_email": "qemu-devel@nongnu.org",
        "web_url": "",
        "scm_url": "",
        "webscm_url": "",
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<20230926185738.277351-17-david@redhat.com>",
    "list_archive_url": null,
    "date": "2023-09-26T18:57:36",
    "name": "[v4,16/18] virtio-mem: Expose device memory dynamically via multiple memslots if enabled",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "3cd2fea210b86b8fc90148937e06bcfdaac8f1ae",
    "submitter": {
        "id": 70402,
        "url": "http://patchwork.ozlabs.org/api/people/70402/?format=api",
        "name": "David Hildenbrand",
        "email": "david@redhat.com"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20230926185738.277351-17-david@redhat.com/mbox/",
    "series": [
        {
            "id": 374991,
            "url": "http://patchwork.ozlabs.org/api/series/374991/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=374991",
            "date": "2023-09-26T18:57:23",
            "name": "virtio-mem: Expose device memory through multiple memslots",
            "version": 4,
            "mbox": "http://patchwork.ozlabs.org/series/374991/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/1839914/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/1839914/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>",
        "X-Original-To": "incoming@patchwork.ozlabs.org",
        "Delivered-To": "patchwork-incoming@legolas.ozlabs.org",
        "Authentication-Results": [
            "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256\n header.s=mimecast20190719 header.b=YSNCI/kk;\n\tdkim-atps=neutral",
            "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=patchwork.ozlabs.org)"
        ],
        "Received": [
            "from lists.gnu.org (lists.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4Rw8Hb18vCz1ynX\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 27 Sep 2023 05:02:03 +1000 (AEST)",
            "from localhost ([::1] helo=lists1p.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces@nongnu.org>)\n\tid 1qlDIF-0002Dr-NZ; Tue, 26 Sep 2023 15:00:24 -0400",
            "from eggs.gnu.org ([2001:470:142:3::10])\n by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <david@redhat.com>) id 1qlDHv-0001oC-PN\n for qemu-devel@nongnu.org; Tue, 26 Sep 2023 15:00:11 -0400",
            "from us-smtp-delivery-124.mimecast.com ([170.10.133.124])\n by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <david@redhat.com>) id 1qlDHs-0003Fw-Fq\n for qemu-devel@nongnu.org; Tue, 26 Sep 2023 15:00:03 -0400",
            "from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com\n [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS\n (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id\n us-mta-80-9MIqkCUJMXW4b70zUgM2KQ-1; Tue, 26 Sep 2023 14:59:58 -0400",
            "from smtp.corp.redhat.com (int-mx04.intmail.prod.int.rdu2.redhat.com\n [10.11.54.4])\n (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n (No client certificate requested)\n by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 929DA811E7D;\n Tue, 26 Sep 2023 18:59:57 +0000 (UTC)",
            "from t14s.fritz.box (unknown [10.39.192.33])\n by smtp.corp.redhat.com (Postfix) with ESMTP id B62362026D4B;\n Tue, 26 Sep 2023 18:59:52 +0000 (UTC)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n s=mimecast20190719; t=1695754799;\n h=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n to:to:cc:cc:mime-version:mime-version:\n content-transfer-encoding:content-transfer-encoding:\n in-reply-to:in-reply-to:references:references;\n bh=vV4PQc9MX5cpMSAkjQdv/L1WcUiVtYlZmV5E59fKZWI=;\n b=YSNCI/kkJxhnJY5oxKMf+OU5YIHd5nd9zjju1sIUio1qaFcHJZxpEmYkM4gvt2K/EF2wsH\n FVhIIR2i7cmPaT0KNLbK3uENXlaMgc9xrTdCA2ujqnTUbeB3OVdMwakY5LP3cpRGr+tjZu\n uIcvPqCcTQ2JP1nk0PKrrqZC28/3oKA=",
        "X-MC-Unique": "9MIqkCUJMXW4b70zUgM2KQ-1",
        "From": "David Hildenbrand <david@redhat.com>",
        "To": "qemu-devel@nongnu.org",
        "Cc": "David Hildenbrand <david@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>,\n Igor Mammedov <imammedo@redhat.com>,\n Xiao Guangrong <xiaoguangrong.eric@gmail.com>,\n \"Michael S. Tsirkin\" <mst@redhat.com>, Peter Xu <peterx@redhat.com>,\n\t=?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,\n Eduardo Habkost <eduardo@habkost.net>,\n Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,\n Yanan Wang <wangyanan55@huawei.com>, Michal Privoznik <mprivozn@redhat.com>,\n\t=?utf-8?q?Daniel_P_=2E_Berrang=C3=A9?= <berrange@redhat.com>,\n Gavin Shan <gshan@redhat.com>, Alex Williamson <alex.williamson@redhat.com>,\n Stefan Hajnoczi <stefanha@redhat.com>,\n \"Maciej S . Szmigiero\" <mail@maciej.szmigiero.name>, kvm@vger.kernel.org",
        "Subject": "[PATCH v4 16/18] virtio-mem: Expose device memory dynamically via\n multiple memslots if enabled",
        "Date": "Tue, 26 Sep 2023 20:57:36 +0200",
        "Message-ID": "<20230926185738.277351-17-david@redhat.com>",
        "In-Reply-To": "<20230926185738.277351-1-david@redhat.com>",
        "References": "<20230926185738.277351-1-david@redhat.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-Scanned-By": "MIMEDefang 3.1 on 10.11.54.4",
        "Received-SPF": "pass client-ip=170.10.133.124; envelope-from=david@redhat.com;\n helo=us-smtp-delivery-124.mimecast.com",
        "X-Spam_score_int": "-20",
        "X-Spam_score": "-2.1",
        "X-Spam_bar": "--",
        "X-Spam_report": "(-2.1 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001,\n DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1,\n RCVD_IN_DNSWL_NONE=-0.0001, RCVD_IN_MSPIKE_H3=0.001, RCVD_IN_MSPIKE_WL=0.001,\n SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no",
        "X-Spam_action": "no action",
        "X-BeenThere": "qemu-devel@nongnu.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "<qemu-devel.nongnu.org>",
        "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>",
        "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>",
        "List-Post": "<mailto:qemu-devel@nongnu.org>",
        "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>",
        "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=subscribe>",
        "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org",
        "Sender": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org"
    },
    "content": "Having large virtio-mem devices that only expose little memory to a VM\nis currently a problem: we map the whole sparse memory region into the\nguest using a single memslot, resulting in one gigantic memslot in KVM.\nKVM allocates metadata for the whole memslot, which can result in quite\nsome memory waste.\n\nAssuming we have a 1 TiB virtio-mem device and only expose little (e.g.,\n1 GiB) memory, we would create a single 1 TiB memslot and KVM has to\nallocate metadata for that 1 TiB memslot: on x86, this implies allocating\na significant amount of memory for metadata:\n\n(1) RMAP: 8 bytes per 4 KiB, 8 bytes per 2 MiB, 8 bytes per 1 GiB\n    -> For 1 TiB: 2147483648 + 4194304 + 8192 = ~ 2 GiB (0.2 %)\n\n    With the TDP MMU (cat /sys/module/kvm/parameters/tdp_mmu) this gets\n    allocated lazily when required for nested VMs\n(2) gfn_track: 2 bytes per 4 KiB\n    -> For 1 TiB: 536870912 = ~512 MiB (0.05 %)\n(3) lpage_info: 4 bytes per 2 MiB, 4 bytes per 1 GiB\n    -> For 1 TiB: 2097152 + 4096 = ~2 MiB (0.0002 %)\n(4) 2x dirty bitmaps for tracking: 2x 1 bit per 4 KiB page\n    -> For 1 TiB: 536870912 = 64 MiB (0.006 %)\n\nSo we primarily care about (1) and (2). The bad thing is, that the\nmemory consumption *doubles* once SMM is enabled, because we create the\nmemslot once for !SMM and once for SMM.\n\nHaving a 1 TiB memslot without the TDP MMU consumes around:\n* With SMM: 5 GiB\n* Without SMM: 2.5 GiB\nHaving a 1 TiB memslot with the TDP MMU consumes around:\n* With SMM: 1 GiB\n* Without SMM: 512 MiB\n\n... and that's really something we want to optimize, to be able to just\nstart a VM with small boot memory (e.g., 4 GiB) and a virtio-mem device\nthat can grow very large (e.g., 1 TiB).\n\nConsequently, using multiple memslots and only mapping the memslots we\nreally need can significantly reduce memory waste and speed up\nmemslot-related operations. 
Let's expose the sparse RAM memory region using\nmultiple memslots, mapping only the memslots we currently need into our\ndevice memory region container.\n\nThe feature can be enabled using \"dynamic-memslots=on\" and requires\n\"unplugged-inaccessible=on\", which is nowadays the default.\n\nOnce enabled, we'll auto-detect the number of memslots to use based on the\nmemslot limit provided by the core. We'll use at most 1 memslot per\ngigabyte. Note that our global limit of memslots accross all memory devices\nis currently set to 256: even with multiple large virtio-mem devices,\nwe'd still have a sane limit on the number of memslots used.\n\nThe default is to not dynamically map memslot for now\n(\"dynamic-memslots=off\"). The optimization must be enabled manually,\nbecause some vhost setups (e.g., hotplug of vhost-user devices) might be\nproblematic until we support more memslots especially in vhost-user backends.\n\nNote that \"dynamic-memslots=on\" is just a hint that multiple memslots\n*may* be used for internal optimizations, not that multiple memslots\n*must* be used. The actual number of memslots that are used is an\ninternal detail: for example, once memslot metadata is no longer an\nissue, we could simply stop optimizing for that. Migration source and\ndestination can differ on the setting of \"dynamic-memslots\".\n\nSigned-off-by: David Hildenbrand <david@redhat.com>\n---\n hw/virtio/virtio-mem-pci.c     |  21 +++\n hw/virtio/virtio-mem.c         | 288 +++++++++++++++++++++++++++++++++\n include/hw/virtio/virtio-mem.h |  32 +++-\n 3 files changed, 340 insertions(+), 1 deletion(-)",
    "diff": "diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c\nindex c4597e029e..1b4e9a3284 100644\n--- a/hw/virtio/virtio-mem-pci.c\n+++ b/hw/virtio/virtio-mem-pci.c\n@@ -48,6 +48,25 @@ static MemoryRegion *virtio_mem_pci_get_memory_region(MemoryDeviceState *md,\n     return vmc->get_memory_region(vmem, errp);\n }\n \n+static void virtio_mem_pci_decide_memslots(MemoryDeviceState *md,\n+                                           unsigned int limit)\n+{\n+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);\n+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);\n+    VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);\n+\n+    vmc->decide_memslots(vmem, limit);\n+}\n+\n+static unsigned int virtio_mem_pci_get_memslots(MemoryDeviceState *md)\n+{\n+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);\n+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);\n+    VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);\n+\n+    return vmc->get_memslots(vmem);\n+}\n+\n static uint64_t virtio_mem_pci_get_plugged_size(const MemoryDeviceState *md,\n                                                 Error **errp)\n {\n@@ -150,6 +169,8 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data)\n     mdc->set_addr = virtio_mem_pci_set_addr;\n     mdc->get_plugged_size = virtio_mem_pci_get_plugged_size;\n     mdc->get_memory_region = virtio_mem_pci_get_memory_region;\n+    mdc->decide_memslots = virtio_mem_pci_decide_memslots;\n+    mdc->get_memslots = virtio_mem_pci_get_memslots;\n     mdc->fill_device_info = virtio_mem_pci_fill_device_info;\n     mdc->get_min_alignment = virtio_mem_pci_get_min_alignment;\n \ndiff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c\nindex 0cf47df9cf..e1e4250e69 100644\n--- a/hw/virtio/virtio-mem.c\n+++ b/hw/virtio/virtio-mem.c\n@@ -66,6 +66,13 @@ static uint32_t virtio_mem_default_thp_size(void)\n     return default_thp_size;\n }\n \n+/*\n+ * The minimum memslot size depends on this setting (\"sane default\"), the\n+ * device block 
size, and the memory backend page size. The last (or single)\n+ * memslot might be smaller than this constant.\n+ */\n+#define VIRTIO_MEM_MIN_MEMSLOT_SIZE (1 * GiB)\n+\n /*\n  * We want to have a reasonable default block size such that\n  * 1. We avoid splitting THPs when unplugging memory, which degrades\n@@ -483,6 +490,96 @@ static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,\n     return true;\n }\n \n+static void virtio_mem_activate_memslot(VirtIOMEM *vmem, unsigned int idx)\n+{\n+    const uint64_t memslot_offset = idx * vmem->memslot_size;\n+\n+    assert(vmem->memslots);\n+\n+    /*\n+     * Instead of enabling/disabling memslots, we add/remove them. This should\n+     * make address space updates faster, because we don't have to loop over\n+     * many disabled subregions.\n+     */\n+    if (memory_region_is_mapped(&vmem->memslots[idx])) {\n+        return;\n+    }\n+    memory_region_add_subregion(vmem->mr, memslot_offset, &vmem->memslots[idx]);\n+}\n+\n+static void virtio_mem_deactivate_memslot(VirtIOMEM *vmem, unsigned int idx)\n+{\n+    assert(vmem->memslots);\n+\n+    if (!memory_region_is_mapped(&vmem->memslots[idx])) {\n+        return;\n+    }\n+    memory_region_del_subregion(vmem->mr, &vmem->memslots[idx]);\n+}\n+\n+static void virtio_mem_activate_memslots_to_plug(VirtIOMEM *vmem,\n+                                                 uint64_t offset, uint64_t size)\n+{\n+    const unsigned int start_idx = offset / vmem->memslot_size;\n+    const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /\n+                                 vmem->memslot_size;\n+    unsigned int idx;\n+\n+    if (!vmem->dynamic_memslots) {\n+        return;\n+    }\n+\n+    /* Activate all involved memslots in a single transaction. 
*/\n+    memory_region_transaction_begin();\n+    for (idx = start_idx; idx < end_idx; idx++) {\n+        virtio_mem_activate_memslot(vmem, idx);\n+    }\n+    memory_region_transaction_commit();\n+}\n+\n+static void virtio_mem_deactivate_unplugged_memslots(VirtIOMEM *vmem,\n+                                                     uint64_t offset,\n+                                                     uint64_t size)\n+{\n+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);\n+    const unsigned int start_idx = offset / vmem->memslot_size;\n+    const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /\n+                                 vmem->memslot_size;\n+    unsigned int idx;\n+\n+    if (!vmem->dynamic_memslots) {\n+        return;\n+    }\n+\n+    /* Deactivate all memslots with unplugged blocks in a single transaction. */\n+    memory_region_transaction_begin();\n+    for (idx = start_idx; idx < end_idx; idx++) {\n+        const uint64_t memslot_offset = idx * vmem->memslot_size;\n+        uint64_t memslot_size = vmem->memslot_size;\n+\n+        /* The size of the last memslot might be smaller. 
*/\n+        if (idx == vmem->nb_memslots - 1) {\n+            memslot_size = region_size - memslot_offset;\n+        }\n+\n+        /*\n+         * Partially covered memslots might still have some blocks plugged and\n+         * have to remain active if that's the case.\n+         */\n+        if (offset > memslot_offset ||\n+            offset + size < memslot_offset + memslot_size) {\n+            const uint64_t gpa = vmem->addr + memslot_offset;\n+\n+            if (!virtio_mem_is_range_unplugged(vmem, gpa, memslot_size)) {\n+                continue;\n+            }\n+        }\n+\n+        virtio_mem_deactivate_memslot(vmem, idx);\n+    }\n+    memory_region_transaction_commit();\n+}\n+\n static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,\n                                       uint64_t size, bool plug)\n {\n@@ -500,6 +597,8 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,\n         }\n         virtio_mem_notify_unplug(vmem, offset, size);\n         virtio_mem_set_range_unplugged(vmem, start_gpa, size);\n+        /* Deactivate completely unplugged memslots after updating the state. */\n+        virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);\n         return 0;\n     }\n \n@@ -527,7 +626,20 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,\n     }\n \n     if (!ret) {\n+        /*\n+         * Activate before notifying and rollback in case of any errors.\n+         *\n+         * When activating a yet inactive memslot, memory notifiers will get\n+         * notified about the added memory region and can register with the\n+         * RamDiscardManager; this will traverse all plugged blocks and skip the\n+         * blocks we are plugging here. 
The following notification will inform\n+         * registered listeners about the blocks we're plugging.\n+         */\n+        virtio_mem_activate_memslots_to_plug(vmem, offset, size);\n         ret = virtio_mem_notify_plug(vmem, offset, size);\n+        if (ret) {\n+            virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);\n+        }\n     }\n     if (ret) {\n         /* Could be preallocation or a notifier populated memory. */\n@@ -620,6 +732,7 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,\n \n static int virtio_mem_unplug_all(VirtIOMEM *vmem)\n {\n+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);\n     RAMBlock *rb = vmem->memdev->mr.ram_block;\n \n     if (vmem->size) {\n@@ -634,6 +747,9 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem)\n         bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);\n         vmem->size = 0;\n         notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);\n+\n+        /* Deactivate all memslots after updating the state. 
*/\n+        virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size);\n     }\n \n     trace_virtio_mem_unplugged_all();\n@@ -790,6 +906,43 @@ static void virtio_mem_system_reset(void *opaque)\n     virtio_mem_unplug_all(vmem);\n }\n \n+static void virtio_mem_prepare_mr(VirtIOMEM *vmem)\n+{\n+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);\n+\n+    assert(!vmem->mr && vmem->dynamic_memslots);\n+    vmem->mr = g_new0(MemoryRegion, 1);\n+    memory_region_init(vmem->mr, OBJECT(vmem), \"virtio-mem\",\n+                       region_size);\n+    vmem->mr->align = memory_region_get_alignment(&vmem->memdev->mr);\n+}\n+\n+static void virtio_mem_prepare_memslots(VirtIOMEM *vmem)\n+{\n+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);\n+    unsigned int idx;\n+\n+    g_assert(!vmem->memslots && vmem->nb_memslots && vmem->dynamic_memslots);\n+    vmem->memslots = g_new0(MemoryRegion, vmem->nb_memslots);\n+\n+    /* Initialize our memslots, but don't map them yet. */\n+    for (idx = 0; idx < vmem->nb_memslots; idx++) {\n+        const uint64_t memslot_offset = idx * vmem->memslot_size;\n+        uint64_t memslot_size = vmem->memslot_size;\n+        char name[20];\n+\n+        /* The size of the last memslot might be smaller. 
*/\n+        if (idx == vmem->nb_memslots - 1) {\n+            memslot_size = region_size - memslot_offset;\n+        }\n+\n+        snprintf(name, sizeof(name), \"memslot-%u\", idx);\n+        memory_region_init_alias(&vmem->memslots[idx], OBJECT(vmem), name,\n+                                 &vmem->memdev->mr, memslot_offset,\n+                                 memslot_size);\n+    }\n+}\n+\n static void virtio_mem_device_realize(DeviceState *dev, Error **errp)\n {\n     MachineState *ms = MACHINE(qdev_get_machine());\n@@ -861,6 +1014,14 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)\n     vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;\n #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */\n \n+    if (vmem->dynamic_memslots &&\n+        vmem->unplugged_inaccessible != ON_OFF_AUTO_ON) {\n+        error_setg(errp, \"'%s' property set to 'on' requires '%s' to be 'on'\",\n+                   VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP,\n+                   VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);\n+        return;\n+    }\n+\n     /*\n      * If the block size wasn't configured by the user, use a sane default. 
This\n      * allows using hugetlbfs backends of any page size without manual\n@@ -930,6 +1091,25 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)\n     virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));\n     vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);\n \n+    /*\n+     * With \"dynamic-memslots=off\" (old behavior) we always map the whole\n+     * RAM memory region directly.\n+     */\n+    if (vmem->dynamic_memslots) {\n+        if (!vmem->mr) {\n+            virtio_mem_prepare_mr(vmem);\n+        }\n+        if (vmem->nb_memslots <= 1) {\n+            vmem->nb_memslots = 1;\n+            vmem->memslot_size = memory_region_size(&vmem->memdev->mr);\n+        }\n+        if (!vmem->memslots) {\n+            virtio_mem_prepare_memslots(vmem);\n+        }\n+    } else {\n+        assert(!vmem->mr && !vmem->nb_memslots && !vmem->memslots);\n+    }\n+\n     host_memory_backend_set_mapped(vmem->memdev, true);\n     vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));\n     if (vmem->early_migration) {\n@@ -984,11 +1164,31 @@ static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)\n                                                virtio_mem_discard_range_cb);\n }\n \n+static int virtio_mem_activate_memslot_range_cb(VirtIOMEM *vmem, void *arg,\n+                                                uint64_t offset, uint64_t size)\n+{\n+    virtio_mem_activate_memslots_to_plug(vmem, offset, size);\n+    return 0;\n+}\n+\n static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem)\n {\n     RamDiscardListener *rdl;\n     int ret;\n \n+    /*\n+     * We restored the bitmap and updated the requested size; activate all\n+     * memslots (so listeners register) before notifying about plugged blocks.\n+     */\n+    if (vmem->dynamic_memslots) {\n+        /*\n+         * We don't expect any active memslots at this point to deactivate: no\n+         * memory was plugged on the migration destination.\n+         */\n+      
  virtio_mem_for_each_plugged_range(vmem, NULL,\n+                                          virtio_mem_activate_memslot_range_cb);\n+    }\n+\n     /*\n      * We started out with all memory discarded and our memory region is mapped\n      * into an address space. Replay, now that we updated the bitmap.\n@@ -1251,11 +1451,79 @@ static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)\n     if (!vmem->memdev) {\n         error_setg(errp, \"'%s' property must be set\", VIRTIO_MEM_MEMDEV_PROP);\n         return NULL;\n+    } else if (vmem->dynamic_memslots) {\n+        if (!vmem->mr) {\n+            virtio_mem_prepare_mr(vmem);\n+        }\n+        return vmem->mr;\n     }\n \n     return &vmem->memdev->mr;\n }\n \n+static void virtio_mem_decide_memslots(VirtIOMEM *vmem, unsigned int limit)\n+{\n+    uint64_t region_size, memslot_size, min_memslot_size;\n+    unsigned int memslots;\n+    RAMBlock *rb;\n+\n+    if (!vmem->dynamic_memslots) {\n+        return;\n+    }\n+\n+    /* We're called exactly once, before realizing the device. */\n+    assert(!vmem->nb_memslots);\n+\n+    /* If realizing the device will fail, just assume a single memslot. */\n+    if (limit <= 1 || !vmem->memdev || !vmem->memdev->mr.ram_block) {\n+        vmem->nb_memslots = 1;\n+        return;\n+    }\n+\n+    rb = vmem->memdev->mr.ram_block;\n+    region_size = memory_region_size(&vmem->memdev->mr);\n+\n+    /*\n+     * Determine the default block size now, to determine the minimum memslot\n+     * size. We want the minimum slot size to be at least the device block size.\n+     */\n+    if (!vmem->block_size) {\n+        vmem->block_size = virtio_mem_default_block_size(rb);\n+    }\n+    /* If realizing the device will fail, just assume a single memslot. 
*/\n+    if (vmem->block_size < qemu_ram_pagesize(rb) ||\n+        !QEMU_IS_ALIGNED(region_size, vmem->block_size)) {\n+        vmem->nb_memslots = 1;\n+        return;\n+    }\n+\n+    /*\n+     * All memslots except the last one have a reasonable minimum size, and\n+     * and all memslot sizes are aligned to the device block size.\n+     */\n+    memslot_size = QEMU_ALIGN_UP(region_size / limit, vmem->block_size);\n+    min_memslot_size = MAX(vmem->block_size, VIRTIO_MEM_MIN_MEMSLOT_SIZE);\n+    memslot_size = MAX(memslot_size, min_memslot_size);\n+\n+    memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;\n+    if (memslots != 1) {\n+        vmem->memslot_size = memslot_size;\n+    }\n+    vmem->nb_memslots = memslots;\n+}\n+\n+static unsigned int virtio_mem_get_memslots(VirtIOMEM *vmem)\n+{\n+    if (!vmem->dynamic_memslots) {\n+        /* Exactly one static RAM memory region. */\n+        return 1;\n+    }\n+\n+    /* We're called after instructed to make a decision. */\n+    g_assert(vmem->nb_memslots);\n+    return vmem->nb_memslots;\n+}\n+\n static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,\n                                                 Notifier *notifier)\n {\n@@ -1393,6 +1661,21 @@ static void virtio_mem_instance_init(Object *obj)\n                         NULL, NULL);\n }\n \n+static void virtio_mem_instance_finalize(Object *obj)\n+{\n+    VirtIOMEM *vmem = VIRTIO_MEM(obj);\n+\n+    /*\n+     * Note: the core already dropped the references on all memory regions\n+     * (it's passed as the owner to memory_region_init_*()) and finalized\n+     * these objects. 
We can simply free the memory.\n+     */\n+    g_free(vmem->memslots);\n+    vmem->memslots = NULL;\n+    g_free(vmem->mr);\n+    vmem->mr = NULL;\n+}\n+\n static Property virtio_mem_properties[] = {\n     DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),\n     DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),\n@@ -1405,6 +1688,8 @@ static Property virtio_mem_properties[] = {\n #endif\n     DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,\n                      early_migration, true),\n+    DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM,\n+                     dynamic_memslots, false),\n     DEFINE_PROP_END_OF_LIST(),\n };\n \n@@ -1572,6 +1857,8 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)\n \n     vmc->fill_device_info = virtio_mem_fill_device_info;\n     vmc->get_memory_region = virtio_mem_get_memory_region;\n+    vmc->decide_memslots = virtio_mem_decide_memslots;\n+    vmc->get_memslots = virtio_mem_get_memslots;\n     vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;\n     vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;\n     vmc->unplug_request_check = virtio_mem_unplug_request_check;\n@@ -1589,6 +1876,7 @@ static const TypeInfo virtio_mem_info = {\n     .parent = TYPE_VIRTIO_DEVICE,\n     .instance_size = sizeof(VirtIOMEM),\n     .instance_init = virtio_mem_instance_init,\n+    .instance_finalize = virtio_mem_instance_finalize,\n     .class_init = virtio_mem_class_init,\n     .class_size = sizeof(VirtIOMEMClass),\n     .interfaces = (InterfaceInfo[]) {\ndiff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h\nindex ab0fe2b4f2..5f5b02b8f9 100644\n--- a/include/hw/virtio/virtio-mem.h\n+++ b/include/hw/virtio/virtio-mem.h\n@@ -33,6 +33,7 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass,\n #define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP \"unplugged-inaccessible\"\n #define VIRTIO_MEM_EARLY_MIGRATION_PROP 
\"x-early-migration\"\n #define VIRTIO_MEM_PREALLOC_PROP \"prealloc\"\n+#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP \"dynamic-memslots\"\n \n struct VirtIOMEM {\n     VirtIODevice parent_obj;\n@@ -44,7 +45,28 @@ struct VirtIOMEM {\n     int32_t bitmap_size;\n     unsigned long *bitmap;\n \n-    /* assigned memory backend and memory region */\n+    /*\n+     * With \"dynamic-memslots=on\": Device memory region in which we dynamically\n+     * map the memslots.\n+     */\n+    MemoryRegion *mr;\n+\n+    /*\n+     * With \"dynamic-memslots=on\": The individual memslots (aliases into the\n+     * memory backend).\n+     */\n+    MemoryRegion *memslots;\n+\n+    /* With \"dynamic-memslots=on\": The total number of memslots. */\n+    uint16_t nb_memslots;\n+\n+    /*\n+     * With \"dynamic-memslots=on\": Size of one memslot (the size of the\n+     * last one can differ).\n+     */\n+    uint64_t memslot_size;\n+\n+    /* Assigned memory backend with the RAM memory region. */\n     HostMemoryBackend *memdev;\n \n     /* NUMA node */\n@@ -82,6 +104,12 @@ struct VirtIOMEM {\n      */\n     bool early_migration;\n \n+    /*\n+     * Whether we dynamically map (multiple, if possible) memslots instead of\n+     * statically mapping the whole RAM memory region.\n+     */\n+    bool dynamic_memslots;\n+\n     /* notifiers to notify when \"size\" changes */\n     NotifierList size_change_notifiers;\n \n@@ -96,6 +124,8 @@ struct VirtIOMEMClass {\n     /* public */\n     void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi);\n     MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp);\n+    void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit);\n+    unsigned int (*get_memslots)(VirtIOMEM *vmem);\n     void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);\n     void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);\n     void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp);\n",
    "prefixes": [
        "v4",
        "16/18"
    ]
}