Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2217387/?format=api
{ "id": 2217387, "url": "http://patchwork.ozlabs.org/api/patches/2217387/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260328130110.166469-1-vikingtc4@gmail.com/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260328130110.166469-1-vikingtc4@gmail.com>", "list_archive_url": null, "date": "2026-03-28T13:01:10", "name": "migration/ram: avoid page population in ram_handle_zero via madvise", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "09f5b6be454b91e636f480f7c3434e2e324b4e88", "submitter": { "id": 92831, "url": "http://patchwork.ozlabs.org/api/people/92831/?format=api", "name": "Trieu Huynh", "email": "vikingtc4@gmail.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260328130110.166469-1-vikingtc4@gmail.com/mbox/", "series": [ { "id": 497866, "url": "http://patchwork.ozlabs.org/api/series/497866/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=497866", "date": "2026-03-28T13:01:10", "name": "migration/ram: avoid page population in ram_handle_zero via madvise", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/497866/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2217387/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2217387/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=cMgClWvh;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=patchwork.ozlabs.org)" ], "Received": [ "from lists.gnu.org (lists.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fjd293VxSz1y1j\n\tfor <incoming@patchwork.ozlabs.org>; Sun, 29 Mar 2026 00:01:53 +1100 (AEDT)", "from localhost ([::1] helo=lists1p.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces@nongnu.org>)\n\tid 1w6TIG-0003QV-R3; Sat, 28 Mar 2026 09:01:36 -0400", "from eggs.gnu.org ([2001:470:142:3::10])\n by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <vikingtc4@gmail.com>)\n id 1w6TI2-0003PS-Ay\n for qemu-devel@nongnu.org; Sat, 28 Mar 2026 09:01:26 -0400", "from mail-pj1-x1034.google.com ([2607:f8b0:4864:20::1034])\n by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_128_GCM_SHA256:128)\n (Exim 4.90_1) (envelope-from <vikingtc4@gmail.com>)\n id 1w6TI0-0002UX-9r\n for qemu-devel@nongnu.org; Sat, 28 Mar 2026 09:01:22 -0400", "by mail-pj1-x1034.google.com with SMTP id\n 98e67ed59e1d1-35d99bae2ebso201394a91.3\n for <qemu-devel@nongnu.org>; Sat, 28 Mar 2026 06:01:18 -0700 (PDT)", "from trieu2-huynh-trieuhpn-ubuntu24.bee-live.svc.cluster.local\n ([27.122.242.65]) by smtp.gmail.com with ESMTPSA id\n 98e67ed59e1d1-35c2d8f96e9sm2463978a91.1.2026.03.28.06.01.14\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Sat, 28 Mar 2026 06:01:16 -0700 (PDT)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=gmail.com; s=20251104; t=1774702877; x=1775307677; darn=nongnu.org;\n h=content-transfer-encoding:mime-version:message-id:date:subject:cc\n :to:from:from:to:cc:subject:date:message-id:reply-to;\n bh=vziqdk81suTBSw8GCoOakWLJDxRdW1194aCRU8sKqEc=;\n b=cMgClWvhK2YXTEdBgGUv3RnDyoP8qPGVEH9/+qys9ys1nn4TPgoKkrrKSBYQRH2GPk\n iT5CQv/4e2hUL5HkWm4vSV1MSamUGq9UkdcYgD4HpE2wGysRatQMOirEbisU50ntT4bb\n k62MhOP16setvWknnuJYSn4iSkJ+yFgKBpePBHKoPQsBcEVt64CJ4lbKSwzxtR0jr0LB\n bk+m13pHYWeQQN/QXLvyHX+vhxiotMGKk/Jjnm80rYyE56gFq5liEXhCThrAM/INVS3O\n nNEf7l+WPTCUXaoX+NHBmXuJgBhZdqcN2SkcqdI3DSe//lUPdM3SAB4hJr8jakXH8L9o\n DLGA==", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20251104; t=1774702877; x=1775307677;\n h=content-transfer-encoding:mime-version:message-id:date:subject:cc\n :to:from:x-gm-gg:x-gm-message-state:from:to:cc:subject:date\n :message-id:reply-to;\n bh=vziqdk81suTBSw8GCoOakWLJDxRdW1194aCRU8sKqEc=;\n b=AMOmuBXluO+KNhuE4SYT+bXoNAwQkLXUt+dWzort9AQftrh6IN2KhkigyiaGhyGOvc\n DG0mZqVSZiMwCisot3M/2fNUJBrKeTa3gwtmJAPJMzPKLIdeFsoNOKIYBwepBnCznMLA\n t/oic2VI8PAdQTpDyb8DftgAc7XRUowareRZ3reohaMeZJsYFyVisjPQwvsauTgs4gkI\n +AzEuRyuM7Ya98J3JVxFIUAj/+kzYwRCXban45DBC88HT4x6kAmUtLIuZVOxVOR+9XEK\n hnOcAa2CJjeqa9qyODSB/sU9LrpcOpvtzvy/XynbV+NiBCrYaEPdZpBpE05QSuurnxeQ\n TtvA==", "X-Gm-Message-State": "AOJu0YxPVIkClVDiVWH56zXw2onsXh1289wodDMyAlKiyqvhDsYwxp0T\n n7fuxbpHmavn5QlvRFhE/dPSiSfXBmxxTtTLlg0iWpeWFvz8UietFuHNDhLv8XLX", "X-Gm-Gg": "ATEYQzwhebgIBSGF5ArjHV2StPO0o2MQjO8AVt8J2Gn2NCDoRdGS94CUwQHuMe+TOFQ\n Wqonhu9gC/X6NaAmlOqptwq7hakBsMM/W+DPDryauj31JEMVwTdX6VQdT0juXE633uMics1vw8l\n rOrKZTvi4tnlMo9Cd3CRSbnApo7iYbZENoDJDHy0GjsFgF3IXE9XVvewM0bsmoNP+MfgLw/XdS5\n yOOXyBhfNaxCQhtUpfxBthngwL1RspXXGL5XFkbTTF0/dNYIhJQlD7wZtsm1AzK+5Tveir/hcBZ\n fNqZi1ktxgyO0UrXfziI2a9rmUC0dA8g0Cn2A6y3wJTrlA8S2finYkJ+PffTAvOat0fM0mgN+DP\n PiNDR30+GFY3XGAy12ItvdLyU8fxHSXKKKCyJZrpF8Rht560ZWSZKQ1qHMX+6bZProaJiFXVsD3\n c//YXz53mtY+sNTGu3WaPkiC7jVg+XigV3bXZp3lKQUFz1SB1E17uTpQIayvY0RJFo1SsBKVZ9K\n NyInmX1Wg==", "X-Received": "by 2002:a17:90b:48d2:b0:35b:e529:7089 with SMTP id\n 98e67ed59e1d1-35c2fffebadmr6381549a91.10.1774702876567;\n Sat, 28 Mar 2026 06:01:16 -0700 (PDT)", "From": "Trieu Huynh <vikingtc4@gmail.com>", "To": "qemu-devel@nongnu.org", "Cc": "Trieu Huynh <vikingtc4@gmail.com>, Peter Xu <peterx@redhat.com>,\n Fabiano Rosas <farosas@suse.de>, Li Zhijian <lizhijian@fujitsu.com>", "Subject": "[PATCH] migration/ram: avoid page population in ram_handle_zero via\n madvise", "Date": "Sat, 28 Mar 2026 22:01:10 +0900", "Message-ID": "<20260328130110.166469-1-vikingtc4@gmail.com>", "X-Mailer": "git-send-email 2.43.0", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Received-SPF": "pass client-ip=2607:f8b0:4864:20::1034;\n envelope-from=vikingtc4@gmail.com; helo=mail-pj1-x1034.google.com", "X-Spam_score_int": "-17", "X-Spam_score": "-1.8", "X-Spam_bar": "-", "X-Spam_report": "(-1.8 / 5.0 requ) BAYES_00=-1.9, DKIM_SIGNED=0.1,\n DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1,\n FREEMAIL_ENVFROM_END_DIGIT=0.25, FREEMAIL_FROM=0.001,\n RCVD_IN_DNSWL_NONE=-0.0001, SPF_HELO_NONE=0.001,\n SPF_PASS=-0.001 autolearn=ham autolearn_force=no", "X-Spam_action": "no action", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "qemu development <qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org" }, "content": "When the destination receives a zero page during precopy migration,\nram_handle_zero() calls buffer_is_zero() which reads the page. For\nanonymous mmap this is benign (reads map to the shared zero page), but\nfor memory-backend-memfd (mmap(MAP_SHARED) of a memfd) even a read\ncommits a physical page in the tmpfs page cache.\n\nAs a result, after migration all zero pages of the guest are committed\non the destination, turning a sparse RSS into a fully-populated one\n(see GitLab issue #2839: a 256 GB VM went from ~4 GB RSS before\nmigration to ~256 GB after).\n\nAdd a bool can_discard parameter and call madvise(MADV_DONTNEED) when\nit is true. This releases tmpfs/anonymous pages back to the kernel's\nzero-page pool without reading the mapping at all. The madvise is\nissued before any read or write, preventing the initial page fault\nentirely.\n\nCallers pass can_discard = !(block->flags & RAM_PREALLOC) so that\nbackends with prealloc=on are unaffected: deliberately pre-faulted pages\nmust not be discarded. On the destination side vCPUs are paused\n(RUN_STATE_INMIGRATE) while precopy pages are loaded, so madvise is\nrace-free.\n\nAfter migration for VM with 4GB RAM, the RSS on destination was reduced\nto 247 MB (vs 4148 MB before change), measured via VmRSS in\n/proc/$PID/status.\n\nRelates-to: https://wiki.qemu.org/ToDo/LiveMigration#Avoid_page_population_when_page_is_not_populated\nSee-also: https://gitlab.com/qemu-project/qemu/-/issues/2839\n\nSigned-off-by: Trieu Huynh <vikingtc4@gmail.com>\n---\n migration/ram.c | 16 +++++++++++++---\n migration/ram.h | 2 +-\n migration/rdma.c | 9 ++++++++-\n 3 files changed, 22 insertions(+), 5 deletions(-)", "diff": "diff --git a/migration/ram.c b/migration/ram.c\nindex 2a7e958e87..e57613e29d 100644\n--- a/migration/ram.c\n+++ b/migration/ram.c\n@@ -3638,9 +3638,13 @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,\n *\n * @host: host address for the zero page\n * @size: size of the zero page\n+ * @can_discard: check whether RAMBlock was created with prealloc=on\n */\n-void ram_handle_zero(void *host, uint64_t size)\n+void ram_handle_zero(void *host, uint64_t size, bool can_discard)\n {\n+ if (can_discard && qemu_madvise(host, size, QEMU_MADV_DONTNEED) == 0) {\n+ return;\n+ }\n if (!buffer_is_zero(host, size)) {\n memset(host, 0, size);\n }\n@@ -4086,7 +4090,7 @@ static bool handle_zero_mapped_ram(RAMBlock *block, unsigned long from_bit_idx,\n block->idstr);\n return false;\n }\n- ram_handle_zero(host, size);\n+ ram_handle_zero(host, size, !(block->flags & RAM_PREALLOC));\n \n return true;\n }\n@@ -4421,7 +4425,13 @@ static int ram_load_precopy(QEMUFile *f)\n ret = -EINVAL;\n break;\n }\n- ram_handle_zero(host, TARGET_PAGE_SIZE);\n+ {\n+ ram_addr_t ram_offset;\n+ RAMBlock *rb = qemu_ram_block_from_host(host, false,\n+ &ram_offset);\n+ bool can_discard = rb && !(rb->flags & RAM_PREALLOC);\n+ ram_handle_zero(host, TARGET_PAGE_SIZE, can_discard);\n+ }\n break;\n \n case RAM_SAVE_FLAG_PAGE:\ndiff --git a/migration/ram.h b/migration/ram.h\nindex 41697a7599..faa80f27d1 100644\n--- a/migration/ram.h\n+++ b/migration/ram.h\n@@ -90,7 +90,7 @@ int ram_discard_range(const char *block_name, uint64_t start, size_t length);\n int ram_postcopy_incoming_init(MigrationIncomingState *mis, Error **errp);\n int ram_load_postcopy(QEMUFile *f, int channel);\n \n-void ram_handle_zero(void *host, uint64_t size);\n+void ram_handle_zero(void *host, uint64_t size, bool can_discard);\n \n void ram_transferred_add(uint64_t bytes);\n void ram_release_page(const char *rbname, uint64_t offset);\ndiff --git a/migration/rdma.c b/migration/rdma.c\nindex 55ab85650a..d4c36af5b9 100644\n--- a/migration/rdma.c\n+++ b/migration/rdma.c\n@@ -28,6 +28,7 @@\n #include \"qemu/error-report.h\"\n #include \"qemu/main-loop.h\"\n #include \"qemu/module.h\"\n+#include \"system/ramblock.h\"\n #include \"qemu/rcu.h\"\n #include \"qemu/sockets.h\"\n #include \"qemu/bitmap.h\"\n@@ -3413,7 +3414,13 @@ int rdma_registration_handle(QEMUFile *f)\n comp->value);\n goto err;\n }\n- ram_handle_zero(host_addr, comp->length);\n+ {\n+ ram_addr_t ram_offset;\n+ RAMBlock *rb = qemu_ram_block_from_host(host_addr, false,\n+ &ram_offset);\n+ bool can_discard = rb && !(rb->flags & RAM_PREALLOC);\n+ ram_handle_zero(host_addr, comp->length, can_discard);\n+ }\n break;\n \n case RDMA_CONTROL_REGISTER_FINISHED:\n", "prefixes": [] }