Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.2/patches/2225798/?format=api
{ "id": 2225798, "url": "http://patchwork.ozlabs.org/api/1.2/patches/2225798/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260421155628.3600671-2-den@openvz.org/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/1.2/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260421155628.3600671-2-den@openvz.org>", "list_archive_url": null, "date": "2026-04-21T15:56:27", "name": "[1/2] block/io: serialise discard and write-zeroes against in-flight writes", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "975616b2eb24abe1f94810c4d929fd6b32a240bf", "submitter": { "id": 71296, "url": "http://patchwork.ozlabs.org/api/1.2/people/71296/?format=api", "name": "Denis V. Lunev\" via qemu development", "email": "qemu-devel@nongnu.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260421155628.3600671-2-den@openvz.org/mbox/", "series": [ { "id": 500841, "url": "http://patchwork.ozlabs.org/api/1.2/series/500841/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=500841", "date": "2026-04-21T15:56:27", "name": "block/io: fix reproducible silent data corruption in write-vs-discard race", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/500841/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2225798/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2225798/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n secure) header.d=virtuozzo.com header.i=@virtuozzo.com header.a=rsa-sha256\n header.s=relay header.b=A9T/9Lrr;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists1p.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=patchwork.ozlabs.org)" ], "Received": [ "from lists1p.gnu.org (lists1p.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g0Rns11kDz1yJG\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 22 Apr 2026 01:57:35 +1000 (AEST)", "from localhost ([::1] helo=lists1p.gnu.org)\n\tby lists1p.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces@nongnu.org>)\n\tid 1wFDSo-0002fQ-60; Tue, 21 Apr 2026 11:56:38 -0400", "from eggs.gnu.org ([2001:470:142:3::10])\n by lists1p.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <den@openvz.org>)\n id 1wFDSl-0002ej-GV; Tue, 21 Apr 2026 11:56:35 -0400", "from relay.virtuozzo.com ([130.117.225.111])\n by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <den@openvz.org>)\n id 1wFDSj-0003JB-GB; Tue, 21 Apr 2026 11:56:35 -0400", "from ch-demo-asa.virtuozzo.com ([130.117.225.8] helo=iris.sw.ru)\n by relay.virtuozzo.com with esmtp (Exim 4.96)\n (envelope-from <den@openvz.org>) id 1wFDQ2-001k3k-24;\n Tue, 21 Apr 2026 17:56:19 +0200" ], "DKIM-Signature": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;\n d=virtuozzo.com; s=relay; h=MIME-Version:Message-ID:Date:Subject:From:\n Content-Type; bh=tux/oh3wBPkdouV7SFgL9r8W8VUWDFSoRd15nfBxmrk=; b=A9T/9Lrr5SfM\n bJhieSxqX3VcBQnUNyJDfbYehWF7F76ucUnXVNyFxPeVWRaLcO82k+SI5w9ZE1yvna1P53wcuRu9R\n l+dN/eZ2WWqorNJTdvQ3xORssEgQj7y67LMDqCpNXfps7SaVXN/CmlpXacRjfQxpIEGu39Ek+a3I+\n HIb31CiWPIwtz+7wBUyHYNcJWebKwB8jpxhOYnN62HmMqcpjZNvJzpt0ImV6UMhFxmVQc3wfSk1UC\n TZxTRrlNY1mSM3wg95uw259YN9dO9gh7aJ0VovS33GlYFIqnVQu1lbaOCJya1pMg4XB5Gt8GvhQtj\n xlX1TRqYMCZSsawN1M567w==;", "To": "qemu-devel@nongnu.org,\n\tqemu-block@nongnu.org,\n\tqemu-stable@nongnu.org", "Cc": "den@openvz.org, Stefan Hajnoczi <stefanha@redhat.com>,\n Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>", "Subject": "[PATCH 1/2] block/io: serialise discard and write-zeroes against\n in-flight writes", "Date": "Tue, 21 Apr 2026 17:56:27 +0200", "Message-ID": "<20260421155628.3600671-2-den@openvz.org>", "X-Mailer": "git-send-email 2.51.0", "In-Reply-To": "<20260421155628.3600671-1-den@openvz.org>", "References": "<20260421155628.3600671-1-den@openvz.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Received-SPF": "softfail client-ip=130.117.225.111;\n envelope-from=den@openvz.org;\n helo=relay.virtuozzo.com", "X-Spam_score_int": "-34", "X-Spam_score": "-3.5", "X-Spam_bar": "---", "X-Spam_report": "(-3.5 / 5.0 requ) BAYES_00=-1.9, DKIM_SIGNED=0.1,\n DKIM_VALID=-0.1, RCVD_IN_DNSWL_MED=-2.3, SPF_HELO_NONE=0.001,\n SPF_SOFTFAIL=0.665 autolearn=ham autolearn_force=no", "X-Spam_action": "no action", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "qemu development <qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Reply-to": "\"Denis V. Lunev\" <den@openvz.org>", "From": "\"Denis V. Lunev\" via qemu development <qemu-devel@nongnu.org>", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org" }, "content": "qcow2's write path drops s->lock around the data I/O of an allocating\nwrite. A concurrent discard (or MAY_UNMAP write-zeroes) on the same\nguest offset lands the cluster-free operation in that window. The\noriginal writer then reacquires the lock and unconditionally writes\nL2[G] = alloc_offset | OFLAG_COPIED on its now-stale l2meta, binding\nthe L2 entry to a freed cluster:\n\n WRITE coroutine DISCARD coroutine\n --------------- -----------------\n qcow2_co_pwritev_part:\n lock(s->lock)\n qcow2_alloc_host_offset:\n handle_copied reads L2[G] = C | OFLAG_COPIED\n builds l2meta { alloc=C, keep_old_clusters=true }\n unlock(s->lock) -->\n bdrv_co_pwritev_part (data I/O) lock(s->lock)\n qcow2_co_pdiscard on G:\n discard_in_l2_slice\n set_l2_entry(G, 0)\n free_any_cluster(C):\n rc(C) 1 -> 0\n unlock(s->lock)\n lock(s->lock)\n qcow2_handle_l2meta(link_l2=true):\n qcow2_alloc_cluster_link_l2:\n set_l2_entry(G, C | OFLAG_COPIED) <- stale alloc onto\n freed cluster\n\nThe next allocator pass re-hands C out on rc=0, so we end up with two\nL2 entries aliasing one host cluster. On disk this shows up in\nqemu-img check as refcount=0 with a live OFLAG_COPIED reference or as\nrefcount < reference; at runtime the next discard on either alias\nprints \"qcow2_free_clusters failed: Invalid argument\" on stderr with\nno guest-visible error.\n\nMark both discards and all write-zeroes (with or without MAY_UNMAP)\nas BDRV_REQ_SERIALISING in the generic block layer. Their\ntracked_request then waits for overlapping in-flight writes,\nincluding non-serialising ones, to finish their format-driver commit\nbefore any L2/refcount mutation happens.\n\nSigned-off-by: Denis V. Lunev <den@openvz.org>\nCc: Stefan Hajnoczi <stefanha@redhat.com>\nCc: Kevin Wolf <kwolf@redhat.com>\nCc: Hanna Reitz <hreitz@redhat.com>\n---\n block/io.c | 25 ++++++++++++++++++++++++-\n 1 file changed, 24 insertions(+), 1 deletion(-)", "diff": "diff --git a/block/io.c b/block/io.c\nindex dd5f13c694..9f23029b95 100644\n--- a/block/io.c\n+++ b/block/io.c\n@@ -2097,6 +2097,16 @@ bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req,\n max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),\n align);\n \n+ /*\n+ * Zero-writes (with or without MAY_UNMAP) mutate L2 entries / refcounts\n+ * in the format driver and therefore race with concurrent in-flight\n+ * regular writes that have dropped their internal mutex for the data\n+ * I/O. See the comment in bdrv_co_pdiscard(). Serialise them.\n+ */\n+ if (flags & BDRV_REQ_ZERO_WRITE) {\n+ flags |= BDRV_REQ_SERIALISING;\n+ }\n+\n ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags);\n \n if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&\n@@ -3192,7 +3202,20 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,\n bdrv_inc_in_flight(bs);\n tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);\n \n- ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);\n+ /*\n+ * Discards must serialise against overlapping in-flight writes.\n+ * A format driver's write path may drop its internal mutex around\n+ * the data I/O while still holding a pending cluster-allocation\n+ * commit (see qcow2's handle_copied / qcow2_alloc_cluster_link_l2\n+ * sequence). A concurrent discard that clears L2 and drops the\n+ * refcount during that window leaves the writer pointing at a\n+ * freed cluster - the root of the refcount/reference aliasing\n+ * corruption family. Marking the discard serialising makes it wait\n+ * for the in-flight write's tracked_request to complete before any\n+ * L2/refcount mutation happens.\n+ */\n+ ret = bdrv_co_write_req_prepare(child, offset, bytes, &req,\n+ BDRV_REQ_SERIALISING);\n if (ret < 0) {\n goto out;\n }\n", "prefixes": [ "1/2" ] }