Patch Detail
GET: Show a patch.
PATCH: Update a patch.
PUT: Update a patch.
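Any HTTP client can exercise these methods. As a minimal sketch, the Python snippet below fetches the patch shown on this page using the `requests` library; the URL and field names mirror the JSON response that follows, while the timeout and error handling are illustrative choices rather than anything mandated by the Patchwork API.

import requests

# Fetch the patch detail shown below. Reading a patch on a public project
# needs no authentication (an assumption that holds for patchwork.ozlabs.org
# at the time of writing).
url = "http://patchwork.ozlabs.org/api/patches/2197260/"
resp = requests.get(url, timeout=30)
resp.raise_for_status()

patch = resp.json()
print(patch["name"])   # "[4/4] hw/nvme: add basic live migration support"
print(patch["state"])  # "new"
print(patch["mbox"])   # mbox rendering of the patch, suitable for `git am`

The `mbox` field in the response points at a raw mbox rendering of the patch, so the usual review workflow is to download that URL and feed it to `git am`.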
GET /api/patches/2197260/?format=api
{ "id": 2197260, "url": "http://patchwork.ozlabs.org/api/patches/2197260/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260217152517.271422-5-alexander@mihalicyn.com/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260217152517.271422-5-alexander@mihalicyn.com>", "list_archive_url": null, "date": "2026-02-17T15:25:17", "name": "[4/4] hw/nvme: add basic live migration support", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "e4c1878799c8cdef368df3c5fc0e24e80f62d342", "submitter": { "id": 81630, "url": "http://patchwork.ozlabs.org/api/people/81630/?format=api", "name": "Alexander Mikhalitsyn", "email": "alexander@mihalicyn.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20260217152517.271422-5-alexander@mihalicyn.com/mbox/", "series": [ { "id": 492445, "url": "http://patchwork.ozlabs.org/api/series/492445/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=492445", "date": "2026-02-17T15:25:13", "name": "hw/nvme: add basic live migration support", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/492445/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2197260/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2197260/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n secure) header.d=mihalicyn.com header.i=@mihalicyn.com header.a=rsa-sha256\n header.s=mihalicyn header.b=YtQ/qOrE;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=patchwork.ozlabs.org)" ], "Received": [ "from lists.gnu.org (lists.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fFkgd3nc3z1xwD\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 18 Feb 2026 02:53:01 +1100 (AEDT)", "from localhost ([::1] helo=lists1p.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces@nongnu.org>)\n\tid 1vsNMC-0002KI-JJ; Tue, 17 Feb 2026 10:51:24 -0500", "from eggs.gnu.org ([2001:470:142:3::10])\n by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <alexander@mihalicyn.com>)\n id 1vsMx6-0003c9-Pi\n for qemu-devel@nongnu.org; Tue, 17 Feb 2026 10:25:28 -0500", "from mail-wm1-x331.google.com ([2a00:1450:4864:20::331])\n by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_128_GCM_SHA256:128)\n (Exim 4.90_1) (envelope-from <alexander@mihalicyn.com>)\n id 1vsMx3-0005g1-K0\n for qemu-devel@nongnu.org; Tue, 17 Feb 2026 10:25:28 -0500", "by mail-wm1-x331.google.com with SMTP id\n 5b1f17b1804b1-4806f3fc50bso50903015e9.0\n for <qemu-devel@nongnu.org>; Tue, 17 Feb 2026 07:25:25 -0800 (PST)", 
"from alex-laptop.lan\n (p200300cf574bcf00ffb15ba913f79a3b.dip0.t-ipconnect.de.\n [2003:cf:574b:cf00:ffb1:5ba9:13f7:9a3b])\n by smtp.gmail.com with ESMTPSA id\n 5b1f17b1804b1-4835dd0e327sm519169015e9.14.2026.02.17.07.25.23\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Tue, 17 Feb 2026 07:25:23 -0800 (PST)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=mihalicyn.com; s=mihalicyn; t=1771341924; x=1771946724; darn=nongnu.org;\n h=content-transfer-encoding:mime-version:references:in-reply-to\n :message-id:date:subject:cc:to:from:from:to:cc:subject:date\n :message-id:reply-to;\n bh=X9nhnmahfRPZOwCo/9FB5xouigszThFTho5JgnvAt7o=;\n b=YtQ/qOrEgM+XhqSCCTiCiGqNNSMvZgR0GiEG7WCG8oEWDfJyDDS2TlMgqg6vhtoN8j\n ueu7DzgTxbmbSyjibDWpGxr3xewzshRDvUdaVZ3wBlVlo0aAl3FBNd1Mjwab+bHgk0Ne\n chdj3d4bsu93C7Hm2uSLy09hRy71wdGhL8b2Q=", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20230601; t=1771341924; x=1771946724;\n h=content-transfer-encoding:mime-version:references:in-reply-to\n :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from\n :to:cc:subject:date:message-id:reply-to;\n bh=X9nhnmahfRPZOwCo/9FB5xouigszThFTho5JgnvAt7o=;\n b=OCdcz9/yHmpSmGeRX0sRUZIg/pZL782t0s19pQt31acoe0TyXCcZbQeGX7PWP5kr6C\n GL9XyMgghFI0N4hUXfytDJoHPNAmxp9PmXq4LkwZFYoczdIlT6zm50HK2NS30xFoCUe3\n nUojTWLFgFDBIMASGv/fIRYusJnBGO3SoPqPNUUPHWc7T9bnfMnfkuA3pNPdyMvZazqp\n gjqSmJzUi2wyOlITXKl5Z9l+vaS38Rz+B2r8Dgg+OzWDdoOmMMuSQFA/YDyGxtAOS+tX\n hNKOdMU0oBPmp0GJO3UpO1gWVeiFDKtlwAWxGvpgKj6y8YQKBz/iBzaPrzgDhmktSGte\n C+Xw==", "X-Gm-Message-State": "AOJu0YyUC82PDZC1FS10Xa46ut+CR5FZJv/0xUxbc0vFz6RHjjTt39Tc\n KSHu+iIMHhvOkKepIGmjkuvlXDEDaxxb17rFw4wtOCGeuk2H2Kc/ikNXmH1+UkwWs9XDAbRwq8n\n 20nYw", "X-Gm-Gg": "AZuq6aL4CUEZTJ3FpYHB63RqB/5oAzDmDIvYEkP4Sy92oWoPufnRJhzGxjOaVemvFrh\n BBzRBZlR32fvSZ2IgsQie4F+C9lHWRsAgpOZ+Q9uG4q7SESf9yqohIf7dH4Asw2LEtYfdCiBFcw\n 1AJ9A9lMnUD/i+8IOXX3oq0CXUEjpMgoEXvVBdif20B8SrkLuJWjd6o+7P+sk4BVdho0wLV8T4T\n OU6ZgCvvu6IlP6MPnZy9tBQsrqocrtye1nrW3qsJMe1veqFYTuOwdDZgpkJEmijpqDhpNaV2zDX\n POaXAJRhnL1hHsUeyHymPN2nr0LtwpIA1lF0K+IlEaLKKu1Ec84Fc/DWL9AOGIeOpgwbsZRkaaY\n fSYRWKgeG/nLhFq7CuD1TxXXHwTaLGgbujXXGz+X2TAI5ppP3gWHk51x5gzez41FCVJQ+qLMsp2\n vjCVtFQOyiZzoyvvofH2HVJzA4/qyKl/Oq8UtSF0xee7BjdkhRURnu1zeZ1zzcl9ZrnprlSInnV\n n9i/y+EFN5/paaY2/LHTXQ=", "X-Received": "by 2002:a05:600c:8b61:b0:47e:e78a:c832 with SMTP id\n 5b1f17b1804b1-48379c286d4mr184174245e9.37.1771341924129;\n Tue, 17 Feb 2026 07:25:24 -0800 (PST)", "From": "Alexander Mikhalitsyn <alexander@mihalicyn.com>", "To": "qemu-devel@nongnu.org", "Cc": "Jesper Devantier <foss@defmacro.it>, Peter Xu <peterx@redhat.com>,\n Klaus Jensen <its@irrelevant.dk>, Fabiano Rosas <farosas@suse.de>,\n qemu-block@nongnu.org, Keith Busch <kbusch@kernel.org>,\n Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>", "Subject": "[PATCH 4/4] hw/nvme: add basic live migration support", "Date": "Tue, 17 Feb 2026 16:25:17 +0100", "Message-ID": "<20260217152517.271422-5-alexander@mihalicyn.com>", "X-Mailer": "git-send-email 2.47.3", "In-Reply-To": "<20260217152517.271422-1-alexander@mihalicyn.com>", "References": "<20260217152517.271422-1-alexander@mihalicyn.com>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Received-SPF": "pass client-ip=2a00:1450:4864:20::331;\n envelope-from=alexander@mihalicyn.com; helo=mail-wm1-x331.google.com", "X-Spam_score_int": "-20", "X-Spam_score": "-2.1", "X-Spam_bar": "--", "X-Spam_report": "(-2.1 / 5.0 requ) BAYES_00=-1.9, DKIM_SIGNED=0.1,\n DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, 
DKIM_VALID_EF=-0.1,\n RCVD_IN_DNSWL_NONE=-0.0001, SPF_HELO_NONE=0.001,\n SPF_PASS=-0.001 autolearn=ham autolearn_force=no", "X-Spam_action": "no action", "X-Mailman-Approved-At": "Tue, 17 Feb 2026 10:51:21 -0500", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "qemu development <qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org" }, "content": "From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>\n\nIt has some limitations:\n- only one NVMe namespace is supported\n- SMART counters are not preserved\n- CMB is not supported\n- PMR is not supported\n- SPDM is not supported\n- SR-IOV is not supported\n- AERs are not fully supported\n\nSigned-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>\n---\n hw/nvme/ctrl.c | 413 ++++++++++++++++++++++++++++++++++++++++++-\n hw/nvme/nvme.h | 2 +\n hw/nvme/trace-events | 9 +\n 3 files changed, 415 insertions(+), 9 deletions(-)", "diff": "diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c\nindex 89cc26d745b..a92837844df 100644\n--- a/hw/nvme/ctrl.c\n+++ b/hw/nvme/ctrl.c\n@@ -208,6 +208,7 @@\n #include \"hw/pci/pcie_sriov.h\"\n #include \"system/spdm-socket.h\"\n #include \"migration/blocker.h\"\n+#include \"migration/qemu-file-types.h\"\n #include \"migration/vmstate.h\"\n \n #include \"nvme.h\"\n@@ -4901,6 +4902,25 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,\n __nvme_init_sq(sq);\n }\n \n+static void nvme_restore_sq(NvmeSQueue *sq_from)\n+{\n+ NvmeCtrl *n = sq_from->ctrl;\n+ NvmeSQueue *sq = sq_from;\n+\n+ if (sq_from->sqid == 0) {\n+ sq = &n->admin_sq;\n+ sq->ctrl = n;\n+ sq->dma_addr = sq_from->dma_addr;\n+ sq->sqid = sq_from->sqid;\n+ sq->size = sq_from->size;\n+ sq->cqid = sq_from->cqid;\n+ sq->head = sq_from->head;\n+ sq->tail = sq_from->tail;\n+ }\n+\n+ __nvme_init_sq(sq);\n+}\n+\n static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)\n {\n NvmeSQueue *sq;\n@@ -5603,6 +5623,27 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,\n __nvme_init_cq(cq);\n }\n \n+static void nvme_restore_cq(NvmeCQueue *cq_from)\n+{\n+ NvmeCtrl *n = cq_from->ctrl;\n+ NvmeCQueue *cq = cq_from;\n+\n+ if (cq_from->cqid == 0) {\n+ cq = &n->admin_cq;\n+ cq->ctrl = n;\n+ cq->cqid = cq_from->cqid;\n+ cq->size = cq_from->size;\n+ cq->dma_addr = cq_from->dma_addr;\n+ cq->phase = cq_from->phase;\n+ cq->irq_enabled = cq_from->irq_enabled;\n+ cq->vector = cq_from->vector;\n+ cq->head = cq_from->head;\n+ cq->tail = cq_from->tail;\n+ }\n+\n+ __nvme_init_cq(cq);\n+}\n+\n static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)\n {\n NvmeCQueue *cq;\n@@ -7291,7 +7332,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)\n n->dbbuf_eis = eis_addr;\n n->dbbuf_enabled = true;\n \n- for (i = 0; i < n->params.max_ioqpairs + 1; i++) {\n+ for (i = 0; i < n->num_queues; i++) {\n NvmeSQueue *sq = n->sq[i];\n NvmeCQueue *cq = n->cq[i];\n \n@@ -7731,7 
+7772,7 @@ static int nvme_atomic_write_check(NvmeCtrl *n, NvmeCmd *cmd,\n /*\n * Walk the queues to see if there are any atomic conflicts.\n */\n- for (i = 1; i < n->params.max_ioqpairs + 1; i++) {\n+ for (i = 1; i < n->num_queues; i++) {\n NvmeSQueue *sq;\n NvmeRequest *req;\n NvmeRwCmd *req_rw;\n@@ -7801,6 +7842,10 @@ static void nvme_process_sq(void *opaque)\n NvmeCmd cmd;\n NvmeRequest *req;\n \n+ if (qatomic_read(&n->stop_processing_sq)) {\n+ return;\n+ }\n+\n if (n->dbbuf_enabled) {\n nvme_update_sq_tail(sq);\n }\n@@ -7809,6 +7854,10 @@ static void nvme_process_sq(void *opaque)\n NvmeAtomic *atomic;\n bool cmd_is_atomic;\n \n+ if (qatomic_read(&n->stop_processing_sq)) {\n+ return;\n+ }\n+\n addr = sq->dma_addr + (sq->head << NVME_SQES);\n if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {\n trace_pci_nvme_err_addr_read(addr);\n@@ -7917,12 +7966,12 @@ static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)\n nvme_ns_drain(ns);\n }\n \n- for (i = 0; i < n->params.max_ioqpairs + 1; i++) {\n+ for (i = 0; i < n->num_queues; i++) {\n if (n->sq[i] != NULL) {\n nvme_free_sq(n->sq[i], n);\n }\n }\n- for (i = 0; i < n->params.max_ioqpairs + 1; i++) {\n+ for (i = 0; i < n->num_queues; i++) {\n if (n->cq[i] != NULL) {\n nvme_free_cq(n->cq[i], n);\n }\n@@ -8592,6 +8641,8 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp)\n params->max_ioqpairs = params->num_queues - 1;\n }\n \n+ n->num_queues = params->max_ioqpairs + 1;\n+\n if (n->namespace.blkconf.blk && n->subsys) {\n error_setg(errp, \"subsystem support is unavailable with legacy \"\n \"namespace ('drive' property)\");\n@@ -8746,8 +8797,8 @@ static void nvme_init_state(NvmeCtrl *n)\n n->conf_msix_qsize = n->params.msix_qsize;\n }\n \n- n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);\n- n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);\n+ n->sq = g_new0(NvmeSQueue *, n->num_queues);\n+ n->cq = g_new0(NvmeCQueue *, n->num_queues);\n n->temperature = NVME_TEMPERATURE;\n n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;\n n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);\n@@ -8990,7 +9041,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)\n }\n \n if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {\n- bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL);\n+ bar_size = nvme_mbar_size(n->num_queues, 0, NULL, NULL);\n memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, \"nvme\",\n bar_size);\n pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |\n@@ -9002,7 +9053,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)\n /* add one to max_ioqpairs to account for the admin queue pair */\n if (!pci_is_vf(pci_dev)) {\n nr_vectors = n->params.msix_qsize;\n- bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,\n+ bar_size = nvme_mbar_size(n->num_queues,\n nr_vectors, &msix_table_offset,\n &msix_pba_offset);\n } else {\n@@ -9552,9 +9603,353 @@ static uint32_t nvme_pci_read_config(PCIDevice *dev, uint32_t address, int len)\n return pci_default_read_config(dev, address, len);\n }\n \n+static int nvme_ctrl_pre_save(void *opaque)\n+{\n+ NvmeCtrl *n = opaque;\n+ int i;\n+\n+ trace_pci_nvme_pre_save_enter(n);\n+\n+ /* ask SQ processing code not to take new requests */\n+ qatomic_set(&n->stop_processing_sq, true);\n+\n+ /* prevent new in-flight IO from appearing */\n+ for (i = 0; i < n->num_queues; i++) {\n+ NvmeSQueue *sq = n->sq[i];\n+\n+ if (!sq)\n+ continue;\n+\n+ qemu_bh_cancel(sq->bh);\n+ }\n+\n+ /* drain all IO 
*/\n+ for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {\n+ NvmeNamespace *ns;\n+\n+ ns = nvme_ns(n, i);\n+ if (!ns) {\n+ continue;\n+ }\n+\n+ trace_pci_nvme_pre_save_ns_drain(n, i);\n+ nvme_ns_drain(ns);\n+ }\n+\n+ /*\n+ * Now, we should take care of AERs.\n+ * It is a bit tricky, because AER can be queued\n+ * (added to n->aer_queue) when something happens,\n+ * but then we need to wait until guest submits\n+ * NVME_ADM_CMD_ASYNC_EV_REQ, only after this\n+ * we can get remove it from aer_queue and produce\n+ * CQE on that NVME_ADM_CMD_ASYNC_EV_REQ command.\n+ *\n+ * If we are unlucky, and guest haven't submited\n+ * NVME_ADM_CMD_ASYNC_EV_REQ recently, but there\n+ * are a few events in aer_queue, then nvme_process_aers()\n+ * is useless. But we should at least try.\n+ */\n+ nvme_process_aers(n);\n+\n+ /*\n+ * Now we go in a hard way:\n+ * 1. Remove all queued events.\n+ * 2. Abort all NVME_ADM_CMD_ASYNC_EV_REQ requests.\n+ *\n+ * TODO: dump/restore this stuff?\n+ */\n+ while (!QTAILQ_EMPTY(&n->aer_queue)) {\n+ NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue);\n+ QTAILQ_REMOVE(&n->aer_queue, event, entry);\n+ n->aer_queued--;\n+ g_free(event);\n+ }\n+\n+ for (i = 0; i < n->outstanding_aers; i++) {\n+ NvmeRequest *re = n->aer_reqs[i];\n+ memmove(n->aer_reqs + i, n->aer_reqs + i + 1,\n+ (n->outstanding_aers - i - 1) * sizeof(NvmeRequest *));\n+ n->outstanding_aers--;\n+ re->status = NVME_CMD_ABORT_REQ;\n+ nvme_enqueue_req_completion(&n->admin_cq, re);\n+ }\n+\n+ /*\n+ * nvme_enqueue_req_completion() will schedule BH for Admin CQ,\n+ * but we are under BQL and this scheduled BH won't be executed.\n+ * Let's manually call nvme_post_cqes().\n+ */\n+ qemu_bh_cancel(n->admin_cq.bh);\n+ nvme_post_cqes(&n->admin_cq);\n+\n+ if (n->aer_queued != 0 || n->outstanding_aers != 0 || !QTAILQ_EMPTY(&n->aer_queue)) {\n+ error_report(\"%s: AERs migrations is not supported aer_queued=%d outstanding_aers=%d qtailq_empty=%d\",\n+ __func__, n->aer_queued, n->outstanding_aers, QTAILQ_EMPTY(&n->aer_queue));\n+ goto err;\n+ }\n+\n+ /* wait when all in-flight IO requests are processed */\n+ for (i = 0; i < n->num_queues; i++) {\n+ NvmeSQueue *sq = n->sq[i];\n+\n+ if (!sq)\n+ continue;\n+\n+ trace_pci_nvme_pre_save_sq_out_req_drain_wait(n, i, sq->head, sq->tail, sq->size);\n+\n+ while (!QTAILQ_EMPTY(&sq->out_req_list)) {\n+ cpu_relax();\n+ }\n+\n+ trace_pci_nvme_pre_save_sq_out_req_drain_wait_end(n, i, sq->head, sq->tail);\n+ }\n+\n+ /* wait when all IO requests completions are written to guest memory */\n+ for (i = 0; i < n->num_queues; i++) {\n+ NvmeCQueue *cq = n->cq[i];\n+\n+ if (!cq)\n+ continue;\n+\n+ trace_pci_nvme_pre_save_cq_req_drain_wait(n, i, cq->head, cq->tail, cq->size);\n+\n+ while (!QTAILQ_EMPTY(&cq->req_list)) {\n+ /*\n+ * nvme_post_cqes() can't do its job of cleaning cq->req_list\n+ * when CQ is full, it means that we need to save what we have in\n+ * cq->req_list and restore it back on VM resume.\n+ *\n+ * Good thing is that this can only happen when guest hasn't\n+ * processed CQ for a long time and at the same time, many SQEs\n+ * are in flight.\n+ *\n+ * For now, let's just block migration in this rare case.\n+ */\n+ if (nvme_cq_full(cq)) {\n+ error_report(\"%s: no free space in CQ (not supported)\", __func__);\n+ goto err;\n+ }\n+\n+ cpu_relax();\n+ }\n+\n+ trace_pci_nvme_pre_save_cq_req_drain_wait_end(n, i, cq->head, cq->tail);\n+ }\n+\n+ for (uint32_t nsid = 0; nsid <= NVME_MAX_NAMESPACES; nsid++) {\n+ NvmeNamespace *ns = n->namespaces[nsid];\n+\n+ if (!ns)\n+ continue;\n+\n+ if (ns != 
&n->namespace) {\n+ error_report(\"%s: only one NVMe namespace is supported for migration\", __func__);\n+ goto err;\n+ }\n+ }\n+\n+ return 0;\n+\n+err:\n+ /* restore sq processing back to normal */\n+ qatomic_set(&n->stop_processing_sq, false);\n+ return -1;\n+}\n+\n+static bool nvme_ctrl_post_load(void *opaque, int version_id, Error **errp)\n+{\n+ NvmeCtrl *n = opaque;\n+ int i;\n+\n+ trace_pci_nvme_post_load_enter(n);\n+\n+ /* restore CQs first */\n+ for (i = 0; i < n->num_queues; i++) {\n+ NvmeCQueue *cq = n->cq[i];\n+\n+ if (!cq)\n+ continue;\n+\n+ cq->ctrl = n;\n+ nvme_restore_cq(cq);\n+ trace_pci_nvme_post_load_restore_cq(n, i, cq->head, cq->tail, cq->size);\n+\n+ if (i == 0) {\n+ /*\n+ * Admin CQ lives in n->admin_cq, we don't need\n+ * memory allocated for it in get_ptrs_array_entry() anymore.\n+ *\n+ * nvme_restore_cq() also takes care of:\n+ * n->cq[0] = &n->admin_cq;\n+ * so n->cq[0] remains valid.\n+ */\n+ g_free(cq);\n+ }\n+ }\n+\n+ for (i = 0; i < n->num_queues; i++) {\n+ NvmeSQueue *sq = n->sq[i];\n+\n+ if (!sq)\n+ continue;\n+\n+ sq->ctrl = n;\n+ nvme_restore_sq(sq);\n+ trace_pci_nvme_post_load_restore_sq(n, i, sq->head, sq->tail, sq->size);\n+\n+ if (i == 0) {\n+ /* same as for CQ */\n+ g_free(sq);\n+ }\n+ }\n+\n+ /*\n+ * We need to attach namespaces (currently, only one namespace is\n+ * supported for migration).\n+ * This logic comes from nvme_start_ctrl().\n+ */\n+ for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {\n+ NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);\n+\n+ if (!ns || (!ns->params.shared && ns->ctrl != n)) {\n+ continue;\n+ }\n+\n+ if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {\n+ if (!ns->attached || ns->params.shared) {\n+ nvme_attach_ns(n, ns);\n+ }\n+ }\n+ }\n+\n+ /* schedule SQ processing */\n+ for (i = 0; i < n->num_queues; i++) {\n+ NvmeSQueue *sq = n->sq[i];\n+\n+ if (!sq)\n+ continue;\n+\n+ qemu_bh_schedule(sq->bh);\n+ }\n+\n+ /*\n+ * We ensured in pre_save() that cq->req_list was empty,\n+ * so we don't need to schedule BH for CQ processing.\n+ */\n+\n+ return true;\n+}\n+\n+static const VMStateDescription nvme_vmstate_bar = {\n+ .name = \"nvme-bar\",\n+ .minimum_version_id = 1,\n+ .version_id = 1,\n+ .fields = (const VMStateField[]) {\n+ VMSTATE_UINT64(cap, NvmeBar),\n+ VMSTATE_UINT32(vs, NvmeBar),\n+ VMSTATE_UINT32(intms, NvmeBar),\n+ VMSTATE_UINT32(intmc, NvmeBar),\n+ VMSTATE_UINT32(cc, NvmeBar),\n+ VMSTATE_UINT8_ARRAY(rsvd24, NvmeBar, 4),\n+ VMSTATE_UINT32(csts, NvmeBar),\n+ VMSTATE_UINT32(nssr, NvmeBar),\n+ VMSTATE_UINT32(aqa, NvmeBar),\n+ VMSTATE_UINT64(asq, NvmeBar),\n+ VMSTATE_UINT64(acq, NvmeBar),\n+ VMSTATE_UINT32(cmbloc, NvmeBar),\n+ VMSTATE_UINT32(cmbsz, NvmeBar),\n+ VMSTATE_UINT32(bpinfo, NvmeBar),\n+ VMSTATE_UINT32(bprsel, NvmeBar),\n+ VMSTATE_UINT64(bpmbl, NvmeBar),\n+ VMSTATE_UINT64(cmbmsc, NvmeBar),\n+ VMSTATE_UINT32(cmbsts, NvmeBar),\n+ VMSTATE_UINT8_ARRAY(rsvd92, NvmeBar, 3492),\n+ VMSTATE_UINT32(pmrcap, NvmeBar),\n+ VMSTATE_UINT32(pmrctl, NvmeBar),\n+ VMSTATE_UINT32(pmrsts, NvmeBar),\n+ VMSTATE_UINT32(pmrebs, NvmeBar),\n+ VMSTATE_UINT32(pmrswtp, NvmeBar),\n+ VMSTATE_UINT32(pmrmscl, NvmeBar),\n+ VMSTATE_UINT32(pmrmscu, NvmeBar),\n+ VMSTATE_UINT8_ARRAY(css, NvmeBar, 484),\n+ VMSTATE_END_OF_LIST()\n+ },\n+};\n+\n+static const VMStateDescription nvme_vmstate_cqueue = {\n+ .name = \"nvme-cq\",\n+ .version_id = 1,\n+ .minimum_version_id = 1,\n+ .fields = (const VMStateField[]) {\n+ VMSTATE_UINT8(phase, NvmeCQueue),\n+ VMSTATE_UINT16(cqid, NvmeCQueue),\n+ VMSTATE_UINT16(irq_enabled, NvmeCQueue),\n+ VMSTATE_UINT32(head, 
NvmeCQueue),\n+ VMSTATE_UINT32(tail, NvmeCQueue),\n+ VMSTATE_UINT32(vector, NvmeCQueue),\n+ VMSTATE_UINT32(size, NvmeCQueue),\n+ VMSTATE_UINT64(dma_addr, NvmeCQueue),\n+ /* db_addr, ei_addr, etc will be recalculated */\n+ VMSTATE_END_OF_LIST()\n+ }\n+};\n+\n+static const VMStateDescription nvme_vmstate_squeue = {\n+ .name = \"nvme-sq\",\n+ .version_id = 1,\n+ .minimum_version_id = 1,\n+ .fields = (const VMStateField[]) {\n+ VMSTATE_UINT16(sqid, NvmeSQueue),\n+ VMSTATE_UINT16(cqid, NvmeSQueue),\n+ VMSTATE_UINT32(head, NvmeSQueue),\n+ VMSTATE_UINT32(tail, NvmeSQueue),\n+ VMSTATE_UINT32(size, NvmeSQueue),\n+ VMSTATE_UINT64(dma_addr, NvmeSQueue),\n+ /* db_addr, ei_addr, etc will be recalculated */\n+ VMSTATE_END_OF_LIST()\n+ }\n+};\n+\n static const VMStateDescription nvme_vmstate = {\n .name = \"nvme\",\n- .unmigratable = 1,\n+ .minimum_version_id = 1,\n+ .version_id = 1,\n+ .pre_save = nvme_ctrl_pre_save,\n+ .post_load_errp = nvme_ctrl_post_load,\n+ .fields = (const VMStateField[]) {\n+ VMSTATE_PCI_DEVICE(parent_obj, NvmeCtrl),\n+ VMSTATE_MSIX(parent_obj, NvmeCtrl),\n+ VMSTATE_STRUCT(bar, NvmeCtrl, 0, nvme_vmstate_bar, NvmeBar),\n+\n+ VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_ALLOC(\n+ sq, NvmeCtrl, num_queues, 0, nvme_vmstate_squeue, NvmeSQueue),\n+ VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_ALLOC(\n+ cq, NvmeCtrl, num_queues, 0, nvme_vmstate_cqueue, NvmeCQueue),\n+\n+ VMSTATE_BOOL(qs_created, NvmeCtrl),\n+ VMSTATE_UINT32(page_size, NvmeCtrl),\n+ VMSTATE_UINT16(page_bits, NvmeCtrl),\n+ VMSTATE_UINT16(max_prp_ents, NvmeCtrl),\n+ VMSTATE_UINT32(max_q_ents, NvmeCtrl),\n+ VMSTATE_UINT8(outstanding_aers, NvmeCtrl),\n+ VMSTATE_UINT32(irq_status, NvmeCtrl),\n+ VMSTATE_INT32(cq_pending, NvmeCtrl),\n+\n+ VMSTATE_UINT64(host_timestamp, NvmeCtrl),\n+ VMSTATE_UINT64(timestamp_set_qemu_clock_ms, NvmeCtrl),\n+ VMSTATE_UINT64(starttime_ms, NvmeCtrl),\n+ VMSTATE_UINT16(temperature, NvmeCtrl),\n+ VMSTATE_UINT8(smart_critical_warning, NvmeCtrl),\n+\n+ VMSTATE_UINT32(conf_msix_qsize, NvmeCtrl),\n+ VMSTATE_UINT32(conf_ioqpairs, NvmeCtrl),\n+ VMSTATE_UINT64(dbbuf_dbs, NvmeCtrl),\n+ VMSTATE_UINT64(dbbuf_eis, NvmeCtrl),\n+ VMSTATE_BOOL(dbbuf_enabled, NvmeCtrl),\n+\n+ VMSTATE_END_OF_LIST()\n+ },\n };\n \n static void nvme_class_init(ObjectClass *oc, const void *data)\ndiff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h\nindex 457b6637249..9c5f53c688c 100644\n--- a/hw/nvme/nvme.h\n+++ b/hw/nvme/nvme.h\n@@ -638,6 +638,7 @@ typedef struct NvmeCtrl {\n \n NvmeNamespace namespace;\n NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];\n+ uint32_t num_queues;\n NvmeSQueue **sq;\n NvmeCQueue **cq;\n NvmeSQueue admin_sq;\n@@ -669,6 +670,7 @@ typedef struct NvmeCtrl {\n \n /* Migration-related stuff */\n Error *migration_blocker;\n+ bool stop_processing_sq;\n } NvmeCtrl;\n \n typedef enum NvmeResetType {\ndiff --git a/hw/nvme/trace-events b/hw/nvme/trace-events\nindex 6be0bfa1c1f..b9c5868a942 100644\n--- a/hw/nvme/trace-events\n+++ b/hw/nvme/trace-events\n@@ -7,6 +7,15 @@ pci_nvme_dbbuf_config(uint64_t dbs_addr, uint64_t eis_addr) \"dbs_addr=0x%\"PRIx64\n pci_nvme_map_addr(uint64_t addr, uint64_t len) \"addr 0x%\"PRIx64\" len %\"PRIu64\"\"\n pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) \"addr 0x%\"PRIx64\" len %\"PRIu64\"\"\n pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) \"trans_len %\"PRIu64\" len %\"PRIu32\" prp1 0x%\"PRIx64\" prp2 0x%\"PRIx64\" num_prps %d\"\n+pci_nvme_pre_save_enter(void *n) \"n=%p\"\n+pci_nvme_pre_save_ns_drain(void *n, int i) \"n=%p 
i=%d\"\n+pci_nvme_pre_save_sq_out_req_drain_wait(void *n, int i, uint32_t head, uint32_t tail, uint32_t size) \"n=%p i=%d head=0x%\"PRIx32\" tail=0x%\"PRIx32\" size=0x%\"PRIx32\"\"\n+pci_nvme_pre_save_sq_out_req_drain_wait_end(void *n, int i, uint32_t head, uint32_t tail) \"n=%p i=%d head=0x%\"PRIx32\" tail=0x%\"PRIx32\"\"\n+pci_nvme_pre_save_cq_req_drain_wait(void *n, int i, uint32_t head, uint32_t tail, uint32_t size) \"n=%p i=%d head=0x%\"PRIx32\" tail=0x%\"PRIx32\" size=0x%\"PRIx32\"\"\n+pci_nvme_pre_save_cq_req_drain_wait_end(void *n, int i, uint32_t head, uint32_t tail) \"n=%p i=%d head=0x%\"PRIx32\" tail=0x%\"PRIx32\"\"\n+pci_nvme_post_load_enter(void *n) \"n=%p\"\n+pci_nvme_post_load_restore_cq(void *n, int i, uint32_t head, uint32_t tail, uint32_t size) \"n=%p i=%d head=0x%\"PRIx32\" tail=0x%\"PRIx32\" size=0x%\"PRIx32\"\"\n+pci_nvme_post_load_restore_sq(void *n, int i, uint32_t head, uint32_t tail, uint32_t size) \"n=%p i=%d head=0x%\"PRIx32\" tail=0x%\"PRIx32\" size=0x%\"PRIx32\"\"\n pci_nvme_map_sgl(uint8_t typ, uint64_t len) \"type 0x%\"PRIx8\" len %\"PRIu64\"\"\n pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) \"cid %\"PRIu16\" nsid 0x%\"PRIx32\" sqid %\"PRIu16\" opc 0x%\"PRIx8\" opname '%s'\"\n pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) \"cid %\"PRIu16\" sqid %\"PRIu16\" opc 0x%\"PRIx8\" opname '%s'\"\n", "prefixes": [ "4/4" ] }