Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/1839903/?format=api
{ "id": 1839903, "url": "http://patchwork.ozlabs.org/api/patches/1839903/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20230926185738.277351-2-david@redhat.com/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20230926185738.277351-2-david@redhat.com>", "list_archive_url": null, "date": "2023-09-26T18:57:21", "name": "[v4,01/18] vhost: Rework memslot filtering and fix \"used_memslot\" tracking", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "8a2c0a070b6f43ee272d49e798a0ab5578a6140d", "submitter": { "id": 70402, "url": "http://patchwork.ozlabs.org/api/people/70402/?format=api", "name": "David Hildenbrand", "email": "david@redhat.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20230926185738.277351-2-david@redhat.com/mbox/", "series": [ { "id": 374991, "url": "http://patchwork.ozlabs.org/api/series/374991/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=374991", "date": "2023-09-26T18:57:23", "name": "virtio-mem: Expose device memory through multiple memslots", "version": 4, "mbox": "http://patchwork.ozlabs.org/series/374991/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/1839903/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/1839903/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256\n header.s=mimecast20190719 header.b=EhxLkiLI;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=patchwork.ozlabs.org)" ], "Received": [ "from lists.gnu.org (lists.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4Rw8DK2lh4z1yqV\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 27 Sep 2023 04:59:13 +1000 (AEST)", "from localhost ([::1] helo=lists1p.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces@nongnu.org>)\n\tid 1qlDGE-0006kv-Qx; Tue, 26 Sep 2023 14:58:18 -0400", "from eggs.gnu.org ([2001:470:142:3::10])\n by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <david@redhat.com>) id 1qlDGD-0006jU-Au\n for qemu-devel@nongnu.org; Tue, 26 Sep 2023 14:58:17 -0400", "from us-smtp-delivery-124.mimecast.com ([170.10.133.124])\n by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <david@redhat.com>) id 1qlDGB-0002ua-1P\n for qemu-devel@nongnu.org; Tue, 26 Sep 2023 14:58:16 -0400", "from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com\n [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS\n (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id\n us-mta-340-s2AyaTeMM_urvlGdoEpzYA-1; Tue, 26 Sep 2023 14:58:05 -0400", "from smtp.corp.redhat.com (int-mx04.intmail.prod.int.rdu2.redhat.com\n [10.11.54.4])\n (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n (No client certificate requested)\n by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 2C0DB85A5BF;\n Tue, 26 Sep 2023 18:58:05 +0000 (UTC)", "from t14s.fritz.box (unknown [10.39.192.33])\n by smtp.corp.redhat.com (Postfix) with ESMTP id 8FE2E2026D68;\n Tue, 26 Sep 2023 18:57:55 +0000 (UTC)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n s=mimecast20190719; t=1695754694;\n h=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n to:to:cc:cc:mime-version:mime-version:\n content-transfer-encoding:content-transfer-encoding:\n in-reply-to:in-reply-to:references:references;\n bh=H2lWTPsgDvCfwCJwS9h9vAACqbsIOrrr2brotmfmJqM=;\n b=EhxLkiLIJvM5J63auahb8vu9DYd3TO7/S8XtGL4PIL0G34bbU5x959ypRHn84Xef96cJq5\n Ld7QaUbA8SfAK8BJoITOz3JMAlSoN3IfNbtIBXpK0RB5jvc/RkijusDDmezkJ4RCDQCXjD\n 8XV7WLfYj17HRFcseB1ylanIuHBKo+8=", "X-MC-Unique": "s2AyaTeMM_urvlGdoEpzYA-1", "From": "David Hildenbrand <david@redhat.com>", "To": "qemu-devel@nongnu.org", "Cc": "David Hildenbrand <david@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>,\n Igor Mammedov <imammedo@redhat.com>,\n Xiao Guangrong <xiaoguangrong.eric@gmail.com>,\n \"Michael S. Tsirkin\" <mst@redhat.com>, Peter Xu <peterx@redhat.com>,\n\t=?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,\n Eduardo Habkost <eduardo@habkost.net>,\n Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,\n Yanan Wang <wangyanan55@huawei.com>, Michal Privoznik <mprivozn@redhat.com>,\n\t=?utf-8?q?Daniel_P_=2E_Berrang=C3=A9?= <berrange@redhat.com>,\n Gavin Shan <gshan@redhat.com>, Alex Williamson <alex.williamson@redhat.com>,\n Stefan Hajnoczi <stefanha@redhat.com>,\n \"Maciej S . Szmigiero\" <mail@maciej.szmigiero.name>, kvm@vger.kernel.org,\n Tiwei Bie <tiwei.bie@intel.com>", "Subject": "[PATCH v4 01/18] vhost: Rework memslot filtering and fix\n \"used_memslot\" tracking", "Date": "Tue, 26 Sep 2023 20:57:21 +0200", "Message-ID": "<20230926185738.277351-2-david@redhat.com>", "In-Reply-To": "<20230926185738.277351-1-david@redhat.com>", "References": "<20230926185738.277351-1-david@redhat.com>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-Scanned-By": "MIMEDefang 3.1 on 10.11.54.4", "Received-SPF": "pass client-ip=170.10.133.124; envelope-from=david@redhat.com;\n helo=us-smtp-delivery-124.mimecast.com", "X-Spam_score_int": "-20", "X-Spam_score": "-2.1", "X-Spam_bar": "--", "X-Spam_report": "(-2.1 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001,\n DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1,\n RCVD_IN_DNSWL_NONE=-0.0001, RCVD_IN_MSPIKE_H3=0.001, RCVD_IN_MSPIKE_WL=0.001,\n SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no", "X-Spam_action": "no action", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "<qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org" }, "content": "Having multiple vhost devices, some filtering out fd-less memslots and\nsome not, can mess up the \"used_memslot\" accounting. Consequently our\n\"free memslot\" checks become unreliable and we might run out of free\nmemslots at runtime later.\n\nAn example sequence which can trigger a potential issue that involves\ndifferent vhost backends (vhost-kernel and vhost-user) and hotplugged\nmemory devices can be found at [1].\n\nLet's make the filtering mechanism less generic and distinguish between\nbackends that support private memslots (without a fd) and ones that only\nsupport shared memslots (with a fd). Track the used_memslots for both\ncases separately and use the corresponding value when required.\n\nNote: Most probably we should filter out MAP_PRIVATE fd-based RAM regions\n(for example, via memory-backend-memfd,...,shared=off or as default with\n memory-backend-file) as well. When not using MAP_SHARED, it might not work\nas expected. Add a TODO for now.\n\n[1] https://lkml.kernel.org/r/fad9136f-08d3-3fd9-71a1-502069c000cf@redhat.com\n\nFixes: 988a27754bbb (\"vhost: allow backends to filter memory sections\")\nCc: Tiwei Bie <tiwei.bie@intel.com>\nAcked-by: Igor Mammedov <imammedo@redhat.com>\nReviewed-by: Peter Xu <peterx@redhat.com>\nSigned-off-by: David Hildenbrand <david@redhat.com>\n---\n hw/virtio/vhost-user.c | 7 ++--\n hw/virtio/vhost.c | 56 ++++++++++++++++++++++++++-----\n include/hw/virtio/vhost-backend.h | 5 ++-\n 3 files changed, 52 insertions(+), 16 deletions(-)", "diff": "diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c\nindex 8dcf049d42..1e7553352a 100644\n--- a/hw/virtio/vhost-user.c\n+++ b/hw/virtio/vhost-user.c\n@@ -2500,10 +2500,9 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)\n return 0;\n }\n \n-static bool vhost_user_mem_section_filter(struct vhost_dev *dev,\n- MemoryRegionSection *section)\n+static bool vhost_user_no_private_memslots(struct vhost_dev *dev)\n {\n- return memory_region_get_fd(section->mr) >= 0;\n+ return true;\n }\n \n static int vhost_user_get_inflight_fd(struct vhost_dev *dev,\n@@ -2746,6 +2745,7 @@ const VhostOps user_ops = {\n .vhost_backend_init = vhost_user_backend_init,\n .vhost_backend_cleanup = vhost_user_backend_cleanup,\n .vhost_backend_memslots_limit = vhost_user_memslots_limit,\n+ .vhost_backend_no_private_memslots = vhost_user_no_private_memslots,\n .vhost_set_log_base = vhost_user_set_log_base,\n .vhost_set_mem_table = vhost_user_set_mem_table,\n .vhost_set_vring_addr = vhost_user_set_vring_addr,\n@@ -2772,7 +2772,6 @@ const VhostOps user_ops = {\n .vhost_set_config = vhost_user_set_config,\n .vhost_crypto_create_session = vhost_user_crypto_create_session,\n .vhost_crypto_close_session = vhost_user_crypto_close_session,\n- .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,\n .vhost_get_inflight_fd = vhost_user_get_inflight_fd,\n .vhost_set_inflight_fd = vhost_user_set_inflight_fd,\n .vhost_dev_start = vhost_user_dev_start,\ndiff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c\nindex e2f6ffb446..c1e6148833 100644\n--- a/hw/virtio/vhost.c\n+++ b/hw/virtio/vhost.c\n@@ -45,20 +45,33 @@\n static struct vhost_log *vhost_log;\n static struct vhost_log *vhost_log_shm;\n \n+/* Memslots used by backends that support private memslots (without an fd). */\n static unsigned int used_memslots;\n+\n+/* Memslots used by backends that only support shared memslots (with an fd). */\n+static unsigned int used_shared_memslots;\n+\n static QLIST_HEAD(, vhost_dev) vhost_devices =\n QLIST_HEAD_INITIALIZER(vhost_devices);\n \n bool vhost_has_free_slot(void)\n {\n- unsigned int slots_limit = ~0U;\n+ unsigned int free = UINT_MAX;\n struct vhost_dev *hdev;\n \n QLIST_FOREACH(hdev, &vhost_devices, entry) {\n unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);\n- slots_limit = MIN(slots_limit, r);\n+ unsigned int cur_free;\n+\n+ if (hdev->vhost_ops->vhost_backend_no_private_memslots &&\n+ hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {\n+ cur_free = r - used_shared_memslots;\n+ } else {\n+ cur_free = r - used_memslots;\n+ }\n+ free = MIN(free, cur_free);\n }\n- return slots_limit > used_memslots;\n+ return free > 0;\n }\n \n static void vhost_dev_sync_region(struct vhost_dev *dev,\n@@ -474,8 +487,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,\n * vhost_section: identify sections needed for vhost access\n *\n * We only care about RAM sections here (where virtqueue and guest\n- * internals accessed by virtio might live). If we find one we still\n- * allow the backend to potentially filter it out of our list.\n+ * internals accessed by virtio might live).\n */\n static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)\n {\n@@ -502,8 +514,16 @@ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)\n return false;\n }\n \n- if (dev->vhost_ops->vhost_backend_mem_section_filter &&\n- !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {\n+ /*\n+ * Some backends (like vhost-user) can only handle memory regions\n+ * that have an fd (can be mapped into a different process). Filter\n+ * the ones without an fd out, if requested.\n+ *\n+ * TODO: we might have to limit to MAP_SHARED as well.\n+ */\n+ if (memory_region_get_fd(section->mr) < 0 &&\n+ dev->vhost_ops->vhost_backend_no_private_memslots &&\n+ dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {\n trace_vhost_reject_section(mr->name, 2);\n return false;\n }\n@@ -568,7 +588,14 @@ static void vhost_commit(MemoryListener *listener)\n dev->n_mem_sections * sizeof dev->mem->regions[0];\n dev->mem = g_realloc(dev->mem, regions_size);\n dev->mem->nregions = dev->n_mem_sections;\n- used_memslots = dev->mem->nregions;\n+\n+ if (dev->vhost_ops->vhost_backend_no_private_memslots &&\n+ dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {\n+ used_shared_memslots = dev->mem->nregions;\n+ } else {\n+ used_memslots = dev->mem->nregions;\n+ }\n+\n for (i = 0; i < dev->n_mem_sections; i++) {\n struct vhost_memory_region *cur_vmr = dev->mem->regions + i;\n struct MemoryRegionSection *mrs = dev->mem_sections + i;\n@@ -1400,6 +1427,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,\n VhostBackendType backend_type, uint32_t busyloop_timeout,\n Error **errp)\n {\n+ unsigned int used;\n uint64_t features;\n int i, r, n_initialized_vqs = 0;\n \n@@ -1495,7 +1523,17 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,\n memory_listener_register(&hdev->memory_listener, &address_space_memory);\n QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);\n \n- if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {\n+ /*\n+ * The listener we registered properly updated the corresponding counter.\n+ * So we can trust that these values are accurate.\n+ */\n+ if (hdev->vhost_ops->vhost_backend_no_private_memslots &&\n+ hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {\n+ used = used_shared_memslots;\n+ } else {\n+ used = used_memslots;\n+ }\n+ if (used > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {\n error_setg(errp, \"vhost backend memory slots limit is less\"\n \" than current number of present memory slots\");\n r = -EINVAL;\ndiff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h\nindex 31a251a9f5..df2821ddae 100644\n--- a/include/hw/virtio/vhost-backend.h\n+++ b/include/hw/virtio/vhost-backend.h\n@@ -108,8 +108,7 @@ typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev,\n typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev,\n uint64_t session_id);\n \n-typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev,\n- MemoryRegionSection *section);\n+typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev);\n \n typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,\n uint16_t queue_size,\n@@ -138,6 +137,7 @@ typedef struct VhostOps {\n vhost_backend_init vhost_backend_init;\n vhost_backend_cleanup vhost_backend_cleanup;\n vhost_backend_memslots_limit vhost_backend_memslots_limit;\n+ vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots;\n vhost_net_set_backend_op vhost_net_set_backend;\n vhost_net_set_mtu_op vhost_net_set_mtu;\n vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint;\n@@ -172,7 +172,6 @@ typedef struct VhostOps {\n vhost_set_config_op vhost_set_config;\n vhost_crypto_create_session_op vhost_crypto_create_session;\n vhost_crypto_close_session_op vhost_crypto_close_session;\n- vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;\n vhost_get_inflight_fd_op vhost_get_inflight_fd;\n vhost_set_inflight_fd_op vhost_set_inflight_fd;\n vhost_dev_start_op vhost_dev_start;\n", "prefixes": [ "v4", "01/18" ] }