Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.2/patches/815411/?format=api
{ "id": 815411, "url": "http://patchwork.ozlabs.org/api/1.2/patches/815411/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20170919102434.21147-2-pbonzini@redhat.com/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/1.2/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20170919102434.21147-2-pbonzini@redhat.com>", "list_archive_url": null, "date": "2017-09-19T10:24:31", "name": "[1/4] scsi, file-posix: add support for persistent reservation management", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "045f1f28b9022b5a8b524aa974d4c1822d7b5e31", "submitter": { "id": 2701, "url": "http://patchwork.ozlabs.org/api/1.2/people/2701/?format=api", "name": "Paolo Bonzini", "email": "pbonzini@redhat.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20170919102434.21147-2-pbonzini@redhat.com/mbox/", "series": [ { "id": 3827, "url": "http://patchwork.ozlabs.org/api/1.2/series/3827/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=3827", "date": "2017-09-19T10:24:30", "name": "scsi, block: introduce persistent reservation managers", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/3827/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/815411/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/815411/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@bilbo.ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n\tspf=pass (mailfrom) 
smtp.mailfrom=nongnu.org\n\t(client-ip=2001:4830:134:3::11; helo=lists.gnu.org;\n\tenvelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n\treceiver=<UNKNOWN>)", "ext-mx06.extmail.prod.ext.phx2.redhat.com;\n\tdmarc=none (p=none dis=none) header.from=redhat.com", "ext-mx06.extmail.prod.ext.phx2.redhat.com;\n\tspf=fail smtp.mailfrom=pbonzini@redhat.com" ], "Received": [ "from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11])\n\t(using TLSv1 with cipher AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xxJtT74Rrz9s7h\n\tfor <incoming@patchwork.ozlabs.org>;\n\tTue, 19 Sep 2017 20:25:53 +1000 (AEST)", "from localhost ([::1]:41289 helo=lists.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.71) (envelope-from\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>)\n\tid 1duFiy-0004QP-27\n\tfor incoming@patchwork.ozlabs.org; Tue, 19 Sep 2017 06:25:52 -0400", "from eggs.gnu.org ([2001:4830:134:3::10]:45335)\n\tby lists.gnu.org with esmtp (Exim 4.71)\n\t(envelope-from <pbonzini@redhat.com>) id 1duFhz-00049r-IK\n\tfor qemu-devel@nongnu.org; Tue, 19 Sep 2017 06:24:53 -0400", "from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)\n\t(envelope-from <pbonzini@redhat.com>) id 1duFhx-0004dV-3q\n\tfor qemu-devel@nongnu.org; Tue, 19 Sep 2017 06:24:51 -0400", "from mx1.redhat.com ([209.132.183.28]:50680)\n\tby eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32)\n\t(Exim 4.71) (envelope-from <pbonzini@redhat.com>)\n\tid 1duFhm-0004Vc-BN; Tue, 19 Sep 2017 06:24:38 -0400", "from smtp.corp.redhat.com\n\t(int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby mx1.redhat.com (Postfix) with ESMTPS id 758D7267CA;\n\tTue, 19 Sep 2017 10:24:37 +0000 (UTC)", "from donizetti.redhat.com (ovpn-117-61.ams2.redhat.com\n\t[10.36.117.61])\n\tby smtp.corp.redhat.com (Postfix) with ESMTP 
id 738D55D6A4;\n\tTue, 19 Sep 2017 10:24:36 +0000 (UTC)" ], "DMARC-Filter": "OpenDMARC Filter v1.3.2 mx1.redhat.com 758D7267CA", "From": "Paolo Bonzini <pbonzini@redhat.com>", "To": "qemu-devel@nongnu.org", "Date": "Tue, 19 Sep 2017 12:24:31 +0200", "Message-Id": "<20170919102434.21147-2-pbonzini@redhat.com>", "In-Reply-To": "<20170919102434.21147-1-pbonzini@redhat.com>", "References": "<20170919102434.21147-1-pbonzini@redhat.com>", "X-Scanned-By": "MIMEDefang 2.79 on 10.5.11.15", "X-Greylist": "Sender IP whitelisted, not delayed by milter-greylist-4.5.16\n\t(mx1.redhat.com [10.5.110.30]);\n\tTue, 19 Sep 2017 10:24:37 +0000 (UTC)", "X-detected-operating-system": "by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic]\n\t[fuzzy]", "X-Received-From": "209.132.183.28", "Subject": "[Qemu-devel] [PATCH 1/4] scsi,\n\tfile-posix: add support for persistent reservation management", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.21", "Precedence": "list", "List-Id": "<qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<http://lists.nongnu.org/archive/html/qemu-devel/>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Cc": "qemu-block@nongnu.org", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "\"Qemu-devel\"\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>" }, "content": "It is a common requirement for virtual machine to send persistent\nreservations, but this currently requires either running QEMU with\nCAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged\nQEMU bypass Linux's filter on SG_IO commands.\n\nAs an alternative mechanism, the next patches will 
introduce a\nprivileged helper to run persistent reservation commands without\nexpanding QEMU's attack surface unnecessarily.\n\nThe helper is invoked through a \"pr-manager\" QOM object, to which\nfile-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and\nPERSISTENT RESERVE IN commands. For example:\n\n $ qemu-system-x86_64 \\\n -device virtio-scsi \\\n -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \\\n -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 \\\n -device scsi-block,drive=hd\n\nor:\n\n $ qemu-system-x86_64 \\\n -device virtio-scsi \\\n -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \\\n -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 \\\n -device scsi-block,drive=hd\n\nMultiple pr-manager implementations are conceivable and possible, though\nonly one is implemented right now. For example, a pr-manager could:\n\n- talk directly to the multipath daemon from a privileged QEMU\n (i.e. QEMU links to libmpathpersist); this makes reservations work\n properly with multipath, but still requires CAP_SYS_RAWIO\n\n- use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though)\n\n- more interestingly, implement reservations directly in QEMU\n through file system locks or a shared database (e.g. 
sqlite)\n\nSigned-off-by: Paolo Bonzini <pbonzini@redhat.com>\n---\n Makefile.objs | 1 +\n block/file-posix.c | 30 +++++++++++++\n docs/pr-manager.rst | 51 ++++++++++++++++++++++\n include/scsi/pr-manager.h | 56 ++++++++++++++++++++++++\n qapi/block-core.json | 4 ++\n scsi/Makefile.objs | 2 +\n scsi/pr-manager.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++\n vl.c | 3 +-\n 8 files changed, 255 insertions(+), 1 deletion(-)\n create mode 100644 docs/pr-manager.rst\n create mode 100644 include/scsi/pr-manager.h\n create mode 100644 scsi/pr-manager.c", "diff": "diff --git a/Makefile.objs b/Makefile.objs\nindex 0caa8a5cf8..12abaa6191 100644\n--- a/Makefile.objs\n+++ b/Makefile.objs\n@@ -170,6 +170,7 @@ trace-events-subdirs += qapi\n trace-events-subdirs += accel/tcg\n trace-events-subdirs += accel/kvm\n trace-events-subdirs += nbd\n+trace-events-subdirs += scsi\n \n trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)\n \ndiff --git a/block/file-posix.c b/block/file-posix.c\nindex 6acbd56238..ab12a2b591 100644\n--- a/block/file-posix.c\n+++ b/block/file-posix.c\n@@ -33,6 +33,9 @@\n #include \"block/raw-aio.h\"\n #include \"qapi/qmp/qstring.h\"\n \n+#include \"scsi/pr-manager.h\"\n+#include \"scsi/constants.h\"\n+\n #if defined(__APPLE__) && (__MACH__)\n #include <paths.h>\n #include <sys/param.h>\n@@ -155,6 +158,8 @@ typedef struct BDRVRawState {\n bool page_cache_inconsistent:1;\n bool has_fallocate;\n bool needs_alignment;\n+\n+ PRManager *pr_mgr;\n } BDRVRawState;\n \n typedef struct BDRVRawReopenState {\n@@ -402,6 +407,11 @@ static QemuOptsList raw_runtime_opts = {\n .type = QEMU_OPT_STRING,\n .help = \"file locking mode (on/off/auto, default: auto)\",\n },\n+ {\n+ .name = \"pr-manager\",\n+ .type = QEMU_OPT_STRING,\n+ .help = \"id of persistent reservation manager object (default: none)\",\n+ },\n { /* end of list */ }\n },\n };\n@@ -413,6 +423,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,\n QemuOpts *opts;\n Error *local_err = NULL;\n const char *filename = NULL;\n+ const char *str;\n BlockdevAioOptions aio, aio_default;\n int fd, ret;\n struct stat st;\n@@ -476,6 +487,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,\n abort();\n }\n \n+ str = qemu_opt_get(opts, \"pr-manager\");\n+ if (str) {\n+ s->pr_mgr = pr_manager_lookup(str, &local_err);\n+ if (local_err) {\n+ error_propagate(errp, local_err);\n+ ret = -EINVAL;\n+ goto fail;\n+ }\n+ }\n+\n s->open_flags = open_flags;\n raw_parse_flags(bdrv_flags, &s->open_flags);\n \n@@ -2597,6 +2618,15 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,\n if (fd_open(bs) < 0)\n return NULL;\n \n+ if (req == SG_IO && s->pr_mgr) {\n+ struct sg_io_hdr *io_hdr = buf;\n+ if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||\n+ io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {\n+ return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),\n+ s->fd, io_hdr, cb, opaque);\n+ }\n+ }\n+\n acb = g_new(RawPosixAIOData, 1);\n acb->bs = bs;\n acb->aio_type = QEMU_AIO_IOCTL;\ndiff --git a/docs/pr-manager.rst b/docs/pr-manager.rst\nnew file mode 100644\nindex 0000000000..b6089fb57c\n--- /dev/null\n+++ b/docs/pr-manager.rst\n@@ -0,0 +1,51 @@\n+======================================\n+Persistent reservation managers\n+======================================\n+\n+SCSI persistent Reservations allow restricting access to block devices\n+to specific initiators in a shared storage setup. When implementing\n+clustering of virtual machines, it is a common requirement for virtual\n+machines to send persistent reservation SCSI commands. 
However,\n+the operating system restricts sending these commands to unprivileged\n+programs because incorrect usage can disrupt regular operation of the\n+storage fabric.\n+\n+For this reason, QEMU's SCSI passthrough devices, ``scsi-block``\n+and ``scsi-generic`` (both are only available on Linux) can delegate\n+implementation of persistent reservations to a separate object,\n+the \"persistent reservation manager\". Only PERSISTENT RESERVE OUT and\n+PERSISTENT RESERVE IN commands are passed to the persistent reservation\n+manager object; other commands are processed by QEMU as usual.\n+\n+-----------------------------------------\n+Defining a persistent reservation manager\n+-----------------------------------------\n+\n+A persistent reservation manager is an instance of a subclass of the\n+\"pr-manager\" QOM class.\n+\n+Right now only one subclass is defined, ``pr-manager-helper``, which\n+forwards the commands to an external privileged helper program\n+over Unix sockets. The helper program only allows sending persistent\n+reservation commands to devices for which QEMU has a file descriptor,\n+so that QEMU will not be able to effect persistent reservations\n+unless it has access to both the socket and the device.\n+\n+``pr-manager-helper`` has a single string property, ``path``, which\n+accepts the path to the helper program's Unix socket. 
For example,\n+the following command line defines a ``pr-manager-helper`` object and\n+attaches it to a SCSI passthrough device::\n+\n+ $ qemu-system-x86_64\n+ -device virtio-scsi \\\n+ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock\n+ -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0\n+ -device scsi-block,drive=hd\n+\n+Alternatively, using ``-blockdev``::\n+\n+ $ qemu-system-x86_64\n+ -device virtio-scsi \\\n+ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock\n+ -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0\n+ -device scsi-block,drive=hd\ndiff --git a/include/scsi/pr-manager.h b/include/scsi/pr-manager.h\nnew file mode 100644\nindex 0000000000..b2b37d63bc\n--- /dev/null\n+++ b/include/scsi/pr-manager.h\n@@ -0,0 +1,56 @@\n+#ifndef PR_MANAGER_H\n+#define PR_MANAGER_H\n+\n+#include \"qom/object.h\"\n+#include \"qapi/qmp/qdict.h\"\n+#include \"qapi/visitor.h\"\n+#include \"qom/object_interfaces.h\"\n+#include \"block/aio.h\"\n+\n+#define TYPE_PR_MANAGER \"pr-manager\"\n+\n+#define PR_MANAGER_CLASS(klass) \\\n+ OBJECT_CLASS_CHECK(PRManagerClass, (klass), TYPE_PR_MANAGER)\n+#define PR_MANAGER_GET_CLASS(obj) \\\n+ OBJECT_GET_CLASS(PRManagerClass, (obj), TYPE_PR_MANAGER)\n+#define PR_MANAGER(obj) \\\n+ OBJECT_CHECK(PRManager, (obj), TYPE_PR_MANAGER)\n+\n+struct sg_io_hdr;\n+\n+typedef struct PRManager {\n+ /* <private> */\n+ Object parent;\n+} PRManager;\n+\n+/**\n+ * PRManagerClass:\n+ * @parent_class: the base class\n+ * @run: callback invoked in thread pool context\n+ */\n+typedef struct PRManagerClass {\n+ /* <private> */\n+ ObjectClass parent_class;\n+\n+ /* <public> */\n+ int (*run)(PRManager *pr_mgr, int fd, struct sg_io_hdr *hdr);\n+} PRManagerClass;\n+\n+BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,\n+ AioContext *ctx, int fd,\n+ struct sg_io_hdr *hdr,\n+ BlockCompletionFunc *complete,\n+ void *opaque);\n+\n+#ifdef 
CONFIG_LINUX\n+PRManager *pr_manager_lookup(const char *id, Error **errp);\n+#else\n+static inline PRManager *pr_manager_lookup(const char *id, Error **errp)\n+{\n+ /* The classes do not exist at all! */\n+ error_setg(errp, \"No persistent reservation manager with id '%s'\", id);\n+ return NULL;\n+}\n+#endif\n+\n+#endif\ndiff --git a/qapi/block-core.json b/qapi/block-core.json\nindex bb11815608..c69a395804 100644\n--- a/qapi/block-core.json\n+++ b/qapi/block-core.json\n@@ -2241,6 +2241,9 @@\n # Driver specific block device options for the file backend.\n #\n # @filename: path to the image file\n+# @pr-manager: the id for the object that will handle persistent reservations\n+# for this device (default: none, forward the commands via SG_IO;\n+# since 2.11)\n # @aio: AIO backend (default: threads) (since: 2.8)\n # @locking: whether to enable file locking. If set to 'auto', only enable\n # when Open File Descriptor (OFD) locking API is available\n@@ -2250,6 +2253,7 @@\n ##\n { 'struct': 'BlockdevOptionsFile',\n 'data': { 'filename': 'str',\n+ '*pr-manager': 'str',\n '*locking': 'OnOffAuto',\n '*aio': 'BlockdevAioOptions' } }\n \ndiff --git a/scsi/Makefile.objs b/scsi/Makefile.objs\nindex 31b82a5a36..5496d2ae6a 100644\n--- a/scsi/Makefile.objs\n+++ b/scsi/Makefile.objs\n@@ -1 +1,3 @@\n block-obj-y += utils.o\n+\n+block-obj-$(CONFIG_LINUX) += pr-manager.o\ndiff --git a/scsi/pr-manager.c b/scsi/pr-manager.c\nnew file mode 100644\nindex 0000000000..cde19b87c3\n--- /dev/null\n+++ b/scsi/pr-manager.c\n@@ -0,0 +1,109 @@\n+/*\n+ * Persistent reservation manager abstract class\n+ *\n+ * Copyright (c) 2017 Red Hat, Inc.\n+ *\n+ * Author: Paolo Bonzini <pbonzini@redhat.com>\n+ *\n+ * This code is licensed under the LGPL.\n+ *\n+ */\n+\n+#include \"qemu/osdep.h\"\n+#include <scsi/sg.h>\n+\n+#include \"qapi/error.h\"\n+#include \"block/aio.h\"\n+#include \"block/thread-pool.h\"\n+#include \"scsi/pr-manager.h\"\n+#include \"trace.h\"\n+\n+typedef struct PRManagerData {\n+ PRManager 
*pr_mgr;\n+ struct sg_io_hdr *hdr;\n+ int fd;\n+} PRManagerData;\n+\n+static int pr_manager_worker(void *opaque)\n+{\n+ PRManagerData *data = opaque;\n+ PRManager *pr_mgr = data->pr_mgr;\n+ PRManagerClass *pr_mgr_class =\n+ PR_MANAGER_GET_CLASS(pr_mgr);\n+ struct sg_io_hdr *hdr = data->hdr;\n+ int fd = data->fd;\n+ int r;\n+\n+ g_free(data);\n+ trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]);\n+\n+ /* The is was taken in pr_manager_execute. */\n+ r = pr_mgr_class->run(pr_mgr, fd, hdr);\n+ object_unref(OBJECT(pr_mgr));\n+ return r;\n+}\n+\n+\n+BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,\n+ AioContext *ctx, int fd,\n+ struct sg_io_hdr *hdr,\n+ BlockCompletionFunc *complete,\n+ void *opaque)\n+{\n+ PRManagerData *data = g_new(PRManagerData, 1);\n+ ThreadPool *pool = aio_get_thread_pool(ctx);\n+\n+ trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1], opaque);\n+ data->pr_mgr = pr_mgr;\n+ data->fd = fd;\n+ data->hdr = hdr;\n+\n+ /* The matching object_unref is in pr_manager_worker. 
*/\n+ object_ref(OBJECT(pr_mgr));\n+ return thread_pool_submit_aio(pool, pr_manager_worker,\n+ data, complete, opaque);\n+}\n+\n+static const TypeInfo pr_manager_info = {\n+ .parent = TYPE_OBJECT,\n+ .name = TYPE_PR_MANAGER,\n+ .class_size = sizeof(PRManagerClass),\n+ .abstract = true,\n+ .interfaces = (InterfaceInfo[]) {\n+ { TYPE_USER_CREATABLE },\n+ { }\n+ }\n+};\n+\n+PRManager *pr_manager_lookup(const char *id, Error **errp)\n+{\n+ Object *obj;\n+ PRManager *pr_mgr;\n+\n+ obj = object_resolve_path_component(object_get_objects_root(), id);\n+ if (!obj) {\n+ error_setg(errp, \"No persistent reservation manager with id '%s'\", id);\n+ return NULL;\n+ }\n+\n+ pr_mgr = (PRManager *)\n+ object_dynamic_cast(obj,\n+ TYPE_PR_MANAGER);\n+ if (!pr_mgr) {\n+ error_setg(errp,\n+ \"Object with id '%s' is not a persistent reservation manager\",\n+ id);\n+ return NULL;\n+ }\n+\n+ return pr_mgr;\n+}\n+\n+static void\n+pr_manager_register_types(void)\n+{\n+ type_register_static(&pr_manager_info);\n+}\n+\n+\n+type_init(pr_manager_register_types);\ndiff --git a/vl.c b/vl.c\nindex 9e62e92aea..bfee61053b 100644\n--- a/vl.c\n+++ b/vl.c\n@@ -2893,7 +2893,8 @@ static int machine_set_property(void *opaque,\n */\n static bool object_create_initial(const char *type)\n {\n- if (g_str_equal(type, \"rng-egd\")) {\n+ if (g_str_equal(type, \"rng-egd\") ||\n+ g_str_has_prefix(type, \"pr-manager-\")) {\n return false;\n }\n \n", "prefixes": [ "1/4" ] }