Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.1/patches/2230648/?format=api
{ "id": 2230648, "url": "http://patchwork.ozlabs.org/api/1.1/patches/2230648/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-pci/patch/20260429231519.2569088-3-mrathor@linux.microsoft.com/", "project": { "id": 28, "url": "http://patchwork.ozlabs.org/api/1.1/projects/28/?format=api", "name": "Linux PCI development", "link_name": "linux-pci", "list_id": "linux-pci.vger.kernel.org", "list_email": "linux-pci@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null }, "msgid": "<20260429231519.2569088-3-mrathor@linux.microsoft.com>", "date": "2026-04-29T23:15:18", "name": "[v0,2/3] hyperv: Implement irq remap for passthru devices", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "76bd987e5dc15490d07f088b5adec09c473b9b96", "submitter": { "id": 91512, "url": "http://patchwork.ozlabs.org/api/1.1/people/91512/?format=api", "name": "Mukesh R", "email": "mrathor@linux.microsoft.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linux-pci/patch/20260429231519.2569088-3-mrathor@linux.microsoft.com/mbox/", "series": [ { "id": 502164, "url": "http://patchwork.ozlabs.org/api/1.1/series/502164/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-pci/list/?series=502164", "date": "2026-04-29T23:15:16", "name": "PCI passthru on Hyper-V (Part II)", "version": 0, "mbox": "http://patchwork.ozlabs.org/series/502164/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2230648/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2230648/checks/", "tags": {}, "headers": { "Return-Path": "\n <linux-pci+bounces-53431-incoming=patchwork.ozlabs.org@vger.kernel.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "linux-pci@vger.kernel.org" ], "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=linux.microsoft.com header.i=@linux.microsoft.com\n header.a=rsa-sha256 header.s=default header.b=K4B5fBzi;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=172.105.105.114; helo=tor.lore.kernel.org;\n envelope-from=linux-pci+bounces-53431-incoming=patchwork.ozlabs.org@vger.kernel.org;\n receiver=patchwork.ozlabs.org)", "smtp.subspace.kernel.org;\n\tdkim=pass (1024-bit key) header.d=linux.microsoft.com\n header.i=@linux.microsoft.com header.b=\"K4B5fBzi\"", "smtp.subspace.kernel.org;\n arc=none smtp.client-ip=13.77.154.182", "smtp.subspace.kernel.org;\n dmarc=pass (p=none dis=none) header.from=linux.microsoft.com", "smtp.subspace.kernel.org;\n spf=pass smtp.mailfrom=linux.microsoft.com" ], "Received": [ "from tor.lore.kernel.org (tor.lore.kernel.org [172.105.105.114])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g5Y8Q4zfgz1yGq\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 30 Apr 2026 09:16:22 +1000 (AEST)", "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby tor.lore.kernel.org (Postfix) with ESMTP id 391C9302B80A\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 29 Apr 2026 23:15:58 +0000 (UTC)", "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 7888139F166;\n\tWed, 29 Apr 2026 23:15:45 +0000 (UTC)", "from linux.microsoft.com (linux.microsoft.com [13.77.154.182])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id A4524DDC5;\n\tWed, 29 Apr 2026 23:15:43 +0000 (UTC)", "from mrdev.corp.microsoft.com (unknown [13.88.17.9])\n\tby linux.microsoft.com (Postfix) with ESMTPSA id CF82620B716E;\n\tWed, 29 Apr 2026 16:15:42 -0700 (PDT)" ], "ARC-Seal": "i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1777504545; cv=none;\n b=ieq0NVcpNKkd+twofJI3Z6j4YdwJ2GsTaR+c32xdoWopYYeW1igE40XsAl2IKihMPPYwETATyQkAcT5NmBI8xraLXuJbS0lPjhImJm3+aTj45k8eibzbpob6rKdI/N0fl/ranfrZEv17TsYLxoCXxbAdU13yE757zERa4AGl0ZQ=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1777504545; c=relaxed/simple;\n\tbh=+jZL2yzNq92eUZkUTql6nt4kSCpLxMk5DqXGT6y0sbs=;\n\th=From:To:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=J10CHyrcNN24Sa/puJ7gKyXFNPA9Q8UNHNgEhycglU4M8sG3nlcnBl686Ts77cT/8g1rjEY3JXx/gLmWNKAx2VOoZydoqvHRmAwVwbkql+vqOSi9Wzw+APZRwgXY4Jq3iSEhTaJL8ke5fERLe9YpRwL6/FxvgGkVj4XFIexqz2g=", "ARC-Authentication-Results": "i=1; smtp.subspace.kernel.org;\n dmarc=pass (p=none dis=none) header.from=linux.microsoft.com;\n spf=pass smtp.mailfrom=linux.microsoft.com;\n dkim=pass (1024-bit key) header.d=linux.microsoft.com\n header.i=@linux.microsoft.com header.b=K4B5fBzi;\n arc=none smtp.client-ip=13.77.154.182", "DKIM-Filter": "OpenDKIM Filter v2.11.0 linux.microsoft.com CF82620B716E", "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com;\n\ts=default; t=1777504543;\n\tbh=rpEi0Z5x5qCF3nGvkxTmufob6EB7cXEpvYXuys8oGOc=;\n\th=From:To:Subject:Date:In-Reply-To:References:From;\n\tb=K4B5fBziKwnmduq/76cQN4bBt4090NJ65lAVQq0ek7Dfyd6tdnwvVGc1NO+UzWg82\n\t wFV6aXHtKTdPtG/eAEAE81sJx1ySIMpoLvKM/mYNwjSXyL0HbfKqExUdE/z1ktWTDI\n\t IhTqPWxooAqJtGacVE2ceRl5b3Q+Qv7YaReNt2uQ=", "From": "Mukesh R <mrathor@linux.microsoft.com>", "To": "hpa@zytor.com,\n\trobin.murphy@arm.com,\n\trobh@kernel.org,\n\tlinux-hyperv@vger.kernel.org,\n\tlinux-kernel@vger.kernel.org,\n\tiommu@lists.linux.dev,\n\tlinux-pci@vger.kernel.org,\n\tlinux-arch@vger.kernel.org", "Subject": "[PATCH v0 2/3] hyperv: Implement irq remap for passthru devices", "Date": "Wed, 29 Apr 2026 16:15:18 -0700", "Message-ID": "<20260429231519.2569088-3-mrathor@linux.microsoft.com>", "X-Mailer": "git-send-email 2.51.2.vfs.0.1", "In-Reply-To": "<20260429231519.2569088-1-mrathor@linux.microsoft.com>", "References": "<20260429231519.2569088-1-mrathor@linux.microsoft.com>", "Precedence": "bulk", "X-Mailing-List": "linux-pci@vger.kernel.org", "List-Id": "<linux-pci.vger.kernel.org>", "List-Subscribe": "<mailto:linux-pci+subscribe@vger.kernel.org>", "List-Unsubscribe": "<mailto:linux-pci+unsubscribe@vger.kernel.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit" }, "content": "Implement interrupt remapping for direct attached and domain attached\ndevices on Hyper-V.\n\nPlease note there are few constraints when it comes to mapping device\ninterrupts on Hyper-V. For example, the hypervisor will not allow mapping\ndevice interrupts to root if the device is a direct attached device. Since\nthe target guest cpu and vector info is not available during the initial\nVFIO irq setup, we work around by skipping this initial map. Then later\nduring irqbypass trigger, when both guest target cpu vector are available,\nwe do the map in the hypervisor, update the device, and enable the\ninterrupt vector on the device. Rather than special case direct attached,\nwe do same for domain attached also. This implies irqbypass is required\nfor MSHV pci device passthru. Also noteworthy is that the hypervisor\nwill automatically setup any direct hw injection like posted interrupts.\n\nSigned-off-by: Mukesh R <mrathor@linux.microsoft.com>\n---\n arch/x86/hyperv/irqdomain.c | 18 +-\n drivers/hv/mshv_eventfd.c | 422 +++++++++++++++++++++++++++-\n drivers/iommu/hyperv-iommu-root.c | 14 +\n drivers/pci/controller/pci-hyperv.c | 10 +\n include/asm-generic/mshyperv.h | 4 +\n 5 files changed, 464 insertions(+), 4 deletions(-)", "diff": "diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c\nindex 527835b99a70..d32e912ad4a9 100644\n--- a/arch/x86/hyperv/irqdomain.c\n+++ b/arch/x86/hyperv/irqdomain.c\n@@ -222,7 +222,7 @@ int hv_map_msi_interrupt(struct irq_data *data,\n \n \tmsidesc = irq_data_get_msi_desc(data);\n \tpdev = msi_desc_to_pci_dev(msidesc);\n-\thv_devid.as_uint64 = hv_build_devid_type_pci(pdev);\n+\thv_devid.as_uint64 = hv_devid_from_pdev(pdev);\n \tcpu = cpumask_first(irq_data_get_effective_affinity_mask(data));\n \n \treturn hv_map_interrupt(hv_current_partition_id, hv_devid, false, cpu,\n@@ -258,6 +258,20 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)\n \t\treturn;\n \t}\n \n+\t/*\n+\t * For direct attached devices, we cannot map interrupts in the\n+\t * hypervisor because it will not allow it until we have guest target\n+\t * vcpu and vector. So defer it until irqbypass. Also, do the same\n+\t * for domain attached devices for simplicity.\n+\t */\n+\tif (hv_pcidev_is_pthru_dev(pdev)) {\n+\t\tif (data->chip_data)\n+\t\t\tentry_to_msi_msg(data->chip_data, msg);\n+\t\telse\n+\t\t\tmemset(msg, 0, sizeof(struct msi_msg));\n+\t\treturn;\n+\t}\n+\n \tif (data->chip_data) {\n \t\t/*\n \t\t * This interrupt is already mapped. Let's unmap first.\n@@ -297,7 +311,7 @@ static int hv_unmap_msi_interrupt(struct pci_dev *pdev,\n {\n \tunion hv_device_id hv_devid;\n \n-\thv_devid.as_uint64 = hv_build_devid_type_pci(pdev);\n+\thv_devid.as_uint64 = hv_devid_from_pdev(pdev);\n \treturn hv_unmap_interrupt(hv_devid.as_uint64, irq_entry);\n }\n \ndiff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c\nindex 90959f639dc3..666e28f4a4b5 100644\n--- a/drivers/hv/mshv_eventfd.c\n+++ b/drivers/hv/mshv_eventfd.c\n@@ -7,7 +7,6 @@\n *\n * All credits to kvm developers.\n */\n-\n #include <linux/syscalls.h>\n #include <linux/wait.h>\n #include <linux/poll.h>\n@@ -15,7 +14,8 @@\n #include <linux/list.h>\n #include <linux/workqueue.h>\n #include <linux/eventfd.h>\n-\n+#include <linux/pci.h>\n+#include <linux/vfio_pci_core.h>\n #if IS_ENABLED(CONFIG_X86_64)\n #include <asm/apic.h>\n #endif\n@@ -27,6 +27,376 @@\n \n static struct workqueue_struct *irqfd_cleanup_wq;\n \n+#if IS_ENABLED(CONFIG_X86_64)\n+\n+static int mshv_parse_mshv_irqfd(struct mshv_irqfd *irqfd,\n+\t\t\t\t struct pci_dev **out_pdev,\n+\t\t\t\t struct irq_data **out_irqdata)\n+{\n+\tstruct irq_bypass_producer *prod;\n+\tstruct msi_desc *msidesc;\n+\tstruct irq_data *irqdata;\n+\n+\tif (irqfd == NULL || irqfd->irqfd_bypass_prod == NULL)\n+\t\treturn -ENODEV;\n+\n+\tprod = irqfd->irqfd_bypass_prod;\n+\n+\tirqdata = irq_get_irq_data(prod->irq);\n+\tif (irqdata == NULL) {\n+\t\tpr_err(\"Hyper-V: irqbypass fail, no irqdata. irq:0x%x\\n\",\n+\t\t prod->irq);\n+\t\treturn -EINVAL;\n+\t}\n+\t*out_irqdata = irqdata;\n+\n+\tmsidesc = irq_data_get_msi_desc(irqdata);\n+\tif (msidesc == NULL) {\n+\t\tpr_err(\"Hyper-V: irqbypass msi fail. irq:0x%x\\n\", prod->irq);\n+\t\treturn -EINVAL;\n+\t}\n+\n+\t*out_pdev = msi_desc_to_pci_dev(msidesc);\n+\tif (*out_pdev == NULL) {\n+\t\tpr_err(\"Hyper-V: mshv_irqfd parse fail. irq:0x%x\\n\", prod->irq);\n+\t\treturn -EINVAL;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+/* Must be called with interrupts disabled */\n+static int hv_vpset_from_hyp_disabled(\n+\t\t\tstruct hv_input_get_vp_set_from_mda *input,\n+\t\t\tunion hv_output_get_vp_set_from_mda *output,\n+\t\t\tstruct mshv_lapic_irq *lapic_irq, u64 partid)\n+{\n+\tu64 status;\n+\n+\tmemset(input, 0, sizeof(*input));\n+\tinput->target_partid = partid;\n+\tinput->dest_address = lapic_irq->lapic_apic_id;\n+\tinput->input_vtl = 0;\n+\tinput->destmode_logical = lapic_irq->lapic_control.logical_dest_mode;\n+\n+\tstatus = hv_do_hypercall(HVCALL_GET_VPSET_FROM_MDA, input, output);\n+\tif (!hv_result_success(status)) {\n+\t\thv_status_err(status, \"apicid:0x%llx dest:0x%x\\n\",\n+\t\t\t lapic_irq->lapic_apic_id,\n+\t\t\t lapic_irq->lapic_control.logical_dest_mode);\n+\t}\n+\n+\treturn hv_result_to_errno(status);\n+}\n+\n+/* Returns number of banks copied, -errno in case of error */\n+static int hv_copy_vpset(struct hv_vpset *dest, struct hv_vpset *src)\n+{\n+\tu64 bank_mask;\n+\tint banks, tot_banks = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK;\n+\n+\tif (tot_banks >= HV_MAX_SPARSE_VCPU_BANKS)\n+\t\treturn -EINVAL;\n+\n+\tdest->format = src->format;\n+\tdest->valid_bank_mask = src->valid_bank_mask;\n+\tbank_mask = src->valid_bank_mask;\n+\tfor (banks = 0; banks <= tot_banks; banks++) {\n+\t\tif (bank_mask == 0)\n+\t\t\tbreak;\n+\n+\t\tif (bank_mask & 1)\n+\t\t\tdest->bank_contents[banks] = src->bank_contents[banks];\n+\t\tbank_mask = bank_mask >> 1;\n+\t}\n+\n+\treturn banks;\n+}\n+\n+static int mshv_map_device_interrupt(u64 ptid, union hv_device_id hv_devid,\n+\t\t\t\t struct mshv_lapic_irq *ginfo,\n+\t\t\t\t struct hv_interrupt_entry *ret_entry,\n+\t\t\t\t u64 *ret_status)\n+{\n+\tstruct hv_input_map_device_interrupt *irq_input;\n+\tstruct hv_output_map_device_interrupt *irq_output;\n+\tstruct hv_device_interrupt_descriptor *intdesc;\n+\tstruct hv_input_get_vp_set_from_mda *mda_input;\n+\tunion hv_output_get_vp_set_from_mda *mda_output;\n+\tulong flags;\n+\tu64 status;\n+\tint rc, var_size;\n+\n+\t*ret_status = U64_MAX;\n+\tlocal_irq_save(flags);\n+\n+\tmda_input = *this_cpu_ptr(hyperv_pcpu_input_arg);\n+\tmda_output = *this_cpu_ptr(hyperv_pcpu_output_arg);\n+\n+\t/*\n+\t * Map Device Interrupt hcall needs vp set based on vp indexes used\n+\t * during vp creation. Here we have lapic-id of the vp only. Easiest\n+\t * is to just ask the hypervisor for the vp set matching the lapic-id.\n+\t */\n+\trc = hv_vpset_from_hyp_disabled(mda_input, mda_output, ginfo, ptid);\n+\tif (rc)\n+\t\tgoto out;\t/* error already printed */\n+\n+\tirq_input = *this_cpu_ptr(hyperv_pcpu_input_arg);\n+\tirq_output = *this_cpu_ptr(hyperv_pcpu_output_arg);\n+\tmemset(irq_input, 0, sizeof(*irq_input));\n+\n+\tirq_input->partition_id = ptid;\n+\tirq_input->device_id = hv_devid.as_uint64;\n+\n+\tintdesc = &irq_input->interrupt_descriptor;\n+\tintdesc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;\n+\tintdesc->vector_count = 1;\n+\tintdesc->target.vector = ginfo->lapic_vector;\n+\tintdesc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;\n+\n+\tintdesc->target.vp_set.valid_bank_mask = 0;\n+\tintdesc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;\n+\tintdesc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;\n+\trc = hv_copy_vpset(&intdesc->target.vp_set, &mda_output->target_vpset);\n+\tif (rc <= 0) {\n+\t\tpr_err(\"Hyper-V: ptid %lld - (irq)vpset copy failed (%d)\\n\",\n+\t\t ptid, rc);\n+\t\tgoto out;\n+\t}\n+\n+\t/*\n+\t * var-sized hcall: var-size starts after vp_mask (thus vp_set.format\n+\t * does not count, but vp_set.valid_bank_mask does).\n+\t */\n+\tvar_size = rc + 1;\n+\tstatus = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,\n+\t\t\t\t irq_input, irq_output);\n+\t*ret_entry = irq_output->interrupt_entry;\n+\tlocal_irq_restore(flags);\n+\n+\trc = 0;\n+\tif (!hv_result_success(status)) {\n+\t\tif (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)\n+\t\t\thv_status_err(status, \"pt:%lld vec:%d lapic-id:%lld\\n\",\n+\t\t\t ptid, ginfo->lapic_vector, ginfo->lapic_apic_id);\n+\t\t*ret_status = status;\n+\t\trc = hv_result_to_errno(status);\n+\t}\n+\n+\treturn rc;\n+\n+out:\n+\tlocal_irq_restore(flags);\n+\treturn rc;\n+\n+}\n+\n+static int mshv_unmap_device_interrupt(union hv_device_id hv_devid,\n+\t\t\t\t struct hv_interrupt_entry *irq_entry)\n+{\n+\tunsigned long flags;\n+\tstruct hv_input_unmap_device_interrupt *input;\n+\tu64 status;\n+\n+\tlocal_irq_save(flags);\n+\tinput = *this_cpu_ptr(hyperv_pcpu_input_arg);\n+\tmemset(input, 0, sizeof(*input));\n+\n+\tif (hv_devid.device_type == HV_DEVICE_TYPE_LOGICAL)\n+\t\tinput->partition_id = hv_get_current_partid();\n+\telse\n+\t\tinput->partition_id = hv_current_partition_id;\n+\n+\tinput->device_id = hv_devid.as_uint64;\n+\tinput->interrupt_entry = *irq_entry;\n+\n+\tstatus = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);\n+\tlocal_irq_restore(flags);\n+\n+\tif (!hv_result_success(status))\n+\t\thv_status_err(status, \"\\n\");\n+\n+\treturn hv_result_to_errno(status);\n+}\n+\n+static int mshv_chk_unmap_irq(union hv_device_id hv_devid,\n+\t\t\t struct irq_data *irqdata)\n+{\n+\tint rc;\n+\n+\tif (irqdata->chip_data == NULL)\n+\t\treturn 0;\n+\n+\trc = mshv_unmap_device_interrupt(hv_devid, irqdata->chip_data);\n+\tif (rc)\n+\t\treturn rc;\n+\n+\tkfree(irqdata->chip_data);\n+\tirqdata->chip_data = NULL;\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Synchronize device update with VFIO.\n+ * See: vfio_pci_memory_lock_and_enable()\n+ */\n+static u16 mshv_pci_memory_lock_and_enable(struct vfio_pci_core_device *cdev)\n+{\n+\tu16 cmd;\n+\n+\tdown_write(&cdev->memory_lock);\n+\tpci_read_config_word(cdev->pdev, PCI_COMMAND, &cmd);\n+\tif (!(cmd & PCI_COMMAND_MEMORY))\n+\t\tpci_write_config_word(cdev->pdev, PCI_COMMAND,\n+\t\t\t\t cmd | PCI_COMMAND_MEMORY);\n+\treturn cmd;\n+}\n+\n+static void mshv_pci_memory_unlock_and_restore(\n+\t\t\t\t\tstruct vfio_pci_core_device *cdev,\n+\t\t\t\t\tu16 cmd)\n+{\n+\tpci_write_config_word(cdev->pdev, PCI_COMMAND, cmd);\n+\tup_write(&cdev->memory_lock);\n+}\n+\n+static void mshv_make_device_usable(struct pci_dev *pdev, int vector,\n+\t\t\t\t struct hv_interrupt_entry *hv_entry)\n+{\n+\tint lirq;\n+\tstruct msi_msg msimsg;\n+\tstruct irq_data *irqdata;\n+\tu16 pcicmd;\n+\tstruct vfio_pci_core_device *coredev = dev_get_drvdata(&pdev->dev);\n+\n+\tif (pdev->dev.driver == NULL ||\n+\t strcmp(pdev->dev.driver->name, \"vfio-pci\") != 0) {\n+\t\tpr_err(\"Hyper-V: irqbypass: non vfio device %s\\n\",\n+\t\t pci_name(pdev));\n+\t\treturn;\n+\t}\n+\tif (coredev == NULL) {\n+\t\tpr_err(\"Hyper-V: irqbypass: null vfio device for %s\\n\",\n+\t\t pci_name(pdev));\n+\t\treturn;\n+\t}\n+\n+\tif (hv_entry->source != HV_INTERRUPT_SOURCE_MSI) {\n+\t\tpr_err(\"Hyper-V: %s irq source not msi\\n\", pci_name(pdev));\n+\t\treturn;\n+\t}\n+\n+\tlirq = pci_irq_vector(pdev, vector);\n+\tirqdata = irq_get_irq_data(lirq);\n+\tif (irqdata == NULL) {\n+\t\tpr_err(\"Hyper-V: null irq_data for write msimsg. lirq:0x%x\\n\",\n+\t\t lirq);\n+\t\treturn;\n+\t}\n+\n+\tmsimsg.address_hi = 0;\n+\tmsimsg.address_lo = hv_entry->msi_entry.address.as_uint32;\n+\tmsimsg.data = hv_entry->msi_entry.data.as_uint32;\n+\n+\tpcicmd = mshv_pci_memory_lock_and_enable(coredev);\n+\tpci_write_msi_msg(lirq, &msimsg);\n+\tmshv_pci_memory_unlock_and_restore(coredev, pcicmd);\n+\n+\tpci_msi_unmask_irq(irqdata);\n+\n+\tif (irqdata->parent_data)\n+\t\tirq_chip_unmask_parent(irqdata);\n+}\n+\n+/*\n+ * This guest has a device passthru'd to it. VFIO did the initial setup of\n+ * the device interrupts, but we left them unmapped in the hypervisor\n+ * because we didn't have the guest target cpu and vector (required by\n+ * hypervisor). We have them now, so do the map hypercall.\n+ * Also, when here, it is expected that the device global mask is unset\n+ * but individual MSI/x masks are set. Goal here is to map the interrupt in\n+ * the hypervisor, update the corresponding device MSI/x entry, and enable it.\n+ */\n+static void mshv_pthru_dev_irq_remap(struct mshv_irqfd *irqfd)\n+{\n+\tu64 ptid, status;\n+\tstruct pci_dev *pdev;\n+\tint rc, deposit_pgs = 16;\n+\tstruct mshv_lapic_irq *ginfo = &irqfd->irqfd_lapic_irq;\n+\tunion hv_device_id hv_devid;\n+\tstruct hv_interrupt_entry *new_entry;\n+\tstruct irq_data *irqdata;\n+\n+\tif (!irqfd->irqfd_girq_ent.girq_entry_valid ||\n+\t irqfd->irqfd_bypass_prod == NULL)\n+\t\treturn;\n+\n+\trc = mshv_parse_mshv_irqfd(irqfd, &pdev, &irqdata);\n+\tif (rc)\n+\t\treturn;\n+\n+\thv_devid.as_uint64 = hv_devid_from_pdev(pdev);\n+\n+\trc = mshv_chk_unmap_irq(hv_devid, irqdata);\n+\tif (rc)\n+\t\treturn;\n+\n+\tnew_entry = kmalloc(sizeof(*new_entry), GFP_ATOMIC);\n+\tif (new_entry == NULL)\n+\t\treturn;\n+\n+\tptid = irqfd->irqfd_partn->pt_id;\n+\n+\twhile (deposit_pgs--) {\n+\t\trc = mshv_map_device_interrupt(ptid, hv_devid, ginfo, new_entry,\n+\t\t\t\t\t &status);\n+\t\tif (rc == 0)\n+\t\t\tbreak;\n+\t\tif (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)\n+\t\t\tbreak;\n+\n+\t\trc = hv_call_deposit_pages(NUMA_NO_NODE, ptid, 1);\n+\t\tif (rc)\n+\t\t\tbreak;\n+\t}\n+\tif (rc) {\n+\t\tkfree(new_entry);\n+\t\treturn;\n+\t}\n+\n+\tirqdata->chip_data = new_entry;\n+\n+\tmshv_make_device_usable(pdev, irqdata->hwirq, new_entry);\n+}\n+\n+static void mshv_pthru_dev_irq_undo(struct mshv_irqfd *irqfd)\n+{\n+\tstruct pci_dev *pdev;\n+\tunion hv_device_id hv_devid;\n+\tstruct irq_data *irqdata;\n+\tint rc;\n+\n+\tif (!irqfd->irqfd_girq_ent.girq_entry_valid ||\n+\t irqfd->irqfd_bypass_prod == NULL)\n+\t\treturn;\n+\n+\trc = mshv_parse_mshv_irqfd(irqfd, &pdev, &irqdata);\n+\tif (rc)\n+\t\treturn;\n+\n+\thv_devid.as_uint64 = hv_devid_from_pdev(pdev);\n+\tmshv_chk_unmap_irq(hv_devid, irqdata);\n+}\n+\n+#else /* IS_ENABLED(CONFIG_X86_64) */\n+\n+static void mshv_pthru_dev_irq_remap(struct mshv_irqfd *irqfd) { }\n+static void mshv_pthru_dev_irq_undo(struct mshv_irqfd *irqfd) { }\n+\n+#endif /* IS_ENABLED(CONFIG_X86_64) */\n+\n void mshv_register_irq_ack_notifier(struct mshv_partition *partition,\n \t\t\t\t struct mshv_irq_ack_notifier *mian)\n {\n@@ -264,6 +634,7 @@ static void mshv_irqfd_shutdown(struct work_struct *work)\n \t/*\n \t * It is now safe to release the object's resources\n \t */\n+\tirq_bypass_unregister_consumer(&irqfd->irqfd_bypass_cons);\n \teventfd_ctx_put(irqfd->irqfd_eventfd_ctx);\n \tkfree(irqfd);\n }\n@@ -286,6 +657,12 @@ static void mshv_irqfd_deactivate(struct mshv_irqfd *irqfd)\n \n \thlist_del(&irqfd->irqfd_hnode);\n \n+\t/*\n+\t * Cleanup interrupt map (kfree chip_data) while in a VMM thread as\n+\t * unmap needs partition id. mshv_irqfd_shutdown() runs in a kthread.\n+\t */\n+\tmshv_pthru_dev_irq_undo(irqfd);\n+\n \tqueue_work(irqfd_cleanup_wq, &irqfd->irqfd_shutdown);\n }\n \n@@ -383,6 +760,45 @@ static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh,\n \tadd_wait_queue_priority(wqh, &irqfd->irqfd_wait);\n }\n \n+static int mshv_irq_bypass_add_producer(struct irq_bypass_consumer *cons,\n+\t\t\t\t\tstruct irq_bypass_producer *prod)\n+{\n+\tstruct mshv_irqfd *irqfd;\n+\n+\tirqfd = container_of(cons, struct mshv_irqfd, irqfd_bypass_cons);\n+\tirqfd->irqfd_bypass_prod = prod;\n+\n+\tmshv_pthru_dev_irq_remap(irqfd);\n+\n+\treturn 0;\n+}\n+\n+static void mshv_irq_bypass_del_producer(struct irq_bypass_consumer *cons,\n+\t\t\t\t\t struct irq_bypass_producer *prod)\n+{\n+\tstruct mshv_irqfd *irqfd;\n+\n+\tirqfd = container_of(cons, struct mshv_irqfd, irqfd_bypass_cons);\n+\n+\tWARN_ON(irqfd->irqfd_bypass_prod != prod);\n+\tirqfd->irqfd_bypass_prod = NULL;\n+\n+}\n+\n+static void mshv_setup_irq_bypass(struct mshv_irqfd *irqfd,\n+\t\t\t\t struct eventfd_ctx *eventfd)\n+{\n+\tstruct irq_bypass_consumer *consumer = &irqfd->irqfd_bypass_cons;\n+\tint rc;\n+\n+\tconsumer->add_producer = mshv_irq_bypass_add_producer;\n+\tconsumer->del_producer = mshv_irq_bypass_del_producer;\n+\trc = irq_bypass_register_consumer(&irqfd->irqfd_bypass_cons, eventfd);\n+\tif (rc)\n+\t\tpr_err(\"Hyper-V: irq bypass consumer registration failed: %d\\n\",\n+\t\t rc);\n+}\n+\n static int mshv_irqfd_assign(struct mshv_partition *pt,\n \t\t\t struct mshv_user_irqfd *args)\n {\n@@ -509,6 +925,8 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,\n \tif (events & EPOLLIN)\n \t\tmshv_assert_irq_slow(irqfd);\n \n+\tmshv_setup_irq_bypass(irqfd, eventfd);\n+\n \tsrcu_read_unlock(&pt->pt_irq_srcu, idx);\n \treturn 0;\n \ndiff --git a/drivers/iommu/hyperv-iommu-root.c b/drivers/iommu/hyperv-iommu-root.c\nindex 739bbf39dea2..3e078e9213f9 100644\n--- a/drivers/iommu/hyperv-iommu-root.c\n+++ b/drivers/iommu/hyperv-iommu-root.c\n@@ -219,6 +219,20 @@ u64 hv_build_devid_oftype(struct pci_dev *pdev, enum hv_device_type type)\n }\n EXPORT_SYMBOL_GPL(hv_build_devid_oftype);\n \n+/* Build device id for the interrupt path */\n+u64 hv_devid_from_pdev(struct pci_dev *pdev)\n+{\n+\tenum hv_device_type dev_type;\n+\n+\tif (hv_pcidev_is_attached_dev(pdev))\n+\t\tdev_type = HV_DEVICE_TYPE_LOGICAL;\n+\telse\n+\t\tdev_type = HV_DEVICE_TYPE_PCI;\n+\n+\treturn hv_build_devid_oftype(pdev, dev_type);\n+}\n+EXPORT_SYMBOL_GPL(hv_devid_from_pdev);\n+\n /* Create a new device domain in the hypervisor */\n static int hv_iommu_create_hyp_devdom(struct hv_domain *hvdom)\n {\ndiff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c\nindex 8f6b818ee09b..8ecc909c3415 100644\n--- a/drivers/pci/controller/pci-hyperv.c\n+++ b/drivers/pci/controller/pci-hyperv.c\n@@ -1745,6 +1745,16 @@ static void hv_irq_mask(struct irq_data *data)\n \n static void hv_irq_unmask(struct irq_data *data)\n {\n+\tstruct pci_dev *pdev;\n+\tstruct msi_desc *msi_desc;\n+\n+\tmsi_desc = irq_data_get_msi_desc(data);\n+\tpdev = msi_desc_to_pci_dev(msi_desc);\n+\n+\t/* Done during bypass setup in mshv_eventfd.c: mshv_irqfd_assign() */\n+\tif (hv_pcidev_is_pthru_dev(pdev))\n+\t\treturn;\n+\n \thv_arch_irq_unmask(data);\n \n \tif (data->parent_data->chip->irq_unmask)\ndiff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h\nindex edbcfc2a9b60..887605aa9c95 100644\n--- a/include/asm-generic/mshyperv.h\n+++ b/include/asm-generic/mshyperv.h\n@@ -341,6 +341,7 @@ u64 hv_get_current_partid(void);\n bool hv_pcidev_is_attached_dev(struct pci_dev *pdev);\n bool hv_pcidev_is_pthru_dev(struct pci_dev *pdev);\n u64 hv_build_devid_oftype(struct pci_dev *pdev, enum hv_device_type type);\n+u64 hv_devid_from_pdev(struct pci_dev *pdev);\n \n #else /* Remove following after arm64 implementation is done */\n \n@@ -354,6 +355,9 @@ static inline u64 hv_build_devid_oftype(struct pci_dev *pdev,\n \t\t\t\t\tenum hv_device_type type)\n { return 0; }\n \n+static inline u64 hv_devid_from_pdev(struct pci_dev *pdev)\n+{ return 0; }\n+\n static inline u64 hv_get_current_partid(void)\n { return HV_PARTITION_ID_INVALID; }\n #endif /* IS_ENABLED(CONFIG_HYPERV_IOMMU) */\n", "prefixes": [ "v0", "2/3" ] }