@@ -1783,6 +1783,8 @@ virHostCPUStatsAssign;
virHostdevFindUSBDevice;
virHostdevIsSCSIDevice;
virHostdevManagerGetDefault;
+virHostdevPCIDeviceGroupUnbind;
+virHostdevPCIDeviceGroupUnbindable;
virHostdevPCINodeDeviceDetach;
virHostdevPCINodeDeviceReAttach;
virHostdevPCINodeDeviceReset;
@@ -2342,6 +2344,7 @@ virPCIDeviceWaitForCleanup;
virPCIEDeviceInfoFree;
virPCIGetDeviceAddressFromSysfsLink;
virPCIGetHeaderType;
+virPCIGetIOMMUGroupList;
virPCIGetNetName;
virPCIGetPhysicalFunction;
virPCIGetVirtualFunctionIndex;
@@ -384,6 +384,22 @@ qemuHostdevPrepareDomainDevices(virQEMUDriverPtr driver,
}
void
+qemuHostdevReleasePCIDevices(virQEMUDriverPtr driver,
+ const char *name,
+ virDomainHostdevDefPtr *hostdevs,
+ int nhostdevs)
+{
+ virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
+ const char *oldStateDir = cfg->stateDir;
+ virHostdevManagerPtr hostdev_mgr = driver->hostdevMgr;
+
+ virHostdevReleasePCIDevices(hostdev_mgr, QEMU_DRIVER_NAME, name,
+ hostdevs, nhostdevs, oldStateDir);
+
+ virObjectUnref(cfg);
+}
+
+void
qemuHostdevReAttachPCIDevices(virQEMUDriverPtr driver,
const char *name,
virDomainHostdevDefPtr *hostdevs,
@@ -74,6 +74,10 @@ void qemuHostdevReAttachPCIDevices(virQEMUDriverPtr driver,
const char *name,
virDomainHostdevDefPtr *hostdevs,
int nhostdevs);
+void qemuHostdevReleasePCIDevices(virQEMUDriverPtr driver,
+ const char *name,
+ virDomainHostdevDefPtr *hostdevs,
+ int nhostdevs);
void qemuHostdevReAttachUSBDevices(virQEMUDriverPtr driver,
const char *name,
virDomainHostdevDefPtr *hostdevs,
@@ -3896,7 +3896,10 @@ qemuDomainRemoveHostDevice(virQEMUDriverPtr driver,
switch ((virDomainHostdevSubsysType) hostdev->source.subsys.type) {
case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
- qemuHostdevReAttachPCIDevices(driver, vm->def->name, &hostdev, 1);
+ if (is_vfio)
+ qemuHostdevReleasePCIDevices(driver, vm->def->name, &hostdev, 1);
+ else
+ qemuHostdevReAttachPCIDevices(driver, vm->def->name, &hostdev, 1);
qemuDomainReleaseDeviceAddress(vm, hostdev->info, NULL);
/* QEMU might no longer need to lock as much memory, eg. we just
* detached the last VFIO device, so adjust the limit here */
@@ -3925,6 +3928,17 @@ qemuDomainRemoveHostDevice(virQEMUDriverPtr driver,
virDomainNetDefFree(net);
}
+ if (is_vfio) {
+ int iommu_group =
+ virPCIDeviceAddressGetIOMMUGroupNum(&hostdev->source.subsys.u.pci.addr);
+ if (virHostdevPCIDeviceGroupUnbindable(driver->hostdevMgr,
+ iommu_group)) {
+ virHostdevPCIDeviceGroupUnbind(driver->hostdevMgr,
+ iommu_group);
+ }
+ }
+
+
ret = 0;
cleanup:
@@ -905,6 +905,96 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr mgr,
return ret;
}
+static bool
+virHostdevPCIDeviceUnbindableInternal(virHostdevManagerPtr mgr,
+ int iommu_group)
+{
+ struct virHostdevIsPCINodeDeviceUsedData data = { mgr, NULL, true };
+
+ if (virPCIIOMMUGroupIterate(iommu_group,
+ virHostdevIsPCINodeDeviceUsed,
+ &data) < 0) {
+ VIR_DEBUG("IOMMU group %d is not unbindable", iommu_group);
+ return false;
+ }
+
+ VIR_DEBUG("IOMMU group %d is unbindable", iommu_group);
+ return true;
+}
+
+/*
+ * Check if devices within IOMMU group are in use by any domains
+ */
+bool
+virHostdevPCIDeviceGroupUnbindable(virHostdevManagerPtr mgr,
+ int iommu_group)
+{
+ bool result;
+
+ virObjectLock(mgr->activePCIHostdevs);
+ result = virHostdevPCIDeviceUnbindableInternal(mgr, iommu_group);
+ virObjectUnlock(mgr->activePCIHostdevs);
+
+ return result;
+}
+
+/*
+ * Confirm all devices in IOMMU group are in inactiveList
+ * before attempting to reattach to host driver. Devices in IOMMU
+ * group that aren't in either activeList or inactiveList are considered
+ * outside our control, so we treat them as inactive as well.
+ *
+ * Callers can check virHostdevPCIDeviceGroupUnbindable() beforehand
+ * for some indication that the group is ready for reattach to the
+ * host, but since it's possible for a hostdev from the group to get
+ * re-attached to a guest prior to subsequently calling this function
+ * there is no guarantee of this, which should be fine since it would
+ * only be immediately rebound to the stub driver anyway.
+ */
+void
+virHostdevPCIDeviceGroupUnbind(virHostdevManagerPtr mgr,
+ int iommu_group)
+{
+ virPCIDeviceListPtr pcidevs = NULL;
+ size_t i;
+
+ virObjectLock(mgr->activePCIHostdevs);
+ virObjectLock(mgr->inactivePCIHostdevs);
+
+ if (!virHostdevPCIDeviceUnbindableInternal(mgr, iommu_group)) {
+ VIR_DEBUG("IOMMU group %d still in use, deferring reattach "
+ "of PCI devices to host", iommu_group);
+ goto cleanup;
+ }
+
+ pcidevs = virPCIGetIOMMUGroupList(iommu_group);
+ for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) {
+ virPCIDevicePtr actual, pci = virPCIDeviceListGet(pcidevs, i);
+ virPCIDeviceAddressPtr devAddr = virPCIDeviceGetAddress(pci);
+
+ actual = virPCIDeviceListFindByIDs(mgr->inactivePCIHostdevs,
+ devAddr->domain,
+ devAddr->bus,
+ devAddr->slot,
+ devAddr->function);
+ if (actual) {
+ VIR_DEBUG("Reattaching PCI device %s", virPCIDeviceGetName(actual));
+ if (virPCIDeviceGetManaged(actual))
+ if (virPCIDeviceReattach(actual, mgr->activePCIHostdevs,
+ mgr->inactivePCIHostdevs) < 0) {
+ VIR_ERROR(_("Failed to re-attach PCI device: %s"),
+ virGetLastErrorMessage());
+ virResetLastError();
+ }
+ }
+ }
+
+ cleanup:
+ virObjectUnref(pcidevs);
+ virObjectUnlock(mgr->activePCIHostdevs);
+ virObjectUnlock(mgr->inactivePCIHostdevs);
+}
+
/*
* Pre-condition: inactivePCIHostdevs & activePCIHostdevs
* are locked
@@ -122,6 +122,14 @@ virHostdevReleasePCIDevices(virHostdevManagerPtr mgr,
const char *oldStateDir)
ATTRIBUTE_NONNULL(1);
void
+virHostdevPCIDeviceGroupUnbind(virHostdevManagerPtr mgr,
+ int iommu_group)
+ ATTRIBUTE_NONNULL(1);
+bool
+virHostdevPCIDeviceGroupUnbindable(virHostdevManagerPtr mgr,
+ int iommu_group)
+ ATTRIBUTE_NONNULL(1);
+void
virHostdevReAttachUSBDevices(virHostdevManagerPtr hostdev_mgr,
const char *drv_name,
const char *dom_name,
@@ -2298,6 +2298,33 @@ virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev)
}
+/*
+ * virPCIGetIOMMUGroupList - return a virPCIDeviceList containing
+ * all of the devices in @iommu_group.
+ *
+ * Return the new list, or NULL on failure
+ */
+virPCIDeviceListPtr
+virPCIGetIOMMUGroupList(int iommu_group)
+{
+ virPCIDeviceListPtr groupList = virPCIDeviceListNew();
+
+ if (!groupList)
+ goto error;
+
+ if (virPCIIOMMUGroupIterate(iommu_group,
+ virPCIDeviceGetIOMMUGroupAddOne,
+ groupList) < 0)
+ goto error;
+
+ return groupList;
+
+ error:
+ virObjectUnref(groupList);
+ return NULL;
+}
+
+
typedef struct {
virPCIDeviceAddressPtr **iommuGroupDevices;
size_t *nIommuGroupDevices;
@@ -180,6 +180,7 @@ int virPCIIOMMUGroupIterate(int iommu_group,
virPCIDeviceAddressActor actor,
void *opaque);
virPCIDeviceListPtr virPCIDeviceGetIOMMUGroupList(virPCIDevicePtr dev);
+virPCIDeviceListPtr virPCIGetIOMMUGroupList(int iommu_group);
int virPCIDeviceAddressGetIOMMUGroupAddresses(virPCIDeviceAddressPtr devAddr,
virPCIDeviceAddressPtr **iommuGroupDevices,
size_t *nIommuGroupDevices);
Currently we bind a managed hostdev back to the host driver (or "unbind" from the perspective of the stub driver) immediately upon receiving a DEVICE_DELETED event from QEMU. In cases where we have more one device from the group attached to a guest, this runs the risk of putting the group in a "non-viable" state where both a guest and host are using devices from a group simultaneously. This patch addresses this by deferring the unbind step until all hostdevs from a group have been detached from the guest. In the meantime, they are left on the drvManager's inactiveList, in a similar state as they would be if they were unmanaged devices that were bound to VFIO via nodedev-detach but not yet plugged into a guest. Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> --- src/libvirt_private.syms | 3 ++ src/qemu/qemu_hostdev.c | 16 +++++++++ src/qemu/qemu_hostdev.h | 4 +++ src/qemu/qemu_hotplug.c | 16 ++++++++- src/util/virhostdev.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virhostdev.h | 8 +++++ src/util/virpci.c | 27 +++++++++++++++ src/util/virpci.h | 1 + 8 files changed, 164 insertions(+), 1 deletion(-)