@@ -493,7 +493,7 @@ static void vfio_listener_release(VFIOContainer *container)
memory_listener_unregister(&container->listener);
}
-static struct vfio_info_cap_header *
+struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
struct vfio_info_cap_header *hdr;
@@ -34,6 +34,7 @@
#include "trace.h"
#define MSIX_CAP_LENGTH 12
+#define IGD_OPREGION 0xFC
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
@@ -1108,6 +1109,25 @@ void vfio_pci_write_config(PCIDevice *pdev,
} else if (was_enabled && !is_enabled) {
vfio_msix_disable(vdev);
}
+ } else if (vdev->igd_opregion &&
+ ranges_overlap(addr, len, IGD_OPREGION, 4)) {
+ uint32_t orig, cur;
+
+ orig = pci_get_long(pdev->config + IGD_OPREGION);
+ pci_default_write_config(pdev, addr, val, len);
+ cur = pci_get_long(pdev->config + IGD_OPREGION);
+
+ if (cur != orig) {
+ if (orig) {
+ memory_region_del_subregion(get_system_memory(),
+ vdev->igd_opregion->mem);
+ }
+
+ if (cur) {
+ memory_region_add_subregion(get_system_memory(),
+ cur, vdev->igd_opregion->mem);
+ }
+ }
} else {
/* Write everything to QEMU to keep emulated bits correct */
pci_default_write_config(pdev, addr, val, len);
@@ -1459,6 +1479,10 @@ static void vfio_bars_exit(VFIOPCIDevice *vdev)
pci_unregister_vga(&vdev->pdev);
vfio_vga_quirk_exit(vdev);
}
+
+ if (vdev->igd_opregion) {
+ vfio_region_exit(vdev->igd_opregion);
+ }
}
static void vfio_bars_finalize(VFIOPCIDevice *vdev)
@@ -1477,6 +1501,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
}
g_free(vdev->vga);
}
+
+ if (vdev->igd_opregion) {
+ vfio_region_finalize(vdev->igd_opregion);
+ g_free(vdev->igd_opregion);
+ }
}
/*
@@ -2123,6 +2152,45 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
}
+ if (vbasedev->num_regions > VFIO_PCI_NUM_REGIONS) {
+ for (i = VFIO_PCI_NUM_REGIONS; i < vbasedev->num_regions; i++) {
+ struct vfio_info_cap_header *hdr;
+ struct vfio_region_info_cap_type *type;
+
+ ret = vfio_get_region_info(vbasedev, i, &reg_info);
+ if (ret) {
+ continue;
+ }
+
+ hdr = vfio_get_region_info_cap(reg_info, VFIO_REGION_INFO_CAP_TYPE);
+ if (!hdr) {
+ g_free(reg_info);
+ continue;
+ }
+
+ type = container_of(hdr, struct vfio_region_info_cap_type, header);
+ if (type->type ==
+ (VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL) &&
+ type->subtype == VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION) {
+ char *name;
+
+ vdev->igd_opregion = g_new0(VFIORegion, 1);
+ name = g_strdup_printf("%s IGD OpRegion", vbasedev->name);
+
+ ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+ vdev->igd_opregion, i, name);
+ g_free(name);
+
+ if (ret) {
+ error_report("vfio: Error setting up IGD OpRegion\n");
+ goto error;
+ }
+ }
+
+ g_free(reg_info);
+ }
+ }
+
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
@@ -2525,6 +2593,12 @@ static int vfio_initfn(PCIDevice *pdev)
vdev->msi_cap_size);
}
+ if (vdev->igd_opregion) {
+ pci_set_long(vdev->pdev.config + IGD_OPREGION, 0);
+ pci_set_long(vdev->pdev.wmask + IGD_OPREGION, ~0);
+ vfio_add_emulated_long(vdev, 0x5C, 0, ~0);
+ }
+
if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
vfio_intx_mmap_enable, vdev);
@@ -115,6 +115,7 @@ typedef struct VFIOPCIDevice {
int interrupt; /* Current interrupt type */
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
+ VFIORegion *igd_opregion;
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
@@ -146,6 +146,8 @@ int vfio_get_device(VFIOGroup *group, const char *name,
VFIODevice *vbasedev);
int vfio_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info);
+struct vfio_info_cap_header *
+ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id);
extern const MemoryRegionOps vfio_region_ops;
extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
The IGD OpRegion is provided via a device-specific region. Look for it on Intel VGA class devices, initialize it, and tie it to the config space register at 0xFC. Note that on bare metal it seems that 0xFC only points to the memory reserved by the BIOS for the OpRegion; in the model used here, programming the 0xFC register makes the host OpRegion pages appear in the VM address space at that address.

Register 0x5C is the base of the stolen memory region (BDSM). Emulating this register and setting it to zero prevents the driver from trying to blindly use this address, which might be in-use RAM in the VM. This avoids DMAR faults, or potentially VM memory corruption, but we likely need a better solution; this is just a hack. It also avoids framebuffer corruption.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/vfio/common.c | 2 +
hw/vfio/pci.c | 74 +++++++++++++++++++++++++++++++++++++++++
hw/vfio/pci.h | 1 +
include/hw/vfio/vfio-common.h | 2 +
4 files changed, 78 insertions(+), 1 deletion(-)