Patchwork [2/2] vfio-pci: Add PCIe capability mangling based on bus type

login
register
mail settings
Submitter Alex Williamson
Date Feb. 18, 2013, 6:29 p.m.
Message ID <20130218182916.14182.8405.stgit@bling.home>
Download mbox | patch
Permalink /patch/221437/
State New
Headers show

Comments

Alex Williamson - Feb. 18, 2013, 6:29 p.m.
Windows seems to pay particular attention to the PCIe header type of
devices and will fail to load drivers if we attach Endpoint devices
or Legacy Endpoint devices to the Root Complex.  We don't yet have a
good way to determine the bus type, so for now we add an experimental
x-bustype option which will later be replaced by some mechanism to
determine this automatically.  The new option is defined as:

x-bustype=<n> where <n> is one of:
	0: Legacy PCI [default]
	1: PCI Express
	2: PCI Express Root Complex

Conversion of PCIe types is done as follows:

* Legacy PCI
  * No change, capability is unmodified for compatibility.
* PCI Express
  * Integrated Root Complex Endpoint -> Endpoint
* PCI Express Root Complex
  * Endpoint -> Integrated Root Complex Endpoint
  * Legacy Endpoint -> none, capability hidden

We also take this opportunity to explicitly limit supported devices
to Endpoints, Legacy Endpoints, and Root Complex Integrated Endpoints.
We don't currently have support for other types and users often cause
themselves problems by assigning them.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio_pci.c |  130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+), 1 deletion(-)

Patch

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 53b23f3..7d6468b 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -130,6 +130,7 @@  typedef struct VFIODevice {
     PCIHostDeviceAddress host;
     QLIST_ENTRY(VFIODevice) next;
     struct VFIOGroup *group;
+    uint8_t bustype;
     bool reset_works;
 } VFIODevice;
 
@@ -1506,6 +1507,123 @@  static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
     return next - pos;
 }
 
+/*
+ * Read-modify-write the 16-bit value stored at @buf: the bits selected by
+ * @mask are cleared, then @val is ORed in.  @val is not masked here, so any
+ * of its bits that fall outside @mask are stored as well.
+ */
+static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
+{
+    uint16_t word = pci_get_word(buf);
+
+    word &= ~mask;
+    word |= val;
+    pci_set_word(buf, word);
+}
+
+/*
+ * Switch the @mask bits of the 16-bit register at config offset @pos over
+ * to emulation.
+ *
+ * Three per-device byte arrays are updated at that offset, in this order:
+ *   - pdev.config:          @mask bits replaced by @val
+ *   - pdev.wmask:           @mask bits cleared; because ~@mask is ORed in,
+ *                           every other bit of the word ends up set
+ *   - emulated_config_bits: @mask bits set
+ *
+ * NOTE(review): setting the wmask bits outside @mask looks like a side
+ * effect of reusing vfio_set_word_bits() -- confirm it is intended.
+ */
+static void vfio_add_emulated_word(VFIODevice *vdev, int pos,
+                                   uint16_t val, uint16_t mask)
+{
+    uint8_t *cfg = vdev->pdev.config + pos;
+    uint8_t *writable = vdev->pdev.wmask + pos;
+    uint8_t *emulated = vdev->emulated_config_bits + pos;
+
+    vfio_set_word_bits(cfg, val, mask);
+    vfio_set_word_bits(writable, ~mask, mask);
+    vfio_set_word_bits(emulated, mask, mask);
+}
+
+/*
+ * Read-modify-write the 32-bit value stored at @buf: the bits selected by
+ * @mask are cleared, then @val is ORed in.  @val is not masked here, so any
+ * of its bits that fall outside @mask are stored as well.
+ */
+static void vfio_set_long_bits(uint8_t *buf, uint32_t val, uint32_t mask)
+{
+    uint32_t dword = pci_get_long(buf);
+
+    dword &= ~mask;
+    dword |= val;
+    pci_set_long(buf, dword);
+}
+
+/*
+ * Switch the @mask bits of the 32-bit register at config offset @pos over
+ * to emulation.
+ *
+ * Three per-device byte arrays are updated at that offset, in this order:
+ *   - pdev.config:          @mask bits replaced by @val
+ *   - pdev.wmask:           @mask bits cleared; because ~@mask is ORed in,
+ *                           every other bit of the dword ends up set
+ *   - emulated_config_bits: @mask bits set
+ *
+ * NOTE(review): setting the wmask bits outside @mask looks like a side
+ * effect of reusing vfio_set_long_bits() -- confirm it is intended.
+ */
+static void vfio_add_emulated_long(VFIODevice *vdev, int pos,
+                                   uint32_t val, uint32_t mask)
+{
+    uint8_t *cfg = vdev->pdev.config + pos;
+    uint8_t *writable = vdev->pdev.wmask + pos;
+    uint8_t *emulated = vdev->emulated_config_bits + pos;
+
+    vfio_set_long_bits(cfg, val, mask);
+    vfio_set_long_bits(writable, ~mask, mask);
+    vfio_set_long_bits(emulated, mask, mask);
+}
+
+/*
+ * Add the PCI Express capability found at @pos to the device's capability
+ * list, rewriting the PCIe device type so that it suits the bus selected
+ * by the experimental x-bustype property (vdev->bustype).
+ *
+ * @vdev: device being set up
+ * @pos:  config space offset of the PCIe capability
+ * @size: length of the capability in bytes, as supplied by the caller
+ *
+ * Only Endpoint, Legacy Endpoint and Root Complex Integrated Endpoint
+ * devices are accepted.
+ *
+ * Returns -EINVAL for an unsupported device type or an unknown x-bustype,
+ * 0 when the capability is deliberately left out (Legacy Endpoint on a
+ * root complex), otherwise whatever pci_add_capability() returns.
+ */
+static int vfio_setup_pcie_cap(VFIODevice *vdev, int pos, uint8_t size)
+{
+    uint16_t flags;
+    uint8_t type;
+    enum {
+        VFIO_BUS_TYPE_PCI,
+        VFIO_BUS_TYPE_PCIE,
+        VFIO_BUS_TYPE_PCIE_RC,
+    };
+
+    /* Device type is the PCI_EXP_FLAGS_TYPE field of the flags word */
+    flags = pci_get_word(vdev->pdev.config + pos + PCI_CAP_FLAGS);
+    type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+
+    switch (type) {
+    case PCI_EXP_TYPE_ENDPOINT:
+    case PCI_EXP_TYPE_LEG_END:
+    case PCI_EXP_TYPE_RC_END:
+        break;
+    default:
+        /* error_report() appends the newline itself */
+        error_report("vfio: Assignment of PCIe type 0x%x devices is not "
+                     "currently supported", type);
+        return -EINVAL;
+    }
+
+    if (vdev->bustype > VFIO_BUS_TYPE_PCIE_RC) {
+        error_report("vfio: Unknown x-bustype %d.  Accepted values:\n"
+                     "\t%d: Legacy PCI [default]\n"
+                     "\t%d: PCI Express\n"
+                     "\t%d: PCI Express Root-Complex", vdev->bustype,
+                     VFIO_BUS_TYPE_PCI, VFIO_BUS_TYPE_PCIE,
+                     VFIO_BUS_TYPE_PCIE_RC);
+        return -EINVAL;
+    }
+
+    switch (vdev->bustype) {
+    case VFIO_BUS_TYPE_PCI:
+        /*
+         * Use express capability as-is on PCI bus.  It doesn't make much
+         * sense to even expose, but some drivers (ex. tg3) depend on it
+         * and guests don't seem to be particular about it.  We'll need
+         * to revisit this if we ever expose an IOMMU to the guest.
+         */
+        return pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size);
+
+    case VFIO_BUS_TYPE_PCIE:
+        if (type == PCI_EXP_TYPE_RC_END) {
+            /* Type becomes non-Integrated Endpoint */
+            vfio_add_emulated_word(vdev, pos + PCI_CAP_FLAGS,
+                                   PCI_EXP_TYPE_ENDPOINT << 4,
+                                   PCI_EXP_FLAGS_TYPE);
+            /* XXX Implement LNKCAP fields? */
+        }
+
+        return pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size);
+
+    case VFIO_BUS_TYPE_PCIE_RC:
+        switch (type) {
+        case PCI_EXP_TYPE_ENDPOINT:
+            /* Type becomes Integrated Endpoint */
+            vfio_add_emulated_word(vdev, pos + PCI_CAP_FLAGS,
+                                   PCI_EXP_TYPE_RC_END << 4,
+                                   PCI_EXP_FLAGS_TYPE);
+
+            /*
+             * Link Capabilities, Status, and Control goes away.  The last
+             * 32-bit write starts at PCI_EXP_LNKCTL, so the capability
+             * must extend to the end of that dword or we would clobber
+             * whatever follows it in config space.
+             */
+            if (size < PCI_EXP_LNKCTL + sizeof(uint32_t)) {
+                break;
+            }
+            vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP, 0U, ~0U);
+            vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCTL, 0U, ~0U);
+
+            /*
+             * Link 2 Capabilities, Status, and Control goes away.  Same
+             * bound as above: the dword at PCI_EXP_LNKCTL2 must fit.
+             */
+            if (size < PCI_EXP_LNKCTL2 + sizeof(uint32_t)) {
+                break;
+            }
+            vfio_add_emulated_long(vdev, pos + 0x2c, 0U, ~0U);
+            vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCTL2, 0U, ~0U);
+            break;
+        case PCI_EXP_TYPE_LEG_END:
+            /*
+             * Legacy endpoints don't belong on the root complex.  Windows
+             * seems to be happier with devices if we skip the capability.
+             */
+            return 0;
+        default:
+            /* Already an Integrated Endpoint: expose it unmodified */
+            break;
+        }
+
+        return pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size);
+    }
+
+    /* Not reached: bustype was range-checked above */
+    return -EINVAL;
+}
+
 static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1536,13 +1654,22 @@  static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos)
             return ret;
         }
     } else {
-        pdev->config[PCI_CAPABILITY_LIST] = 0; /* Begin the rebuild */
+        /* Begin the rebuild, use QEMU emulated list bits */
+        pdev->config[PCI_CAPABILITY_LIST] = 0;
+        vdev->emulated_config_bits[PCI_CAPABILITY_LIST] = 0xff;
+        vdev->emulated_config_bits[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
     }
 
+    /* Use emulated next pointer to allow dropping caps */
+    pci_set_byte(vdev->emulated_config_bits + pos + 1, 0xff);
+
     switch (cap_id) {
     case PCI_CAP_ID_MSI:
         ret = vfio_setup_msi(vdev, pos);
         break;
+    case PCI_CAP_ID_EXP:
+        ret = vfio_setup_pcie_cap(vdev, pos, size);
+        break;
     case PCI_CAP_ID_MSIX:
         ret = vfio_setup_msix(vdev, pos);
         break;
@@ -2102,6 +2229,7 @@  static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
     DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIODevice,
                        intx.mmap_timeout, 1100),
+    DEFINE_PROP_UINT8("x-bustype", VFIODevice, bustype, 0),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),