Patchwork [01/11] vfio-pci: Update slow path INTx algorithm

login
register
mail settings
Submitter Alex Williamson
Date Oct. 4, 2012, 10:17 p.m.
Message ID <20121004221737.3189.75549.stgit@bling.home>
Download mbox | patch
Permalink /patch/189355/
State New
Headers show

Comments

Alex Williamson - Oct. 4, 2012, 10:17 p.m.
We can't afford the overhead of switching out and back into mmap mode
around each interrupt, but we can do it lazily via a timer.  On INTx
interrupt, disable the mmap'd memory regions and set a timer.  On
every interrupt, push the timer out.  If the timer expires and the
interrupt is no longer pending, switch back to mmap mode.

This has the benefit that things like graphics cards, which rarely or
never, fire an interrupt don't need manual user intervention to add
the x-intx=off parameter.  They'll just remain in mmap mode until they
trigger an interrupt, and if they don't continue to regularly fire
interrupts, they'll switch back.

The default timeout is tuned for network cards so that a ping is just
enough to keep them in non-mmap mode, where they have much better
latency.  It is tunable with an experimental option,
x-intx-mmap-timeout-ms.  A value of 0 keeps the device in non-mmap
mode after the first interrupt.

It's possible we could look at the class code of devices and come up
with reasonable per-class defaults based on expected interrupt
frequency and latency.  None of this is used for MSI interrupts and
also won't be used if we can bypass through KVM.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/vfio_pci.c     |   65 ++++++++++++++++++++++++++++++++++-------------------
 hw/vfio_pci_int.h |    4 ++-
 2 files changed, 44 insertions(+), 25 deletions(-)

Patch

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index a1eeced..7ec9c30 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -92,6 +92,34 @@  static void vfio_unmask_intx(VFIODevice *vdev)
     ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
 }
 
+/*
+ * Disabling BAR mmaping can be slow, but toggling it around INTx can
+ * also be a huge overhead.  We try to get the best of both worlds by
+ * waiting until an interrupt to disable mmaps (subsequent transitions
+ * to the same state are effectively no overhead).  If the interrupt has
+ * been serviced and the time gap is long enough, we re-enable mmaps for
+ * performance.  This works well for things like graphics cards, which
+ * may not use their interrupt at all and are penalized to an unusable
+ * level by read/write BAR traps.  Other devices, like NICs, have more
+ * regular interrupts and see much better latency by staying in non-mmap
+ * mode.  We therefore set the default mmap_timeout such that a ping
+ * is just enough to keep the mmap disabled.  Users can experiment with
+ * other options with the x-intx-mmap-timeout-ms parameter (a value of
+ * zero disables the timer).
+ */
+static void vfio_intx_mmap_enable(void *opaque)
+{
+    VFIODevice *vdev = opaque;
+
+    if (vdev->intx.pending) {
+        qemu_mod_timer(vdev->intx.mmap_timer,
+                       qemu_get_clock_ms(vm_clock) + vdev->intx.mmap_timeout);
+        return;
+    }
+
+    vfio_mmap_set_enabled(vdev, true);
+}
+
 static void vfio_intx_interrupt(void *opaque)
 {
     VFIODevice *vdev = opaque;
@@ -106,6 +134,11 @@  static void vfio_intx_interrupt(void *opaque)
 
     vdev->intx.pending = true;
     qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 1);
+    vfio_mmap_set_enabled(vdev, false);
+    if (vdev->intx.mmap_timeout) {
+        qemu_mod_timer(vdev->intx.mmap_timer,
+                       qemu_get_clock_ms(vm_clock) + vdev->intx.mmap_timeout);
+    }
 }
 
 static void vfio_eoi(VFIODevice *vdev)
@@ -141,7 +174,7 @@  static int vfio_enable_intx(VFIODevice *vdev)
     uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
     int ret;
 
-    if (vdev->intx.disabled || !pin) {
+    if (!pin) {
         return 0;
     }
 
@@ -162,16 +195,6 @@  static int vfio_enable_intx(VFIODevice *vdev)
         return -errno;
     }
 
-    /*
-     * Disable mmaps so we can trap on BAR accesses.  We interpret any
-     * access as a response to an interrupt and unmask the physical
-     * device.  The device will re-assert if the interrupt is still
-     * pending.  We'll likely retrigger on the host multiple times per
-     * guest interrupt, but without EOI notification it's better than
-     * nothing.  Acceleration paths through KVM will avoid this.
-     */
-    vfio_mmap_set_enabled(vdev, false);
-
     vdev->interrupt = VFIO_INT_INTx;
 
     DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
@@ -184,6 +207,7 @@  static void vfio_disable_intx(VFIODevice *vdev)
 {
     int fd;
 
+    qemu_del_timer(vdev->intx.mmap_timer);
     vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
     vdev->intx.pending = false;
     qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
@@ -1766,17 +1790,8 @@  static int vfio_initfn(PCIDevice *pdev)
     }
 
     if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
-        if (vdev->intx.intx && strcmp(vdev->intx.intx, "off")) {
-            error_report("vfio: Unknown option x-intx=%s, "
-                         "valid options: \"off\".\n", vdev->intx.intx);
-            ret = -EINVAL;
-            goto out_teardown;
-        }
-
-        if (vdev->intx.intx && !strcmp(vdev->intx.intx, "off")) {
-            vdev->intx.disabled = true;
-        }
-
+        vdev->intx.mmap_timer = qemu_new_timer_ms(vm_clock,
+                                                  vfio_intx_mmap_enable, vdev);
         ret = vfio_enable_intx(vdev);
         if (ret) {
             goto out_teardown;
@@ -1802,6 +1817,9 @@  static void vfio_exitfn(PCIDevice *pdev)
 
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     vfio_disable_interrupts(vdev);
+    if (vdev->intx.mmap_timer) {
+        qemu_free_timer(vdev->intx.mmap_timer);
+    }
     vfio_teardown_msi(vdev);
     vfio_unmap_bars(vdev);
     vfio_put_device(vdev);
@@ -1826,7 +1844,8 @@  static void vfio_pci_reset(DeviceState *dev)
 
 static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
-    DEFINE_PROP_STRING("x-intx", VFIODevice, intx.intx),
+    DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIODevice,
+                       intx.mmap_timeout, 1100),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
diff --git a/hw/vfio_pci_int.h b/hw/vfio_pci_int.h
index 3812d8d..e69bf5f 100644
--- a/hw/vfio_pci_int.h
+++ b/hw/vfio_pci_int.h
@@ -36,8 +36,8 @@  typedef struct VFIOINTx {
     EventNotifier interrupt; /* eventfd triggered on interrupt */
     EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
     PCIINTxRoute route; /* routing info for QEMU bypass */
-    bool disabled;
-    char *intx;
+    uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
+    QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
 } VFIOINTx;
 
 struct VFIODevice;