Patchwork [RFC,27/45] qemu-kvm: Lazily update MSI caches

login
register
mail settings
Submitter Jan Kiszka
Date Oct. 17, 2011, 9:28 a.m.
Message ID <33bcf349a40afdf62233e265534bf48cdd90fd06.1318843693.git.jan.kiszka@siemens.com>
Download mbox | patch
Permalink /patch/120133/
State New
Headers show

Comments

Jan Kiszka - Oct. 17, 2011, 9:28 a.m.
Instead of registering every possible MSI message that is prepared in
some device's config space, this commit only registers those messages
that are actually sent.

Every message that runs through the delivery hook is first checked
against its cached data. If there is a mismatch, then the registration
is created or updated, if it matches, delivery is performed directly.

To avoid exhausting limited KVM IRQ routes, devices are expected to
flush their MSI caches whenever the content is no longer used or valid.
If we run out of routes nevertheless, we flush all caches that were
created dynamically, ie. via the MSI delivery hook. However, we keep all
those cached routes intact that are static because they are associated
with external sources (irqfds).

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 hw/apic.c  |    4 +--
 hw/msi.c   |   93 ++++++------------------------------------------------------
 hw/msi.h   |    2 +-
 hw/msix.c  |   91 ++++------------------------------------------------------
 hw/pci.c   |    1 -
 hw/pci.h   |    3 --
 kvm-stub.c |   13 +--------
 kvm.h      |    6 ++--
 qemu-kvm.c |   69 ++++++++++++++++++++++++++++++++++---------
 9 files changed, 75 insertions(+), 207 deletions(-)

Patch

diff --git a/hw/apic.c b/hw/apic.c
index cb6662c..2cafc49 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -807,9 +807,7 @@  static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
 void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache)
 {
     if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        if (kvm_set_irq(cache->kvm_gsi, 1, NULL) < 0) {
-            abort();
-        }
+        kvm_msi_deliver(msg, cache);
     } else {
         uint8_t dest =
             (msg->address & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
diff --git a/hw/msi.c b/hw/msi.c
index 1328903..23d79dd 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -140,71 +140,18 @@  static void msi_message_from_vector(PCIDevice *dev, uint16_t msi_flags,
     }
 }
 
-static void kvm_msi_update(PCIDevice *dev)
-{
-    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
-    unsigned int max_vectors = 1 <<
-        ((flags & PCI_MSI_FLAGS_QMASK) >> (ffs(PCI_MSI_FLAGS_QMASK) - 1));
-    unsigned int nr_vectors = msi_nr_vectors(flags);
-    MSIRoutingCache *cache;
-    bool changed = false;
-    unsigned int vector;
-    MSIMessage msg;
-    int r;
-
-    for (vector = 0; vector < max_vectors; vector++) {
-        cache = &dev->msi_cache[vector];
-
-        if (vector >= nr_vectors) {
-            if (vector < dev->msi_entries_nr) {
-                kvm_msi_message_del(cache);
-                changed = true;
-            }
-        } else if (vector >= dev->msi_entries_nr) {
-            msi_message_from_vector(dev, flags, vector, &msg);
-            r = kvm_msi_message_add(&msg, cache);
-            if (r) {
-                fprintf(stderr, "%s: kvm_msi_add failed: %s\n", __func__,
-                        strerror(-r));
-                exit(1);
-            }
-            changed = true;
-        } else {
-            msi_message_from_vector(dev, flags, vector, &msg);
-            r = kvm_msi_message_update(&msg, cache);
-            if (r < 0) {
-                fprintf(stderr, "%s: kvm_update_msi failed: %s\n",
-                        __func__, strerror(-r));
-                exit(1);
-            }
-            if (r > 0) {
-                changed = true;
-            }
-        }
-    }
-    dev->msi_entries_nr = nr_vectors;
-    if (changed) {
-        r = kvm_commit_irq_routes();
-        if (r) {
-            fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__,
-                    strerror(-r));
-            exit(1);
-        }
-    }
-}
-
-/* KVM specific MSI helpers */
 static void kvm_msi_free(PCIDevice *dev)
 {
-    unsigned int vector;
+    unsigned int vector, nr_vectors;
 
-    for (vector = 0; vector < dev->msi_entries_nr; ++vector) {
-        kvm_msi_message_del(&dev->msi_cache[vector]);
+    if (!kvm_enabled() || !kvm_irqchip_in_kernel()) {
+        return;
     }
-    if (dev->msi_entries_nr > 0) {
-        kvm_commit_irq_routes();
+    nr_vectors =
+        msi_nr_vectors(pci_get_word(dev->config + msi_flags_off(dev)));
+    for (vector = 0; vector < nr_vectors; ++vector) {
+        kvm_msi_cache_invalidate(&dev->msi_cache[vector]);
     }
-    dev->msi_entries_nr = 0;
 }
 
 int msi_init(struct PCIDevice *dev, uint8_t offset,
@@ -283,10 +230,7 @@  void msi_uninit(struct PCIDevice *dev)
     flags = pci_get_word(dev->config + msi_flags_off(dev));
     cap_size = msi_cap_sizeof(flags);
 
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        kvm_msi_free(dev);
-    }
-
+    kvm_msi_free(dev);
     g_free(dev->msi_cache);
 
     pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
@@ -303,9 +247,6 @@  void msi_reset(PCIDevice *dev)
     if (!msi_present(dev)) {
         return;
     }
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        kvm_msi_free(dev);
-    }
 
     flags = pci_get_word(dev->config + msi_flags_off(dev));
     flags &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
@@ -402,6 +343,7 @@  void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
 #endif
 
     if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
+        kvm_msi_free(dev);
         return;
     }
 
@@ -433,10 +375,6 @@  void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
         pci_set_word(dev->config + msi_flags_off(dev), flags);
     }
 
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        kvm_msi_update(dev);
-    }
-
     if (!msi_per_vector_mask) {
         /* if per vector masking isn't supported,
            there is no pending interrupt. */
@@ -467,16 +405,3 @@  unsigned int msi_nr_vectors_allocated(const PCIDevice *dev)
     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
     return msi_nr_vectors(flags);
 }
-
-void msi_post_load(PCIDevice *dev)
-{
-    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
-
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        kvm_msi_free(dev);
-
-        if (flags & PCI_MSI_FLAGS_ENABLE) {
-            kvm_msi_update(dev);
-        }
-    }
-}
diff --git a/hw/msi.h b/hw/msi.h
index 20ae215..74f6d52 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -32,6 +32,7 @@  struct MSIMessage {
 typedef enum {
     MSI_ROUTE_NONE = 0,
     MSI_ROUTE_STATIC,
+    MSI_ROUTE_DYNAMIC,
 } MSIRouteType;
 
 struct MSIRoutingCache {
@@ -51,7 +52,6 @@  void msi_reset(PCIDevice *dev);
 void msi_notify(PCIDevice *dev, unsigned int vector);
 void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len);
 unsigned int msi_nr_vectors_allocated(const PCIDevice *dev);
-void msi_post_load(PCIDevice *dev);
 
 static inline bool msi_present(const PCIDevice *dev)
 {
diff --git a/hw/msix.c b/hw/msix.c
index 7d45760..ce3375a 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -42,79 +42,16 @@  static void msix_message_from_vector(PCIDevice *dev, unsigned vector,
     msg->data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
 }
 
-/* KVM specific MSIX helpers */
 static void kvm_msix_free(PCIDevice *dev)
 {
-    int vector, changed = 0;
-
-    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
-        if (dev->msix_entry_used[vector]) {
-            kvm_msi_message_del(&dev->msix_cache[vector]);
-            changed = 1;
-        }
-    }
-    if (changed) {
-        kvm_commit_irq_routes();
-    }
-}
-
-static void kvm_msix_update(PCIDevice *dev, int vector,
-                            int was_masked, int is_masked)
-{
-    int mask_cleared = was_masked && !is_masked;
-    MSIMessage msg;
-    int r;
+    int vector;
 
-    /* It is only legal to change an entry when it is masked. Therefore, it is
-     * enough to update the routing in kernel when mask is being cleared. */
-    if (!mask_cleared) {
+    if (!kvm_enabled() || !kvm_irqchip_in_kernel()) {
         return;
     }
-    if (!dev->msix_entry_used[vector]) {
-        return;
-    }
-
-    msix_message_from_vector(dev, vector, &msg);
-    r = kvm_msi_message_update(&msg, &dev->msix_cache[vector]);
-    if (r < 0) {
-        fprintf(stderr, "%s: kvm_update_msix failed: %s\n", __func__,
-                strerror(-r));
-        exit(1);
-    }
-    if (r > 0) {
-        r = kvm_commit_irq_routes();
-        if (r) {
-            fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__,
-		    strerror(-r));
-            exit(1);
-        }
-    }
-}
-
-static int kvm_msix_vector_add(PCIDevice *dev, unsigned vector)
-{
-    MSIMessage msg;
-    int r;
-
-    msix_message_from_vector(dev, vector, &msg);
-    r = kvm_msi_message_add(&msg, &dev->msix_cache[vector]);
-    if (r < 0) {
-        fprintf(stderr, "%s: kvm_add_msix failed: %s\n", __func__, strerror(-r));
-        return r;
-    }
-
-    r = kvm_commit_irq_routes();
-    if (r < 0) {
-        fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, strerror(-r));
-        return r;
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
+        kvm_msi_cache_invalidate(&dev->msix_cache[vector]);
     }
-    return 0;
-}
-
-static void kvm_msix_vector_del(PCIDevice *dev, unsigned vector)
-{
-    kvm_msi_message_del(&dev->msix_cache[vector]);
-    kvm_commit_irq_routes();
 }
 
 /* Add MSI-X capability to the config space for the device. */
@@ -264,6 +201,7 @@  void msix_write_config(PCIDevice *dev, uint32_t addr,
     }
 
     if (!is_enabled) {
+        kvm_msix_free(dev);
         return;
     }
 
@@ -288,9 +226,6 @@  static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
     bool is_masked;
 
     pci_set_long(dev->msix_table_page + offset, val);
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
-    }
 
     if (msix_enabled(dev) && vector < dev->msix_entries_nr) {
         is_masked = msix_is_masked(dev, vector);
@@ -391,10 +326,6 @@  static void msix_free_irq_entries(PCIDevice *dev)
 {
     int vector;
 
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        kvm_msix_free(dev);
-    }
-
     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
         dev->msix_entry_used[vector] = 0;
         msix_clr_pending(dev, vector);
@@ -418,6 +349,7 @@  int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
     g_free(dev->msix_entry_used);
     dev->msix_entry_used = NULL;
 
+    kvm_msix_free(dev);
     g_free(dev->msix_cache);
 
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
@@ -510,16 +442,8 @@  void msix_reset(PCIDevice *dev)
 /* Mark vector as used. */
 int msix_vector_use(PCIDevice *dev, unsigned vector)
 {
-    int ret;
     if (vector >= dev->msix_entries_nr)
         return -EINVAL;
-    if (kvm_enabled() && kvm_irqchip_in_kernel() &&
-        !dev->msix_entry_used[vector]) {
-        ret = kvm_msix_vector_add(dev, vector);
-        if (ret) {
-            return ret;
-        }
-    }
     ++dev->msix_entry_used[vector];
     return 0;
 }
@@ -533,9 +457,6 @@  void msix_vector_unuse(PCIDevice *dev, unsigned vector)
     if (--dev->msix_entry_used[vector]) {
         return;
     }
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        kvm_msix_vector_del(dev, vector);
-    }
     msix_clr_pending(dev, vector);
 }
 
diff --git a/hw/pci.c b/hw/pci.c
index 39b2173..4f0d7e1 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -362,7 +362,6 @@  static int get_pci_config_device(QEMUFile *f, void *pv, size_t size)
     memcpy(s->config, config, size);
 
     pci_update_mappings(s);
-    msi_post_load(s);
 
     g_free(config);
     return 0;
diff --git a/hw/pci.h b/hw/pci.h
index 4249c6a..d7a652e 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -201,9 +201,6 @@  struct PCIDevice {
     MSIRoutingCache *msi_cache;
     MSIRoutingCache *msix_cache;
 
-    /* MSI entries */
-    int msi_entries_nr;
-
     /* How much space does an MSIX table need. */
     /* The spec requires giving the table structure
      * a 4K aligned region all by itself. Align it to
diff --git a/kvm-stub.c b/kvm-stub.c
index ca4382a..acd1446 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -140,19 +140,8 @@  int kvm_get_irq_route_gsi(void)
     return -ENOSYS;
 }
 
-int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache *cache)
+void kvm_msi_cache_invalidate(MSIRoutingCache *cache)
 {
-    return -ENOSYS;
-}
-
-int kvm_msi_message_del(MSIRoutingCache *cache)
-{
-    return -ENOSYS;
-}
-
-int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache)
-{
-    return -ENOSYS;
 }
 
 int kvm_commit_irq_routes(void)
diff --git a/kvm.h b/kvm.h
index 8647647..61bcfec 100644
--- a/kvm.h
+++ b/kvm.h
@@ -204,9 +204,9 @@  int kvm_has_gsi_routing(void);
 int kvm_allows_irq0_override(void);
 int kvm_get_irq_route_gsi(void);
 
-int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache *cache);
-int kvm_msi_message_del(MSIRoutingCache *cache);
-int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache);
+void kvm_msi_cache_invalidate(MSIRoutingCache *cache);
+
+void kvm_msi_deliver(MSIMessage *msg, MSIRoutingCache *cache);
 
 int kvm_msi_irqfd_set(MSIMessage *msg, MSIRoutingCache *cache, int fd,
                       bool assigned);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index eb8f176..199564c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -432,12 +432,16 @@  int kvm_commit_irq_routes(void)
 #endif
 }
 
+static void kvm_msi_cache_flush(KVMState *s);
+
 int kvm_get_irq_route_gsi(void)
 {
     KVMState *s = kvm_state;
     int i, bit;
     uint32_t *buf = s->used_gsi_bitmap;
+    bool retry = true;
 
+again:
     /* Return the lowest unused GSI in the bitmap */
     for (i = 0; i < s->max_gsi / 32; i++) {
         bit = ffs(~buf[i]);
@@ -447,7 +451,11 @@  int kvm_get_irq_route_gsi(void)
 
         return bit - 1 + i * 32;
     }
-
+    if (retry) {
+        retry = false;
+        kvm_msi_cache_flush(s);
+        goto again;
+    }
     return -ENOSPC;
 }
 
@@ -463,7 +471,8 @@  static void kvm_msi_routing_entry(struct kvm_irq_routing_entry *e,
     e->u.msi.data = cache->msg.data;
 }
 
-int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache *cache)
+static int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache *cache,
+                               MSIRouteType type)
 {
     struct kvm_irq_routing_entry e;
     int ret;
@@ -473,30 +482,56 @@  int kvm_msi_message_add(MSIMessage *msg, MSIRoutingCache *cache)
         return ret;
     }
     cache->msg = *msg;
-    cache->type = MSI_ROUTE_STATIC;
+    cache->type = type;
     cache->kvm_gsi = ret;
     cache->kvm_irqfd = -1;
 
     kvm_msi_routing_entry(&e, cache);
-    return kvm_add_routing_entry(&e, cache);
+    ret = kvm_add_routing_entry(&e, cache);
+    if (ret < 0) {
+        return ret;
+    }
+    return kvm_commit_irq_routes();
 }
 
-int kvm_msi_message_del(MSIRoutingCache *cache)
+void kvm_msi_cache_invalidate(MSIRoutingCache *cache)
 {
     struct kvm_irq_routing_entry e;
 
-    kvm_msi_routing_entry(&e, cache);
-    return kvm_del_routing_entry(&e);
+    if (cache->type != MSI_ROUTE_NONE) {
+        kvm_msi_routing_entry(&e, cache);
+        kvm_del_routing_entry(&e);
+    }
+}
+
+static void kvm_msi_cache_flush(KVMState *s)
+{
+    int nr_irq_routes = kvm_state->irq_routes->nr;
+    MSIRoutingCache *cache;
+    int i;
+
+    for (i = 0; i < nr_irq_routes; i++) {
+        cache = s->msi_cache[i];
+        if (cache && cache->type == MSI_ROUTE_DYNAMIC) {
+            kvm_msi_cache_invalidate(cache);
+        }
+    }
 }
 
-int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache)
+static int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache,
+                                  MSIRouteType type)
 {
     struct kvm_irq_routing_entry old, new;
     MSIRoutingCache new_cache;
     int ret;
 
-    assert(cache->type != MSI_ROUTE_NONE);
-
+    if (cache->type == MSI_ROUTE_NONE) {
+        ret = kvm_msi_message_add(msg, cache, type);
+        if (ret < 0) {
+            return ret;
+        }
+        return kvm_commit_irq_routes();
+    }
     if (msg->address == cache->msg.address && msg->data == cache->msg.data) {
         return 0;
     }
@@ -515,9 +550,16 @@  int kvm_msi_message_update(MSIMessage *msg, MSIRoutingCache *cache)
     }
     *cache = new_cache;
 
-    return 1;
+    return kvm_commit_irq_routes();
 }
 
+void kvm_msi_deliver(MSIMessage *msg, MSIRoutingCache *cache)
+{
+    if (kvm_msi_message_update(msg, cache, MSI_ROUTE_DYNAMIC) < 0 ||
+        kvm_set_irq(cache->kvm_gsi, 1, NULL) < 0) {
+        abort();
+    }
+}
 
 int kvm_msi_irqfd_set(MSIMessage *msg, MSIRoutingCache *cache, int fd,
                       bool assigned)
@@ -525,10 +567,7 @@  int kvm_msi_irqfd_set(MSIMessage *msg, MSIRoutingCache *cache, int fd,
     int ret;
 
     if (assigned) {
-        if (cache->type == MSI_ROUTE_NONE) {
-            return -EINVAL;
-        }
-        ret = kvm_msi_message_update(msg, cache);
+        ret = kvm_msi_message_update(msg, cache, MSI_ROUTE_STATIC);
         if (ret < 0) {
             return ret;
         }