@@ -24,6 +24,7 @@
* Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
* Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
* Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ * Copyright (C) 2011, Siemens AG, Jan Kiszka (jan.kiszka@siemens.com)
*/
#include <stdio.h>
#include <unistd.h>
@@ -41,6 +42,7 @@
#include "range.h"
#include "sysemu.h"
#include "msi.h"
+#include "msix.h"
#define MSIX_PAGE_SIZE 0x1000
@@ -64,8 +66,6 @@
static void assigned_dev_load_option_rom(AssignedDevice *dev);
-static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
-
static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
uint32_t addr, int len, uint32_t *val)
{
@@ -238,24 +238,11 @@ static void assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num,
{
AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
AssignedDevRegion *region = &r_dev->v_addrs[region_num];
- PCIRegion *real_region = &r_dev->real_device.regions[region_num];
if (e_size > 0) {
memory_region_init(&region->container, "assigned-dev-container",
e_size);
memory_region_add_subregion(&region->container, 0, &region->real_iomem);
-
- /* deal with MSI-X MMIO page */
- if (real_region->base_addr <= r_dev->msix_table_addr &&
- real_region->base_addr + real_region->size >
- r_dev->msix_table_addr) {
- int offset = r_dev->msix_table_addr - real_region->base_addr;
-
- memory_region_add_subregion_overlap(&region->container,
- offset,
- &r_dev->mmio,
- 1);
- }
}
}
@@ -648,21 +635,20 @@ again:
static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs);
-static void invalidate_msix_vectors(AssignedDevice *dev)
-{
- int i;
-
- for (i = 0; i < dev->irq_entries_nr; i++) {
- kvm_msi_cache_invalidate(&dev->dev.msix_cache[i]);
- }
-}
-
static void free_assigned_device(AssignedDevice *dev)
{
+ uint32_t table_bar_nr, pba_bar_nr;
+ uint8_t *msix_cap;
int i;
- if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
- assigned_dev_unregister_msix_mmio(dev);
+ if (msix_present(&dev->dev)) {
+ msix_cap = dev->dev.config + dev->dev.msix_cap;
+ table_bar_nr = pci_get_long(msix_cap + PCI_MSIX_TABLE) &
+ PCI_MSIX_FLAGS_BIRMASK;
+ pba_bar_nr = pci_get_long(msix_cap + PCI_MSIX_PBA) &
+ PCI_MSIX_FLAGS_BIRMASK;
+ msix_uninit(&dev->dev, &dev->v_addrs[table_bar_nr].container,
+ &dev->v_addrs[pba_bar_nr].container);
}
for (i = 0; i < dev->real_device.region_number; i++) {
PCIRegion *pci_region = &dev->real_device.regions[i];
@@ -698,9 +684,6 @@ static void free_assigned_device(AssignedDevice *dev)
if (dev->real_device.config_fd >= 0) {
close(dev->real_device.config_fd);
}
-
- invalidate_msix_vectors(dev);
- g_free(dev->dev.msix_cache);
}
static uint32_t calc_assigned_dev_id(AssignedDevice *dev)
@@ -916,11 +899,13 @@ void assigned_dev_update_irqs(void)
}
}
+/* used for both MSI and MSI-X */
static void assigned_dev_update_msi(PCIDevice *pci_dev, bool enabled)
{
AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
if (!enabled) {
+ dev->msix_vectors_in_use = 0;
assign_intx(dev);
}
}
@@ -945,113 +930,66 @@ static int assigned_dev_update_msi_vector(PCIDevice *pci_dev,
return 0;
}
-static int assigned_dev_set_msix_vectors(PCIDevice *pci_dev)
+static int assigned_dev_update_msix_vector(PCIDevice *pci_dev,
+ unsigned int vector,
+ MSIMessage *msg, bool masked)
{
- AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
- uint16_t entries_nr = 0, entries_max_nr;
- void *msix_page = adev->msix_table_page;
+ AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
+ MSIRoutingCache *cache;
uint32_t dev_id;
- MSIMessage msg;
- int pos, i, r;
-
- assert(adev->irq_entries_nr == 0);
-
- pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
+ unsigned int i;
+ int ret = 0;
- entries_max_nr = pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS);
- entries_max_nr &= PCI_MSIX_FLAGS_QSIZE;
- entries_max_nr += 1;
+ if (!masked) {
+ dev_id = calc_assigned_dev_id(dev);
- /* Get the usable entry number for allocating */
- for (i = 0; i < entries_max_nr; i++) {
/* Assuming IA-32 MSI message format:
* Ignore unused entry (invalid vector) */
- if (pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_DATA) == 0) {
- continue;
+ if (msg->data == 0) {
+ if (pci_dev->msix_cache[vector].type == MSI_ROUTE_NONE) {
+ return ret;
+ }
+ dev->msix_vectors_in_use--;
+ deassign_irq(dev);
+ kvm_msi_cache_invalidate(&pci_dev->msix_cache[vector]);
+ } else {
+ if (pci_dev->msix_cache[vector].type != MSI_ROUTE_NONE) {
+ ret = kvm_device_msix_set_vector(kvm_state, dev_id,
+ vector, msg,
+ &pci_dev->msix_cache[vector]);
+ return ret;
+ }
+ dev->msix_vectors_in_use++;
+ deassign_irq(dev);
}
- entries_nr++;
- }
- if (entries_nr == 0) {
- fprintf(stderr, "MSI-X entry number is zero!\n");
- return -EINVAL;
- }
- dev_id = calc_assigned_dev_id(adev);
-
- r = kvm_device_msix_init_vectors(kvm_state, dev_id, entries_nr);
- if (r < 0) {
- return r;
- }
- pci_dev->msix_cache = g_malloc0(entries_nr * sizeof(MSIRoutingCache));
- adev->irq_entries_nr = entries_nr;
-
- for (i = 0; i < entries_max_nr; i++) {
- if (entries_nr == 0) {
- break;
- }
- msg.data = pci_get_long(msix_page + i * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_DATA);
- if (msg.data == 0) {
- continue;
+ ret = kvm_device_msix_init_vectors(kvm_state, dev_id,
+ dev->msix_vectors_in_use);
+ if (ret < 0) {
+ return ret;
}
- msg.address = pci_get_quad(msix_page + i * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_LOWER_ADDR);
- r = kvm_device_msix_set_vector(kvm_state, dev_id, i, &msg,
- &pci_dev->msix_cache[i]);
- if (r < 0) {
- return r;
+ for (i = 0; i < pci_dev->msix_entries_nr; i++) {
+ cache = &pci_dev->msix_cache[i];
+ if (i != vector && cache->type == MSI_ROUTE_NONE) {
+ continue;
+ }
+ ret = kvm_device_msix_set_vector(kvm_state, dev_id, i,
+ i == vector ? msg : &cache->msg,
+ cache);
+ if (ret < 0) {
+ return ret;
+ }
}
- entries_nr--;
- }
-
- return 0;
-}
-
-static void assigned_dev_update_msix(PCIDevice *pci_dev)
-{
- AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
- uint16_t ctrl_word = pci_get_word(pci_dev->config + pci_dev->msix_cap +
- PCI_MSIX_FLAGS);
- uint32_t dev_id;
- int r;
- dev_id = calc_assigned_dev_id(assigned_dev);
-
- /* Some guests gratuitously disable MSIX even if they're not using it,
- * try to catch this by only deassigning irqs if the guest is using
- * MSIX or intends to start. */
- if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
- (ctrl_word & PCI_MSIX_FLAGS_ENABLE)) {
- invalidate_msix_vectors(assigned_dev);
- g_free(pci_dev->msix_cache);
- assigned_dev->irq_entries_nr = 0;
-
- r = kvm_device_irq_deassign(kvm_state, dev_id,
- assigned_dev->irq_requested_type);
- /* -ENXIO means no assigned irq */
- if (r && r != -ENXIO)
- perror("assigned_dev_update_msix: deassign irq");
-
- assigned_dev->irq_requested_type = 0;
- }
-
- if (ctrl_word & PCI_MSIX_FLAGS_ENABLE) {
- if (assigned_dev_set_msix_vectors(pci_dev) < 0) {
- perror("assigned_dev_update_msix_mmio");
- return;
- }
- if (kvm_device_msix_assign(kvm_state, dev_id) < 0) {
- perror("assigned_dev_enable_msix: assign irq");
- return;
+ ret = kvm_device_msix_assign(kvm_state, dev_id);
+ if (ret < 0) {
+ return ret;
}
- assigned_dev->girq = -1;
- assigned_dev->irq_requested_type = KVM_DEV_IRQ_HOST_MSIX |
- KVM_DEV_IRQ_GUEST_MSIX;
- } else {
- assign_intx(assigned_dev);
+ dev->irq_requested_type =
+ KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX;
}
+ return ret;
}
static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev,
@@ -1083,13 +1021,6 @@ static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address,
pci_default_write_config(pci_dev, address, val, len);
- if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
- if (range_covers_byte(address, len,
- pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) {
- assigned_dev_update_msix(pci_dev);
- }
- }
-
emulate_mask = 0;
memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len);
emulate_mask = le32_to_cpu(emulate_mask);
@@ -1115,7 +1046,6 @@ static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset,
static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
{
AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
- PCIRegion *pci_region = dev->real_device.regions;
int ret, pos;
/* Clear initial capabilities pointer and status copied from hw */
@@ -1145,27 +1075,31 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
/* Expose MSI-X capability */
pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0);
if (pos != 0 && kvm_device_msix_supported(kvm_state)) {
- int bar_nr;
- uint32_t msix_table_entry;
-
- dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
- if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos, 12)) < 0) {
+ unsigned int table_bar_nr, pba_bar_nr;
+ uint32_t table_offset, pba_offset;
+ uint16_t nentries;
+
+ nentries = (pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
+ PCI_MSIX_FLAGS_QSIZE) + 1;
+ table_offset = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
+ table_bar_nr = table_offset & PCI_MSIX_FLAGS_BIRMASK;
+ table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
+ pba_offset = pci_get_long(pci_dev->config + pos + PCI_MSIX_PBA);
+ pba_bar_nr = pba_offset & PCI_MSIX_FLAGS_BIRMASK;
+ pba_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
+
+ ret = msix_init(pci_dev, pos, nentries,
+ &dev->v_addrs[table_bar_nr].container, table_bar_nr,
+ table_offset, &dev->v_addrs[pba_bar_nr].container,
+ pba_bar_nr, pba_offset);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = msix_set_config_notifiers(pci_dev, assigned_dev_update_msi,
+ assigned_dev_update_msix_vector);
+ if (ret < 0) {
return ret;
}
- pci_dev->msix_cap = pos;
-
- pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS,
- pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
- PCI_MSIX_FLAGS_QSIZE);
-
- /* Only enable and function mask bits are writable */
- pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS,
- PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
-
- msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
- bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK;
- msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK;
- dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
}
/* Minimal PM support, nothing writable, device appears to NAK changes */
@@ -1378,94 +1312,6 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
return 0;
}
-static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
-{
- AssignedDevice *adev = opaque;
- unsigned int offset = addr & 0xfff;
- void *page = adev->msix_table_page;
- uint32_t val = 0;
-
- memcpy(&val, (void *)((char *)page + offset), 4);
-
- return val;
-}
-
-static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
-{
- return ((msix_mmio_readl(opaque, addr & ~3)) >>
- (8 * (addr & 3))) & 0xff;
-}
-
-static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
-{
- return ((msix_mmio_readl(opaque, addr & ~3)) >>
- (8 * (addr & 3))) & 0xffff;
-}
-
-static void msix_mmio_writel(void *opaque,
- target_phys_addr_t addr, uint32_t val)
-{
- AssignedDevice *adev = opaque;
- unsigned int offset = addr & 0xfff;
- void *page = adev->msix_table_page;
-
- DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
- addr, val);
- memcpy((void *)((char *)page + offset), &val, 4);
-}
-
-static void msix_mmio_writew(void *opaque,
- target_phys_addr_t addr, uint32_t val)
-{
- msix_mmio_writel(opaque, addr & ~3,
- (val & 0xffff) << (8*(addr & 3)));
-}
-
-static void msix_mmio_writeb(void *opaque,
- target_phys_addr_t addr, uint32_t val)
-{
- msix_mmio_writel(opaque, addr & ~3,
- (val & 0xff) << (8*(addr & 3)));
-}
-
-static const MemoryRegionOps msix_mmio_ops = {
- .old_mmio = {
- .read = { msix_mmio_readb, msix_mmio_readw, msix_mmio_readl, },
- .write = { msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel, },
- },
- .endianness = DEVICE_NATIVE_ENDIAN,
-};
-
-static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
-{
- dev->msix_table_page = mmap(NULL, 0x1000,
- PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
- if (dev->msix_table_page == MAP_FAILED) {
- fprintf(stderr, "fail allocate msix_table_page! %s\n",
- strerror(errno));
- return -EFAULT;
- }
- memset(dev->msix_table_page, 0, 0x1000);
- memory_region_init_io(&dev->mmio, &msix_mmio_ops, dev,
- "assigned-dev-msix", MSIX_PAGE_SIZE);
- return 0;
-}
-
-static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
-{
- if (!dev->msix_table_page)
- return;
-
- memory_region_destroy(&dev->mmio);
-
- if (munmap(dev->msix_table_page, 0x1000) == -1) {
- fprintf(stderr, "error unmapping msix_table_page! %s\n",
- strerror(errno));
- }
- dev->msix_table_page = NULL;
-}
-
static const VMStateDescription vmstate_assigned_device = {
.name = "pci-assign",
.unmigratable = 1,
@@ -1548,23 +1394,16 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
goto out;
}
- if (assigned_device_pci_cap_init(pci_dev) < 0) {
- goto out;
- }
-
- /* intercept MSI-X entry page in the MMIO */
- if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
- if (assigned_dev_register_msix_mmio(dev)) {
- goto out;
- }
- }
-
/* handle real device's MMIO/PIO BARs */
if (assigned_dev_register_regions(dev->real_device.regions,
dev->real_device.region_number,
dev))
goto out;
+ if (assigned_device_pci_cap_init(pci_dev) < 0) {
+ goto out;
+ }
+
/* handle interrupt routing */
e_intx = dev->dev.config[0x3d] - 1;
dev->intpin = e_intx;
@@ -95,21 +95,9 @@ typedef struct AssignedDevice {
uint8_t h_devfn;
int irq_requested_type;
int bound;
- struct {
-#define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
-#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1)
- uint32_t available;
-#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
-#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1)
-#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
- uint32_t state;
- } cap;
uint8_t emulate_config_read[PCI_CONFIG_SPACE_SIZE];
uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE];
- int irq_entries_nr;
- void *msix_table_page;
- target_phys_addr_t msix_table_addr;
- MemoryRegion mmio;
+ unsigned int msix_vectors_in_use;
char *configfd_name;
int32_t bootindex;
QLIST_ENTRY(AssignedDevice) next;
Switch MSI-X support of the device assignment core to the generic layer QEMU offers. As for legacy MSI, we use config notifiers to update IRQ assignment and routes on guest changes. Quite a bit of code becomes obsolete in the device assignment core, e.g. the maintenance of the MSI-X vector masking MMIO page. Note that we have to reorder BAR mapping and capability initialization in order to pass the BAR container to msix_init. Also note that we still do not support per-vector masking even after these changes. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> --- hw/device-assignment.c | 335 +++++++++++++----------------------------------- hw/device-assignment.h | 14 +-- 2 files changed, 88 insertions(+), 261 deletions(-)