Patchwork [RFC,V1,10/11] Introduce Xen PCI Passthrough, MSI (3/3)

login
register
mail settings
Submitter Anthony PERARD
Date Oct. 4, 2011, 2:51 p.m.
Message ID <1317739882-4809-11-git-send-email-anthony.perard@citrix.com>
Download mbox | patch
Permalink /patch/117638/
State New
Headers show

Comments

Anthony PERARD - Oct. 4, 2011, 2:51 p.m.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
 hw/xen_pci_passthrough_msi.c |  674 ++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 674 insertions(+), 0 deletions(-)
 create mode 100644 hw/xen_pci_passthrough_msi.c
Stefano Stabellini - Oct. 5, 2011, 11:51 a.m.
On Tue, 4 Oct 2011, Anthony PERARD wrote:
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>

You should set the original author of this patch correctly and add his
signed-off-by.
Remember to run the patch through checkpatch.pl.

> ---
>  hw/xen_pci_passthrough_msi.c |  674 ++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 674 insertions(+), 0 deletions(-)
>  create mode 100644 hw/xen_pci_passthrough_msi.c
> 
> diff --git a/hw/xen_pci_passthrough_msi.c b/hw/xen_pci_passthrough_msi.c
> new file mode 100644
> index 0000000..be18ff1
> --- /dev/null
> +++ b/hw/xen_pci_passthrough_msi.c
> @@ -0,0 +1,674 @@
> +/*
> + * Copyright (c) 2007, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * Jiang Yunhong <yunhong.jiang@intel.com>
> + *
> + * This file implements direct PCI assignment to a HVM guest
> + */
> +
> +#include <sys/mman.h>
> +
> +#include "xen_backend.h"
> +#include "xen_pci_passthrough.h"
> +
> +void msi_set_enable(XenPCIPassthroughState *dev, int en)
> +{
> +    uint16_t val = 0;
> +    uint32_t address = 0;
> +    PT_LOG("enable: %i\n", en);
> +
> +    if (!dev->msi) {
> +        return;
> +    }
> +
> +    address = dev->msi->ctrl_offset;
> +    if (!address) {
> +        return;
> +    }
> +
> +    val = host_pci_read_word(dev->real_device, address);
> +    val &= ~PCI_MSI_FLAGS_ENABLE;
> +    val |= en & PCI_MSI_FLAGS_ENABLE;
> +    host_pci_write_word(dev->real_device, address, val);
> +
> +    PT_LOG("done, address: %#x, val: %#x\n", address, val);
> +}
> +
> +static void msix_set_enable(XenPCIPassthroughState *dev, int en)
> +{
> +    uint16_t val = 0;
> +    uint32_t address = 0;
> +
> +    if (!dev->msix) {
> +        return;
> +    }
> +
> +    address = dev->msix->ctrl_offset;
> +    if (!address) {
> +        return;
> +    }
> +
> +    val = host_pci_read_word(dev->real_device, address);
> +    val &= ~PCI_MSIX_FLAGS_ENABLE;
> +    if (en) {
> +        val |= PCI_MSIX_FLAGS_ENABLE;
> +    }
> +    host_pci_write_word(dev->real_device, address, val);
> +}
> +
> +/*********************************/
> +/* MSI virtualization functions */
> +
> +/*
> + * set up the physical MSI, but do not enable it
> + */
> +int pt_msi_setup(XenPCIPassthroughState *dev)
> +{
> +    int pirq = -1;
> +    uint8_t gvec = 0;
> +
> +    if (!(dev->msi->flags & MSI_FLAG_UNINIT)) {
> +        PT_LOG("Error: setup physical after initialized?? \n");
> +        return -1;
> +    }
> +
> +    gvec = dev->msi->data & 0xFF;
> +    if (!gvec) {
> +        /* if gvec is 0, the guest is asking for a particular pirq that
> +         * is passed as dest_id */
> +        pirq = (dev->msi->addr_hi & 0xffffff00) |
> +               ((dev->msi->addr_lo >> MSI_TARGET_CPU_SHIFT) & 0xff);
> +        if (!pirq) {
> +            /* this probably identifies a misconfiguration of the guest,
> +             * try the emulated path */
> +            pirq = -1;
> +        } else {
> +            PT_LOG("pt_msi_setup requested pirq = %d\n", pirq);
> +        }
> +    }
> +
> +    if (xc_physdev_map_pirq_msi(xen_xc, xen_domid, AUTO_ASSIGN, &pirq,
> +                                PCI_DEVFN(dev->real_device->dev,
> +                                          dev->real_device->func),
> +                                dev->real_device->bus, 0, 0)) {
> +        PT_LOG("Error: Mapping of MSI failed.\n");
> +        return -1;
> +    }
> +
> +    if (pirq < 0) {
> +        PT_LOG("Error: Invalid pirq number\n");
> +        return -1;
> +    }
> +
> +    dev->msi->pirq = pirq;
> +    PT_LOG("msi mapped with pirq %x\n", pirq);
> +
> +    return 0;
> +}
> +
> +static uint32_t __get_msi_gflags(uint32_t data, uint64_t addr)
> +{
> +    uint32_t result = 0;
> +    int rh, dm, dest_id, deliv_mode, trig_mode;
> +
> +    rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
> +    dm = (addr >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
> +    dest_id = (addr >> MSI_TARGET_CPU_SHIFT) & 0xff;
> +    deliv_mode = (data >> MSI_DATA_DELIVERY_SHIFT) & 0x7;
> +    trig_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
> +
> +    result |= dest_id | (rh << GFLAGS_SHIFT_RH) | (dm << GFLAGS_SHIFT_DM) | \
> +              (deliv_mode << GLFAGS_SHIFT_DELIV_MODE) |
> +              (trig_mode << GLFAGS_SHIFT_TRG_MODE);
> +
> +    return result;
> +}
> +
> +int pt_msi_update(XenPCIPassthroughState *d)
> +{
> +    uint8_t gvec = 0;
> +    uint32_t gflags = 0;
> +    uint64_t addr = 0;
> +    int ret = 0;
> +
> +    /* get vector, address, flags info, etc. */
> +    gvec = d->msi->data & 0xFF;
> +    addr = (uint64_t)d->msi->addr_hi << 32 | d->msi->addr_lo;
> +    gflags = __get_msi_gflags(d->msi->data, addr);
> +
> +    PT_LOG("Update msi with pirq %x gvec %x gflags %x\n",
> +           d->msi->pirq, gvec, gflags);
> +
> +    ret = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec,
> +                                   d->msi->pirq, gflags, 0);
> +
> +    if (ret) {
> +        PT_LOG("Error: Binding of MSI failed.\n");
> +
> +        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, d->msi->pirq)) {
> +            PT_LOG("Error: Unmapping of MSI failed.\n");
> +        }
> +        d->msi->pirq = -1;
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +void pt_msi_disable(XenPCIPassthroughState *dev)
> +{
> +    PCIDevice *d = &dev->dev;
> +    uint8_t gvec = 0;
> +    uint32_t gflags = 0;
> +    uint64_t addr = 0;
> +    uint8_t e_device = 0;
> +    uint8_t e_intx = 0;
> +
> +    msi_set_enable(dev, 0);
> +
> +    e_device = PCI_SLOT(dev->dev.devfn);
> +    e_intx = pci_intx(dev);
> +
> +    if (dev->msi_trans_en) {
> +        if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, dev->msi->pirq,
> +                                    PT_IRQ_TYPE_MSI_TRANSLATE, 0,
> +                                    e_device, e_intx, 0)) {
> +            PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
> +            goto out;
> +        }
> +    } else if (!(dev->msi->flags & MSI_FLAG_UNINIT)) {
> +        /* get vector, address, flags info, etc. */
> +        gvec = dev->msi->data & 0xFF;
> +        addr = (uint64_t)dev->msi->addr_hi << 32 | dev->msi->addr_lo;
> +        gflags = __get_msi_gflags(dev->msi->data, addr);
> +
> +        PT_LOG("Unbind msi with pirq %x, gvec %x\n",
> +                dev->msi->pirq, gvec);
> +
> +        if (xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec,
> +                                        dev->msi->pirq, gflags)) {
> +            PT_LOG("Error: Unbinding of MSI failed. [%02x:%02x.%x]\n",
> +                   pci_bus_num(d->bus), PCI_SLOT(d->devfn),
> +                   PCI_FUNC(d->devfn));
> +            goto out;
> +        }
> +    }
> +
> +    if (dev->msi->pirq != -1) {
> +        PT_LOG("Unmap msi with pirq %x\n", dev->msi->pirq);
> +
> +        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, dev->msi->pirq)) {
> +            PT_LOG("Error: Unmapping of MSI failed. [%02x:%02x.%x]\n",
> +                   pci_bus_num(d->bus), PCI_SLOT(d->devfn),
> +                   PCI_FUNC(d->devfn));
> +            goto out;
> +        }
> +    }
> +
> +out:
> +    /* clear msi info */
> +    dev->msi->flags = 0;
> +    dev->msi->pirq = -1;
> +    dev->msi_trans_en = 0;
> +}
> +
> +/* MSI-INTx translation virtualization functions */
> +int pt_enable_msi_translate(XenPCIPassthroughState* dev)
> +{
> +    uint8_t e_device = 0;
> +    uint8_t e_intx = 0;
> +
> +    if (!(dev->msi && dev->msi_trans_cap)) {
> +        return -1;
> +    }
> +
> +    msi_set_enable(dev, 0);
> +    dev->msi_trans_en = 0;
> +
> +    if (pt_msi_setup(dev)) {
> +        PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
> +        return -1;
> +    }
> +
> +    e_device = PCI_SLOT(dev->dev.devfn);
> +    /* fix virtual interrupt pin to INTA# */
> +    e_intx = pci_intx(dev);
> +
> +    if (xc_domain_bind_pt_irq(xen_xc, xen_domid, dev->msi->pirq,
> +                              PT_IRQ_TYPE_MSI_TRANSLATE, 0,
> +                              e_device, e_intx, 0)) {
> +        PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
> +
> +        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, dev->msi->pirq)) {
> +            PT_LOG("Error: Unmapping of MSI failed.\n");
> +        }
> +        dev->msi->pirq = -1;
> +        return -1;
> +    }
> +
> +    msi_set_enable(dev, 1);
> +    dev->msi_trans_en = 1;
> +
> +    return 0;
> +}
> +
> +void pt_disable_msi_translate(XenPCIPassthroughState *dev)
> +{
> +    uint8_t e_device = 0;
> +    uint8_t e_intx = 0;
> +
> +    /* MSI_ENABLE bit should be disabled until the new handler is set */
> +    msi_set_enable(dev, 0);
> +
> +    e_device = PCI_SLOT(dev->dev.devfn);
> +    e_intx = pci_intx(dev);
> +
> +    if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, dev->msi->pirq,
> +                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
> +                                 e_device, e_intx, 0)) {
> +        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
> +    }
> +
> +    if (dev->machine_irq) {
> +        if (xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, dev->machine_irq,
> +                                       0, e_device, e_intx)) {
> +            PT_LOG("Error: Rebinding of interrupt failed!\n");
> +        }
> +    }
> +
> +    dev->msi_trans_en = 0;
> +}
> +
> +/*********************************/
> +/* MSI-X virtualization functions */
> +
> +static void mask_physical_msix_entry(XenPCIPassthroughState *dev,
> +                                     int entry_nr, int mask)
> +{
> +    void *phys_off;
> +
> +    phys_off = dev->msix->phys_iomem_base + 16 * entry_nr + 12;
> +    *(uint32_t *)phys_off = mask;
> +}
> +
> +static int pt_msix_update_one(XenPCIPassthroughState *dev, int entry_nr)
> +{
> +    struct msix_entry_info *entry = &dev->msix->msix_entry[entry_nr];
> +    int pirq = entry->pirq;
> +    int gvec = entry->io_mem[2] & 0xff;
> +    uint64_t gaddr = *(uint64_t *)&entry->io_mem[0];
> +    uint32_t gflags = __get_msi_gflags(entry->io_mem[2], gaddr);
> +    int ret;
> +
> +    if (!entry->flags) {
> +        return 0;
> +    }
> +
> +    if (!gvec) {
> +        /* if gvec is 0, the guest is asking for a particular pirq that
> +         * is passed as dest_id */
> +        pirq = ((gaddr >> 32) & 0xffffff00) |
> +               (((gaddr & 0xffffffff) >> MSI_TARGET_CPU_SHIFT) & 0xff);
> +        if (!pirq) {
> +            /* this probably identifies a misconfiguration of the guest,
> +             * try the emulated path */
> +            pirq = -1;
> +        } else {
> +            PT_LOG("pt_msix_update_one requested pirq = %d\n", pirq);
> +        }
> +    }
> +
> +    /* Check if this entry is already mapped */
> +    if (entry->pirq == -1) {
> +        ret = xc_physdev_map_pirq_msi(xen_xc, xen_domid, AUTO_ASSIGN, &pirq,
> +                                      PCI_DEVFN(dev->real_device->dev,
> +                                                dev->real_device->func),
> +                                      dev->real_device->bus, entry_nr,
> +                                      dev->msix->table_base);
> +        if (ret) {
> +            PT_LOG("Error: Mapping msix entry %x\n", entry_nr);
> +            return ret;
> +        }
> +        entry->pirq = pirq;
> +    }
> +
> +    PT_LOG("Update msix entry %x with pirq %x gvec %x\n",
> +            entry_nr, pirq, gvec);
> +
> +    ret = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec, pirq, gflags,
> +                                   dev->msix->mmio_base_addr);
> +    if (ret) {
> +        PT_LOG("Error: Updating msix irq info for entry %d\n", entry_nr);
> +
> +        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, entry->pirq)) {
> +            PT_LOG("Error: Unmapping of MSI-X failed.\n");
> +        }
> +        entry->pirq = -1;
> +        return ret;
> +    }
> +
> +    entry->flags = 0;
> +
> +    return 0;
> +}
> +
> +int pt_msix_update(XenPCIPassthroughState *dev)
> +{
> +    struct pt_msix_info *msix = dev->msix;
> +    int i;
> +
> +    for (i = 0; i < msix->total_entries; i++) {
> +        pt_msix_update_one(dev, i);
> +    }
> +
> +    return 0;
> +}
> +
> +void pt_msix_disable(XenPCIPassthroughState *dev)
> +{
> +    PCIDevice *d = &dev->dev;
> +    uint8_t gvec = 0;
> +    uint32_t gflags = 0;
> +    uint64_t addr = 0;
> +    int i = 0;
> +    struct msix_entry_info *entry = NULL;
> +
> +    msix_set_enable(dev, 0);
> +
> +    for ( i = 0; i < dev->msix->total_entries; i++ ) {
> +        entry = &dev->msix->msix_entry[i];
> +
> +        if (entry->pirq == -1) {
> +            continue;
> +        }
> +
> +        gvec = entry->io_mem[2] & 0xff;
> +        addr = *(uint64_t *)&entry->io_mem[0];
> +        gflags = __get_msi_gflags(entry->io_mem[2], addr);
> +
> +        PT_LOG("Unbind msix with pirq %x, gvec %x\n",
> +                entry->pirq, gvec);
> +
> +        if (xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec,
> +                                        entry->pirq, gflags)) {
> +            PT_LOG("Error: Unbinding of MSI-X failed. [%02x:%02x.%x]\n",
> +                   pci_bus_num(d->bus), PCI_SLOT(d->devfn),
> +                   PCI_FUNC(d->devfn));
> +        } else {
> +            PT_LOG("Unmap msix with pirq %x\n", entry->pirq);
> +
> +            if (xc_physdev_unmap_pirq(xen_xc, xen_domid, entry->pirq)) {
> +                PT_LOG("Error: Unmapping of MSI-X failed. [%02x:%02x.%x]\n",
> +                       pci_bus_num(d->bus),
> +                       PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> +            }
> +        }
> +        /* clear msi-x info */
> +        entry->pirq = -1;
> +        entry->flags = 0;
> +    }
> +}
> +
> +int pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index)
> +{
> +    XenMSIXEntry *entry;
> +    int i, ret;
> +
> +    if (!(s->msix && s->msix->bar_index == bar_index)) {
> +        return 0;
> +    }
> +
> +    for (i = 0; i < s->msix->total_entries; i++) {
> +        entry = &s->msix->msix_entry[i];
> +        if (entry->pirq != -1) {
> +            ret = xc_domain_unbind_pt_irq(xen_xc, xen_domid, entry->pirq,
> +                                          PT_IRQ_TYPE_MSI, 0, 0, 0, 0);
> +            if (ret) {
> +                PT_LOG("Error: unbind MSI-X entry %d failed\n", entry->pirq);
> +            }
> +            entry->flags = 1;
> +        }
> +    }
> +    pt_msix_update(s);
> +
> +    return 0;
> +}
> +
> +static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr,
> +                                   uint32_t val)
> +{
> +    PT_LOG("Error: Invalid write to MSI-X table,"
> +           " only dword access is allowed.\n");
> +}
> +
> +static void pci_msix_writel(void *opaque, target_phys_addr_t addr,
> +                            uint32_t val)
> +{
> +    XenPCIPassthroughState *dev = (XenPCIPassthroughState *)opaque;
> +    XenPTMSIX *msix = dev->msix;
> +    XenMSIXEntry *entry;
> +    int entry_nr, offset;
> +    void *phys_off;
> +    uint32_t vec_ctrl;
> +
> +    if (addr % 4) {
> +        PT_LOG("Error: Unaligned dword access to MSI-X table, "
> +                "addr %016"PRIx64"\n", addr);
> +        return;
> +    }
> +
> +    PT_LOG("addr: "TARGET_FMT_plx", val: %#x\n", addr, val);
> +
> +    // FIXME/TODO I maybe have to do the same in readl !
> +    /* entry_nr = (addr - msix->mmio_base_addr) / 16; */

what is the problem here?


> +    entry_nr = addr / 16;
> +    entry = &msix->msix_entry[entry_nr];
> +    offset = (addr % 16) / 4;
> +
> +    /*
> +     * If Xen intercepts the mask bit access, io_mem[3] may not be
> +     * up-to-date. Read from hardware directly.
> +     */
> +    phys_off = dev->msix->phys_iomem_base + 16 * entry_nr + 12;
> +    vec_ctrl = *(uint32_t *)phys_off;
> +
> +    if (offset != 3 && msix->enabled && !(vec_ctrl & 0x1)) {
> +        PT_LOG("Error: Can't update msix entry %d since MSI-X is already "
> +                "function.\n", entry_nr);
> +        return;
> +    }
> +
> +    if (offset != 3 && entry->io_mem[offset] != val) {
> +        entry->flags = 1;
> +    }
> +    entry->io_mem[offset] = val;
> +
> +    if (offset == 3) {
> +        if (msix->enabled && !(val & 0x1)) {
> +            pt_msix_update_one(dev, entry_nr);
> +        }
> +        mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1);
> +    }
> +}
> +
> +static CPUWriteMemoryFunc *pci_msix_write[] = {
> +    pci_msix_invalid_write,
> +    pci_msix_invalid_write,
> +    pci_msix_writel
> +};
> +
> +static uint32_t pci_msix_invalid_read(void *opaque, target_phys_addr_t addr)
> +{
> +    PT_LOG("Error: Invalid read to MSI-X table,"
> +           " only dword access is allowed.\n");
> +    return 0;
> +}
> +
> +static uint32_t pci_msix_readl(void *opaque, target_phys_addr_t addr)
> +{
> +    XenPCIPassthroughState *dev = (XenPCIPassthroughState *)opaque;
> +    struct pt_msix_info *msix = dev->msix;
> +    int entry_nr, offset;
> +
> +    if (addr % 4) {
> +        PT_LOG("Error: Unaligned dword access to MSI-X table, "
> +                "addr %016"PRIx64"\n", addr);
> +        return 0;
> +    }
> +
> +    PT_LOG("addr: "TARGET_FMT_plx"\n", addr);
> +
> +    entry_nr = addr / 16;
> +    offset = (addr % 16) / 4;
> +
> +    return msix->msix_entry[entry_nr].io_mem[offset];
> +}
> +
> +static CPUReadMemoryFunc *pci_msix_read[] = {
> +    pci_msix_invalid_read,
> +    pci_msix_invalid_read,
> +    pci_msix_readl
> +};
> +
> +int add_msix_mapping(XenPCIPassthroughState *s, int bar_index)
> +{
> +    if (!(s->msix && s->msix->bar_index == bar_index)) {
> +        return 0;
> +    }
> +
> +    return xc_domain_memory_mapping
> +        (xen_xc, xen_domid,
> +         s->msix->mmio_base_addr >> XC_PAGE_SHIFT,
> +         (s->bases[bar_index].access.maddr + s->msix->table_off)
> +             >> XC_PAGE_SHIFT,
> +         (s->msix->total_entries * 16 + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
> +         DPCI_ADD_MAPPING);
> +}
> +
> +int remove_msix_mapping(XenPCIPassthroughState *s, int bar_index)
> +{
> +    if (!(s->msix && s->msix->bar_index == bar_index)) {
> +        return 0;
> +    }
> +
> +    s->msix->mmio_base_addr = s->bases[bar_index].e_physbase
> +        + s->msix->table_off;
> +
> +    cpu_register_physical_memory(s->msix->mmio_base_addr,
> +                                 s->msix->total_entries * 16,
> +                                 s->msix->mmio_index);
> +
> +    return xc_domain_memory_mapping
> +        (xen_xc, xen_domid,
> +         s->msix->mmio_base_addr >> XC_PAGE_SHIFT,
> +         (s->bases[bar_index].access.maddr + s->msix->table_off)
> +             >> XC_PAGE_SHIFT,
> +         (s->msix->total_entries * 16 + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
> +         DPCI_REMOVE_MAPPING);
> +}
> +
> +int pt_msix_init(XenPCIPassthroughState *dev, int pos)
> +{
> +    uint8_t id;
> +    uint16_t control;
> +    int i, total_entries, table_off, bar_index;
> +    HostPCIDevice *pd = dev->real_device;
> +    int fd;
> +
> +    id = host_pci_read_byte(pd, pos + PCI_CAP_LIST_ID);
> +
> +    if (id != PCI_CAP_ID_MSIX) {
> +        PT_LOG("Error: Invalid id %#x pos %#x\n", id, pos);
> +        return -1;
> +    }
> +
> +    control = host_pci_read_word(pd, pos + 2);
> +    total_entries = control & 0x7ff;
> +    total_entries += 1;
> +
> +    dev->msix = g_malloc0(sizeof (struct pt_msix_info)
> +                          + total_entries * sizeof (struct msix_entry_info));
> +
> +    dev->msix->total_entries = total_entries;
> +    for (i = 0; i < total_entries; i++) {
> +        dev->msix->msix_entry[i].pirq = -1;
> +    }
> +
> +    dev->msix->mmio_index =
> +        cpu_register_io_memory(pci_msix_read, pci_msix_write,
> +                               dev, DEVICE_NATIVE_ENDIAN);
> +
> +    table_off = host_pci_read_long(pd, pos + PCI_MSIX_TABLE);
> +    bar_index = dev->msix->bar_index = table_off & PCI_MSIX_FLAGS_BIRMASK;
> +    table_off = dev->msix->table_off = table_off & ~PCI_MSIX_FLAGS_BIRMASK;
> +    dev->msix->table_base =
> +        pt_pci_base_addr(dev->real_device->base_addr[bar_index]);
> +    PT_LOG("get MSI-X table bar base %#"PRIx64"\n", dev->msix->table_base);
> +
> +    fd = open("/dev/mem", O_RDWR);
> +    if (fd == -1) {
> +        PT_LOG("Error: Can't open /dev/mem: %s\n", strerror(errno));
> +        goto error_out;
> +    }
> +    PT_LOG("table_off = %#x, total_entries = %d\n", table_off, total_entries);
> +    dev->msix->table_offset_adjust = table_off & 0x0fff;
> +    dev->msix->phys_iomem_base = mmap(0,
> +                                      total_entries * 16
> +                                      + dev->msix->table_offset_adjust,
> +                                      PROT_WRITE | PROT_READ,
> +                                      MAP_SHARED | MAP_LOCKED,
> +                                      fd,
> +                                      dev->msix->table_base + table_off
> +                                      - dev->msix->table_offset_adjust);
> +    if (dev->msix->phys_iomem_base == MAP_FAILED) {
> +        PT_LOG("Error: Can't map physical MSI-X table: %s\n", strerror(errno));
> +        close(fd);
> +        goto error_out;
> +    }
> +    dev->msix->phys_iomem_base = ((char *)dev->msix->phys_iomem_base +
> +                                  dev->msix->table_offset_adjust);
> +
> +    close(fd);
> +
> +    PT_LOG("mapping physical MSI-X table to %p\n", dev->msix->phys_iomem_base);
> +    return 0;
> +
> +error_out:
> +    g_free(dev->msix);
> +    dev->msix = NULL;
> +    return -1;
> +}
> +
> +#if 0
> +static void pt_msix_delete(XenPCIPassthroughState *dev)
> +{
> +    /* unmap the MSI-X memory mapped register area */
> +    if (dev->msix->phys_iomem_base)
> +    {
> +        PT_LOG("unmapping physical MSI-X table from %lx\n",
> +           (unsigned long)dev->msix->phys_iomem_base);
> +        munmap(dev->msix->phys_iomem_base, dev->msix->total_entries * 16 +
> +           dev->msix->table_offset_adjust);
> +    }
> +
> +    if (dev->msix->mmio_index > 0)
> +    {
> +        cpu_unregister_io_memory(dev->msix->mmio_index);
> +    }
> +
> +
> +    free(dev->msix);
> +}
> +#endif
> --
> Anthony PERARD
>

Patch

diff --git a/hw/xen_pci_passthrough_msi.c b/hw/xen_pci_passthrough_msi.c
new file mode 100644
index 0000000..be18ff1
--- /dev/null
+++ b/hw/xen_pci_passthrough_msi.c
@@ -0,0 +1,674 @@ 
+/*
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Jiang Yunhong <yunhong.jiang@intel.com>
+ *
+ * This file implements direct PCI assignment to a HVM guest
+ */
+
+#include <sys/mman.h>
+
+#include "xen_backend.h"
+#include "xen_pci_passthrough.h"
+
+void msi_set_enable(XenPCIPassthroughState *dev, int en)
+{
+    uint16_t val = 0;
+    uint32_t address = 0;
+    PT_LOG("enable: %i\n", en);
+
+    if (!dev->msi) {
+        return;
+    }
+
+    address = dev->msi->ctrl_offset;
+    if (!address) {
+        return;
+    }
+
+    val = host_pci_read_word(dev->real_device, address);
+    val &= ~PCI_MSI_FLAGS_ENABLE;
+    val |= en & PCI_MSI_FLAGS_ENABLE;
+    host_pci_write_word(dev->real_device, address, val);
+
+    PT_LOG("done, address: %#x, val: %#x\n", address, val);
+}
+
+static void msix_set_enable(XenPCIPassthroughState *dev, int en)
+{
+    uint16_t val = 0;
+    uint32_t address = 0;
+
+    if (!dev->msix) {
+        return;
+    }
+
+    address = dev->msix->ctrl_offset;
+    if (!address) {
+        return;
+    }
+
+    val = host_pci_read_word(dev->real_device, address);
+    val &= ~PCI_MSIX_FLAGS_ENABLE;
+    if (en) {
+        val |= PCI_MSIX_FLAGS_ENABLE;
+    }
+    host_pci_write_word(dev->real_device, address, val);
+}
+
+/*********************************/
+/* MSI virtualization functions */
+
+/*
+ * set up the physical MSI, but do not enable it
+ */
+int pt_msi_setup(XenPCIPassthroughState *dev)
+{
+    int pirq = -1;
+    uint8_t gvec = 0;
+
+    if (!(dev->msi->flags & MSI_FLAG_UNINIT)) {
+        PT_LOG("Error: setup physical after initialized?? \n");
+        return -1;
+    }
+
+    gvec = dev->msi->data & 0xFF;
+    if (!gvec) {
+        /* if gvec is 0, the guest is asking for a particular pirq that
+         * is passed as dest_id */
+        pirq = (dev->msi->addr_hi & 0xffffff00) |
+               ((dev->msi->addr_lo >> MSI_TARGET_CPU_SHIFT) & 0xff);
+        if (!pirq) {
+            /* this probably identifies a misconfiguration of the guest,
+             * try the emulated path */
+            pirq = -1;
+        } else {
+            PT_LOG("pt_msi_setup requested pirq = %d\n", pirq);
+        }
+    }
+
+    if (xc_physdev_map_pirq_msi(xen_xc, xen_domid, AUTO_ASSIGN, &pirq,
+                                PCI_DEVFN(dev->real_device->dev,
+                                          dev->real_device->func),
+                                dev->real_device->bus, 0, 0)) {
+        PT_LOG("Error: Mapping of MSI failed.\n");
+        return -1;
+    }
+
+    if (pirq < 0) {
+        PT_LOG("Error: Invalid pirq number\n");
+        return -1;
+    }
+
+    dev->msi->pirq = pirq;
+    PT_LOG("msi mapped with pirq %x\n", pirq);
+
+    return 0;
+}
+
+static uint32_t __get_msi_gflags(uint32_t data, uint64_t addr)
+{
+    uint32_t result = 0;
+    int rh, dm, dest_id, deliv_mode, trig_mode;
+
+    rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
+    dm = (addr >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
+    dest_id = (addr >> MSI_TARGET_CPU_SHIFT) & 0xff;
+    deliv_mode = (data >> MSI_DATA_DELIVERY_SHIFT) & 0x7;
+    trig_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+
+    result |= dest_id | (rh << GFLAGS_SHIFT_RH) | (dm << GFLAGS_SHIFT_DM) | \
+              (deliv_mode << GLFAGS_SHIFT_DELIV_MODE) |
+              (trig_mode << GLFAGS_SHIFT_TRG_MODE);
+
+    return result;
+}
+
+int pt_msi_update(XenPCIPassthroughState *d)
+{
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    int ret = 0;
+
+    /* get vector, address, flags info, etc. */
+    gvec = d->msi->data & 0xFF;
+    addr = (uint64_t)d->msi->addr_hi << 32 | d->msi->addr_lo;
+    gflags = __get_msi_gflags(d->msi->data, addr);
+
+    PT_LOG("Update msi with pirq %x gvec %x gflags %x\n",
+           d->msi->pirq, gvec, gflags);
+
+    ret = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec,
+                                   d->msi->pirq, gflags, 0);
+
+    if (ret) {
+        PT_LOG("Error: Binding of MSI failed.\n");
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, d->msi->pirq)) {
+            PT_LOG("Error: Unmapping of MSI failed.\n");
+        }
+        d->msi->pirq = -1;
+        return ret;
+    }
+    return 0;
+}
+
+void pt_msi_disable(XenPCIPassthroughState *dev)
+{
+    PCIDevice *d = &dev->dev;
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    msi_set_enable(dev, 0);
+
+    e_device = PCI_SLOT(dev->dev.devfn);
+    e_intx = pci_intx(dev);
+
+    if (dev->msi_trans_en) {
+        if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, dev->msi->pirq,
+                                    PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                    e_device, e_intx, 0)) {
+            PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
+            goto out;
+        }
+    } else if (!(dev->msi->flags & MSI_FLAG_UNINIT)) {
+        /* get vector, address, flags info, etc. */
+        gvec = dev->msi->data & 0xFF;
+        addr = (uint64_t)dev->msi->addr_hi << 32 | dev->msi->addr_lo;
+        gflags = __get_msi_gflags(dev->msi->data, addr);
+
+        PT_LOG("Unbind msi with pirq %x, gvec %x\n",
+                dev->msi->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec,
+                                        dev->msi->pirq, gflags)) {
+            PT_LOG("Error: Unbinding of MSI failed. [%02x:%02x.%x]\n",
+                   pci_bus_num(d->bus), PCI_SLOT(d->devfn),
+                   PCI_FUNC(d->devfn));
+            goto out;
+        }
+    }
+
+    if (dev->msi->pirq != -1) {
+        PT_LOG("Unmap msi with pirq %x\n", dev->msi->pirq);
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, dev->msi->pirq)) {
+            PT_LOG("Error: Unmapping of MSI failed. [%02x:%02x.%x]\n",
+                   pci_bus_num(d->bus), PCI_SLOT(d->devfn),
+                   PCI_FUNC(d->devfn));
+            goto out;
+        }
+    }
+
+out:
+    /* clear msi info */
+    dev->msi->flags = 0;
+    dev->msi->pirq = -1;
+    dev->msi_trans_en = 0;
+}
+
+/* MSI-INTx translation virtualization functions */
+int pt_enable_msi_translate(XenPCIPassthroughState* dev)
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    if (!(dev->msi && dev->msi_trans_cap)) {
+        return -1;
+    }
+
+    msi_set_enable(dev, 0);
+    dev->msi_trans_en = 0;
+
+    if (pt_msi_setup(dev)) {
+        PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
+        return -1;
+    }
+
+    e_device = PCI_SLOT(dev->dev.devfn);
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = pci_intx(dev);
+
+    if (xc_domain_bind_pt_irq(xen_xc, xen_domid, dev->msi->pirq,
+                              PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                              e_device, e_intx, 0)) {
+        PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, dev->msi->pirq)) {
+            PT_LOG("Error: Unmapping of MSI failed.\n");
+        }
+        dev->msi->pirq = -1;
+        return -1;
+    }
+
+    msi_set_enable(dev, 1);
+    dev->msi_trans_en = 1;
+
+    return 0;
+}
+
+void pt_disable_msi_translate(XenPCIPassthroughState *dev)
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    /* MSI_ENABLE bit should be disabled until the new handler is set */
+    msi_set_enable(dev, 0);
+
+    e_device = PCI_SLOT(dev->dev.devfn);
+    e_intx = pci_intx(dev);
+
+    if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, dev->msi->pirq,
+                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                 e_device, e_intx, 0)) {
+        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
+    }
+
+    if (dev->machine_irq) {
+        if (xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, dev->machine_irq,
+                                       0, e_device, e_intx)) {
+            PT_LOG("Error: Rebinding of interrupt failed!\n");
+        }
+    }
+
+    dev->msi_trans_en = 0;
+}
+
+/*********************************/
+/* MSI-X virtualization functions */
+
+/*
+ * Store 'mask' into the dword at byte offset 12 of entry 'entry_nr' in the
+ * table reachable through dev->msix->phys_iomem_base (entries are spaced
+ * 16 bytes apart).
+ */
+static void mask_physical_msix_entry(XenPCIPassthroughState *dev,
+                                     int entry_nr, int mask)
+{
+    char *table = (char *)dev->msix->phys_iomem_base;
+    uint32_t *vec_ctrl = (uint32_t *)(table + 16 * entry_nr + 12);
+
+    *vec_ctrl = mask;
+}
+
+/*
+ * Push one MSI-X table entry, as last written by the guest, to Xen.
+ *
+ * Nothing is done (and 0 is returned) when the entry's 'flags' field is
+ * zero.  Otherwise a pirq is mapped for the entry if it does not have one
+ * yet (entry->pirq == -1), and xc_domain_update_msi_irq() is called with
+ * the guest vector, the pirq and the flags derived from io_mem[].  On
+ * success 'flags' is cleared.
+ *
+ * If the update hypercall fails, the entry's pirq is unmapped (an unmap
+ * failure is only logged) and entry->pirq is reset to -1.
+ *
+ * Returns 0 on success or when there is nothing to do, otherwise the
+ * non-zero result of the failing libxc call.
+ */
+static int pt_msix_update_one(XenPCIPassthroughState *dev, int entry_nr)
+{
+    struct msix_entry_info *e = &dev->msix->msix_entry[entry_nr];
+    uint64_t gaddr = *(uint64_t *)&e->io_mem[0];
+    uint32_t gflags = __get_msi_gflags(e->io_mem[2], gaddr);
+    int gvec = e->io_mem[2] & 0xff;
+    int pirq = e->pirq;
+    int rc;
+
+    if (!e->flags) {
+        return 0;
+    }
+
+    if (gvec == 0) {
+        /* if gvec is 0, the guest is asking for a particular pirq that
+         * is passed as dest_id */
+        pirq = ((gaddr >> 32) & 0xffffff00) |
+               (((gaddr & 0xffffffff) >> MSI_TARGET_CPU_SHIFT) & 0xff);
+        if (pirq) {
+            PT_LOG("pt_msix_update_one requested pirq = %d\n", pirq);
+        } else {
+            /* this probably identifies a misconfiguration of the guest,
+             * try the emulated path */
+            pirq = -1;
+        }
+    }
+
+    /* Map a pirq only if this entry does not own one yet */
+    if (e->pirq == -1) {
+        rc = xc_physdev_map_pirq_msi(xen_xc, xen_domid, AUTO_ASSIGN, &pirq,
+                                     PCI_DEVFN(dev->real_device->dev,
+                                               dev->real_device->func),
+                                     dev->real_device->bus, entry_nr,
+                                     dev->msix->table_base);
+        if (rc) {
+            PT_LOG("Error: Mapping msix entry %x\n", entry_nr);
+            return rc;
+        }
+        e->pirq = pirq;
+    }
+
+    PT_LOG("Update msix entry %x with pirq %x gvec %x\n",
+            entry_nr, pirq, gvec);
+
+    rc = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec, pirq, gflags,
+                                  dev->msix->mmio_base_addr);
+    if (!rc) {
+        e->flags = 0;
+        return 0;
+    }
+
+    PT_LOG("Error: Updating msix irq info for entry %d\n", entry_nr);
+    if (xc_physdev_unmap_pirq(xen_xc, xen_domid, e->pirq)) {
+        PT_LOG("Error: Unmapping of MSI-X failed.\n");
+    }
+    e->pirq = -1;
+
+    return rc;
+}
+
+/*
+ * Run pt_msix_update_one() over every entry of the device's MSI-X table.
+ * Per-entry results are ignored; the function always returns 0.
+ */
+int pt_msix_update(XenPCIPassthroughState *dev)
+{
+    struct pt_msix_info *table = dev->msix;
+    int nr = 0;
+
+    while (nr < table->total_entries) {
+        pt_msix_update_one(dev, nr);
+        nr++;
+    }
+
+    return 0;
+}
+
+/*
+ * Disable MSI-X on the device and release every entry that owns a pirq.
+ *
+ * For each entry with pirq != -1 the MSI binding is removed and, only if
+ * that unbind succeeded, the pirq is unmapped.  Errors are logged but do
+ * not stop the loop; the entry's pirq and flags are reset either way.
+ */
+void pt_msix_disable(XenPCIPassthroughState *dev)
+{
+    PCIDevice *pci = &dev->dev;
+    int nr;
+
+    msix_set_enable(dev, 0);
+
+    for (nr = 0; nr < dev->msix->total_entries; nr++) {
+        struct msix_entry_info *e = &dev->msix->msix_entry[nr];
+        uint64_t gaddr;
+        uint32_t gflags;
+        uint8_t gvec;
+
+        if (e->pirq == -1) {
+            /* never mapped, nothing to release */
+            continue;
+        }
+
+        gvec = e->io_mem[2] & 0xff;
+        gaddr = *(uint64_t *)&e->io_mem[0];
+        gflags = __get_msi_gflags(e->io_mem[2], gaddr);
+
+        PT_LOG("Unbind msix with pirq %x, gvec %x\n",
+                e->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec,
+                                     e->pirq, gflags) == 0) {
+            PT_LOG("Unmap msix with pirq %x\n", e->pirq);
+
+            if (xc_physdev_unmap_pirq(xen_xc, xen_domid, e->pirq) != 0) {
+                PT_LOG("Error: Unmapping of MSI-X failed. [%02x:%02x.%x]\n",
+                       pci_bus_num(pci->bus),
+                       PCI_SLOT(pci->devfn), PCI_FUNC(pci->devfn));
+            }
+        } else {
+            PT_LOG("Error: Unbinding of MSI-X failed. [%02x:%02x.%x]\n",
+                   pci_bus_num(pci->bus), PCI_SLOT(pci->devfn),
+                   PCI_FUNC(pci->devfn));
+        }
+
+        /* clear msi-x info */
+        e->pirq = -1;
+        e->flags = 0;
+    }
+}
+
+/*
+ * Re-program the MSI-X entries after the BAR holding the table moved.
+ *
+ * Does nothing unless the device has MSI-X state and 'bar_index' is the
+ * BAR recorded in s->msix->bar_index.  Every entry that owns a pirq is
+ * unbound (failure is only logged) and marked for update by setting its
+ * 'flags' to 1; pt_msix_update() is then run over the table.
+ *
+ * Always returns 0.
+ */
+int pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index)
+{
+    int nr;
+
+    if (!s->msix || s->msix->bar_index != bar_index) {
+        return 0;
+    }
+
+    for (nr = 0; nr < s->msix->total_entries; nr++) {
+        XenMSIXEntry *e = &s->msix->msix_entry[nr];
+
+        if (e->pirq == -1) {
+            continue;
+        }
+
+        if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, e->pirq,
+                                    PT_IRQ_TYPE_MSI, 0, 0, 0, 0)) {
+            PT_LOG("Error: unbind MSI-X entry %d failed\n", e->pirq);
+        }
+        e->flags = 1;
+    }
+
+    pt_msix_update(s);
+
+    return 0;
+}
+
+/*
+ * Write handler installed for the access sizes the MSI-X table does not
+ * accept: the write is dropped and an error is logged.
+ */
+static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr,
+                                   uint32_t val)
+{
+    PT_LOG("Error: Invalid write to MSI-X table, "
+           "only dword access is allowed.\n");
+}
+
+/*
+ * Dword write handler for the emulated MSI-X table.
+ *
+ * 'addr' is used as a byte offset into the table: addr / 16 selects the
+ * entry and (addr % 16) / 4 the dword inside it.  Unaligned accesses and
+ * accesses beyond the last table entry are rejected with an error message
+ * so that a guest cannot index outside msix_entry[] or the mapped
+ * physical table.
+ *
+ * Dwords 0-2 may only be changed while MSI-X is disabled or the entry is
+ * masked in hardware; a changed value marks the entry for update
+ * (flags = 1).  A write to dword 3 updates the entry through
+ * pt_msix_update_one() when MSI-X is enabled and the new value unmasks
+ * it, then forwards the mask bit to the physical table.
+ */
+static void pci_msix_writel(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    XenPCIPassthroughState *dev = (XenPCIPassthroughState *)opaque;
+    XenPTMSIX *msix = dev->msix;
+    XenMSIXEntry *entry;
+    int entry_nr, offset;
+    void *phys_off;
+    uint32_t vec_ctrl;
+
+    if (addr % 4) {
+        /* addr is a target_phys_addr_t, so print it with TARGET_FMT_plx */
+        PT_LOG("Error: Unaligned dword access to MSI-X table, "
+                "addr "TARGET_FMT_plx"\n", addr);
+        return;
+    }
+
+    PT_LOG("addr: "TARGET_FMT_plx", val: %#x\n", addr, val);
+
+    /*
+     * FIXME/TODO: the same may have to be done in readl.  It is still open
+     * whether this should be (addr - msix->mmio_base_addr) / 16.
+     */
+    entry_nr = addr / 16;
+    if (entry_nr < 0 || entry_nr >= msix->total_entries) {
+        PT_LOG("Error: Write to invalid MSI-X table entry %d\n", entry_nr);
+        return;
+    }
+    entry = &msix->msix_entry[entry_nr];
+    offset = (addr % 16) / 4;
+
+    /*
+     * If Xen intercepts the mask bit access, io_mem[3] may not be
+     * up-to-date. Read from hardware directly.
+     */
+    phys_off = dev->msix->phys_iomem_base + 16 * entry_nr + 12;
+    vec_ctrl = *(uint32_t *)phys_off;
+
+    if (offset != 3 && msix->enabled && !(vec_ctrl & 0x1)) {
+        PT_LOG("Error: Can't update msix entry %d since MSI-X is already "
+                "function.\n", entry_nr);
+        return;
+    }
+
+    if (offset != 3 && entry->io_mem[offset] != val) {
+        entry->flags = 1;
+    }
+    entry->io_mem[offset] = val;
+
+    if (offset == 3) {
+        if (msix->enabled && !(val & 0x1)) {
+            pt_msix_update_one(dev, entry_nr);
+        }
+        mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1);
+    }
+}
+
+/*
+ * Write handlers passed to cpu_register_io_memory().  Only the last slot
+ * accepts the access; presumably the slots are ordered byte, word, dword
+ * (confirm against cpu_register_io_memory()).
+ */
+static CPUWriteMemoryFunc *pci_msix_write[] = {
+    [0] = pci_msix_invalid_write,
+    [1] = pci_msix_invalid_write,
+    [2] = pci_msix_writel,
+};
+
+/*
+ * Read handler installed for the access sizes the MSI-X table does not
+ * accept: logs an error and yields 0.
+ */
+static uint32_t pci_msix_invalid_read(void *opaque, target_phys_addr_t addr)
+{
+    PT_LOG("Error: Invalid read to MSI-X table, "
+           "only dword access is allowed.\n");
+
+    return 0;
+}
+
+/*
+ * Dword read handler for the emulated MSI-X table.
+ *
+ * 'addr' is used as a byte offset into the table: addr / 16 selects the
+ * entry and (addr % 16) / 4 the dword inside it.  The value comes from the
+ * shadow copy in io_mem[], not from the hardware table.
+ *
+ * Unaligned accesses and accesses beyond the last table entry are logged
+ * and read as 0, so a guest cannot read outside msix_entry[].
+ */
+static uint32_t pci_msix_readl(void *opaque, target_phys_addr_t addr)
+{
+    XenPCIPassthroughState *dev = (XenPCIPassthroughState *)opaque;
+    struct pt_msix_info *msix = dev->msix;
+    int entry_nr, offset;
+
+    if (addr % 4) {
+        /* addr is a target_phys_addr_t, so print it with TARGET_FMT_plx */
+        PT_LOG("Error: Unaligned dword access to MSI-X table, "
+                "addr "TARGET_FMT_plx"\n", addr);
+        return 0;
+    }
+
+    PT_LOG("addr: "TARGET_FMT_plx"\n", addr);
+
+    entry_nr = addr / 16;
+    if (entry_nr < 0 || entry_nr >= msix->total_entries) {
+        PT_LOG("Error: Read from invalid MSI-X table entry %d\n", entry_nr);
+        return 0;
+    }
+    offset = (addr % 16) / 4;
+
+    return msix->msix_entry[entry_nr].io_mem[offset];
+}
+
+/*
+ * Read handlers passed to cpu_register_io_memory().  Only the last slot
+ * accepts the access; presumably the slots are ordered byte, word, dword
+ * (confirm against cpu_register_io_memory()).
+ */
+static CPUReadMemoryFunc *pci_msix_read[] = {
+    [0] = pci_msix_invalid_read,
+    [1] = pci_msix_invalid_read,
+    [2] = pci_msix_readl,
+};
+
+/*
+ * Ask Xen to add (DPCI_ADD_MAPPING) the mapping between the guest frames
+ * at s->msix->mmio_base_addr and the machine frames of the MSI-X table
+ * (BAR machine address + table offset), covering the whole table rounded
+ * up to full pages.
+ *
+ * Returns 0 without doing anything when the device has no MSI-X state or
+ * 'bar_index' is not the BAR holding the table; otherwise returns the
+ * result of xc_domain_memory_mapping().
+ */
+int add_msix_mapping(XenPCIPassthroughState *s, int bar_index)
+{
+    unsigned long gfn, mfn, nr_frames;
+
+    if (!s->msix || s->msix->bar_index != bar_index) {
+        return 0;
+    }
+
+    gfn = s->msix->mmio_base_addr >> XC_PAGE_SHIFT;
+    mfn = (s->bases[bar_index].access.maddr + s->msix->table_off)
+              >> XC_PAGE_SHIFT;
+    nr_frames = (s->msix->total_entries * 16 + XC_PAGE_SIZE - 1)
+                    >> XC_PAGE_SHIFT;
+
+    return xc_domain_memory_mapping(xen_xc, xen_domid, gfn, mfn, nr_frames,
+                                    DPCI_ADD_MAPPING);
+}
+
+/*
+ * Point the emulated MSI-X table at the BAR's current guest address and
+ * ask Xen to remove (DPCI_REMOVE_MAPPING) the direct mapping of that range.
+ *
+ * mmio_base_addr is recomputed from the BAR's e_physbase plus the table
+ * offset, and the MMIO handlers (mmio_index) are registered over the table
+ * range before the hypercall is issued.
+ *
+ * Returns 0 without doing anything when the device has no MSI-X state or
+ * 'bar_index' is not the BAR holding the table; otherwise returns the
+ * result of xc_domain_memory_mapping().
+ */
+int remove_msix_mapping(XenPCIPassthroughState *s, int bar_index)
+{
+    unsigned long gfn, mfn, nr_frames;
+
+    if (!s->msix || s->msix->bar_index != bar_index) {
+        return 0;
+    }
+
+    s->msix->mmio_base_addr =
+        s->bases[bar_index].e_physbase + s->msix->table_off;
+
+    cpu_register_physical_memory(s->msix->mmio_base_addr,
+                                 s->msix->total_entries * 16,
+                                 s->msix->mmio_index);
+
+    gfn = s->msix->mmio_base_addr >> XC_PAGE_SHIFT;
+    mfn = (s->bases[bar_index].access.maddr + s->msix->table_off)
+              >> XC_PAGE_SHIFT;
+    nr_frames = (s->msix->total_entries * 16 + XC_PAGE_SIZE - 1)
+                    >> XC_PAGE_SHIFT;
+
+    return xc_domain_memory_mapping(xen_xc, xen_domid, gfn, mfn, nr_frames,
+                                    DPCI_REMOVE_MAPPING);
+}
+
+/*
+ * Build the MSI-X emulation state for a passed-through device.
+ *
+ * 'pos' is the config-space offset at which the capability is expected;
+ * the capability ID found there must be PCI_CAP_ID_MSIX.  The function
+ *  - allocates dev->msix with one msix_entry_info per table entry
+ *    ((control & 0x7ff) + 1 entries), each starting with pirq == -1,
+ *  - registers the MSI-X MMIO handlers and stores the returned index,
+ *  - records the table's BAR index, offset and BAR base address,
+ *  - maps the physical table through /dev/mem.  The mapping starts at the
+ *    page containing the table; phys_iomem_base is then advanced by
+ *    table_offset_adjust so that it points at the first entry.
+ *
+ * Returns 0 on success and -1 on failure.  On failure after allocation the
+ * registered I/O memory slot (if any) is released, dev->msix is freed and
+ * set to NULL.
+ *
+ * NOTE(review): the BAR index read from the device is used unchecked to
+ * index real_device->base_addr[] - confirm the array covers every value
+ * PCI_MSIX_FLAGS_BIRMASK can yield.
+ */
+int pt_msix_init(XenPCIPassthroughState *dev, int pos)
+{
+    uint8_t id;
+    uint16_t control;
+    int i, total_entries, table_off, bar_index;
+    HostPCIDevice *pd = dev->real_device;
+    int fd;
+
+    id = host_pci_read_byte(pd, pos + PCI_CAP_LIST_ID);
+
+    if (id != PCI_CAP_ID_MSIX) {
+        PT_LOG("Error: Invalid id %#x pos %#x\n", id, pos);
+        return -1;
+    }
+
+    control = host_pci_read_word(pd, pos + 2);
+    total_entries = control & 0x7ff;
+    total_entries += 1;
+
+    dev->msix = g_malloc0(sizeof (struct pt_msix_info)
+                          + total_entries * sizeof (struct msix_entry_info));
+
+    dev->msix->total_entries = total_entries;
+    for (i = 0; i < total_entries; i++) {
+        dev->msix->msix_entry[i].pirq = -1;
+    }
+
+    dev->msix->mmio_index =
+        cpu_register_io_memory(pci_msix_read, pci_msix_write,
+                               dev, DEVICE_NATIVE_ENDIAN);
+    if (dev->msix->mmio_index == -1) {
+        /* no slot was handed out, so there is nothing to unregister */
+        PT_LOG("Error: Can't register MSI-X I/O memory\n");
+        goto error_free;
+    }
+
+    table_off = host_pci_read_long(pd, pos + PCI_MSIX_TABLE);
+    bar_index = dev->msix->bar_index = table_off & PCI_MSIX_FLAGS_BIRMASK;
+    table_off = dev->msix->table_off = table_off & ~PCI_MSIX_FLAGS_BIRMASK;
+    dev->msix->table_base =
+        pt_pci_base_addr(dev->real_device->base_addr[bar_index]);
+    PT_LOG("get MSI-X table bar base %#"PRIx64"\n", dev->msix->table_base);
+
+    fd = open("/dev/mem", O_RDWR);
+    if (fd == -1) {
+        PT_LOG("Error: Can't open /dev/mem: %s\n", strerror(errno));
+        goto error_unregister;
+    }
+    PT_LOG("table_off = %#x, total_entries = %d\n", table_off, total_entries);
+    /* mmap() needs a page-aligned file offset; remember the remainder */
+    dev->msix->table_offset_adjust = table_off & 0x0fff;
+    dev->msix->phys_iomem_base = mmap(0,
+                                      total_entries * 16
+                                      + dev->msix->table_offset_adjust,
+                                      PROT_WRITE | PROT_READ,
+                                      MAP_SHARED | MAP_LOCKED,
+                                      fd,
+                                      dev->msix->table_base + table_off
+                                      - dev->msix->table_offset_adjust);
+    if (dev->msix->phys_iomem_base == MAP_FAILED) {
+        PT_LOG("Error: Can't map physical MSI-X table: %s\n", strerror(errno));
+        close(fd);
+        goto error_unregister;
+    }
+    dev->msix->phys_iomem_base = ((char *)dev->msix->phys_iomem_base +
+                                  dev->msix->table_offset_adjust);
+
+    /* the mapping stays valid after the descriptor is closed */
+    close(fd);
+
+    PT_LOG("mapping physical MSI-X table to %p\n", dev->msix->phys_iomem_base);
+    return 0;
+
+error_unregister:
+    /* give back the I/O memory slot taken above instead of leaking it */
+    cpu_unregister_io_memory(dev->msix->mmio_index);
+error_free:
+    g_free(dev->msix);
+    dev->msix = NULL;
+    return -1;
+}
+
+#if 0
+/*
+ * Release what pt_msix_init() set up: the /dev/mem mapping of the physical
+ * MSI-X table, the registered I/O memory slot and the pt_msix_info
+ * allocation.  dev->msix is NULL afterwards.  Currently compiled out.
+ */
+static void pt_msix_delete(XenPCIPassthroughState *dev)
+{
+    struct pt_msix_info *msix = dev->msix;
+
+    if (!msix) {
+        return;
+    }
+
+    /* unmap the MSI-X memory mapped register area */
+    if (msix->phys_iomem_base) {
+        PT_LOG("unmapping physical MSI-X table from %p\n",
+               msix->phys_iomem_base);
+        /*
+         * pt_msix_init() advanced phys_iomem_base by table_offset_adjust
+         * past the address mmap() returned; munmap() must be given that
+         * original, page-aligned address.
+         */
+        munmap((char *)msix->phys_iomem_base - msix->table_offset_adjust,
+               msix->total_entries * 16 + msix->table_offset_adjust);
+    }
+
+    if (msix->mmio_index > 0) {
+        cpu_unregister_io_memory(msix->mmio_index);
+    }
+
+    /* allocated with g_malloc0(), so it must be released with g_free() */
+    g_free(msix);
+    dev->msix = NULL;
+}
+#endif