Patchwork vfio-powerpc: added VFIO support (v4)

login
register
mail settings
Submitter Alexey Kardashevskiy
Date July 18, 2012, 11:09 a.m.
Message ID <1342609768-19455-1-git-send-email-aik@ozlabs.ru>
Download mbox | patch
Permalink /patch/171651/
State New
Headers show

Comments

Alexey Kardashevskiy - July 18, 2012, 11:09 a.m.
It literally does the following:

1. POWERPC IOMMU support (the kernel counterpart is required)

2. The patch assumes that IOAPIC calls are going to be replaced
with something generic.

3. Added sPAPRVFIOData (hw/spapr_iommu_vfio.h) which describes
the interface between VFIO and sPAPR IOMMU.

4. Change sPAPR PHB to scan the PCI bus which is used for
the IOMMU-VFIO group. Now it is enough to add the following to
the QEMU command line to get VFIO up with all the devices from
IOMMU group with id=3:
-device spapr-pci-host-bridge,busname=E1000E,buid=0x3,iommu=3,\
 mem_win_addr=0x230000000000,io_win_addr=0x240000000000,msi_win_addr=0x250000000000

WIth the pathes posted today a bit earlier, this patch fully supports
VFIO what includes MSIX as well.

ps. yes, I know that linux_vfio.h has moved, will fix it later :)

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/linux-vfio.h       |   26 +++++++++++
 hw/ppc/Makefile.objs  |    3 ++
 hw/spapr.h            |    4 ++
 hw/spapr_iommu.c      |   62 ++++++++++++++++++++++++-
 hw/spapr_iommu_vfio.h |   34 ++++++++++++++
 hw/spapr_pci.c        |  124 +++++++++++++++++++++++++++++++++++++++++++++++--
 hw/spapr_pci.h        |    6 +++
 hw/vfio_pci.c         |   64 +++++++++++++++++++++++++
 hw/vfio_pci.h         |    2 +
 trace-events          |    1 +
 10 files changed, 320 insertions(+), 6 deletions(-)
 create mode 100644 hw/spapr_iommu_vfio.h
Alex Williamson - July 18, 2012, 2:14 p.m.
On Wed, 2012-07-18 at 21:09 +1000, Alexey Kardashevskiy wrote:
> It literally does the following:
> 
> 1. POWERPC IOMMU support (the kernel counterpart is required)
> 
> 2. The patch assumes that IOAPIC calls are going to be replaced
> with something generic.
> 
> 3. Added sPAPRVFIOData (hw/spapr_iommu_vfio.h) which describes
> the interface between VFIO and sPAPR IOMMU.
> 
> 4. Change sPAPR PHB to scan the PCI bus which is used for
> the IOMMU-VFIO group. Now it is enough to add the following to
> the QEMU command line to get VFIO up with all the devices from
> IOMMU group with id=3:
> -device spapr-pci-host-bridge,busname=E1000E,buid=0x3,iommu=3,\
>  mem_win_addr=0x230000000000,io_win_addr=0x240000000000,msi_win_addr=0x250000000000
> 
> WIth the pathes posted today a bit earlier, this patch fully supports
> VFIO what includes MSIX as well.
> 
> ps. yes, I know that linux_vfio.h has moved, will fix it later :)
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  hw/linux-vfio.h       |   26 +++++++++++
>  hw/ppc/Makefile.objs  |    3 ++
>  hw/spapr.h            |    4 ++
>  hw/spapr_iommu.c      |   62 ++++++++++++++++++++++++-
>  hw/spapr_iommu_vfio.h |   34 ++++++++++++++
>  hw/spapr_pci.c        |  124 +++++++++++++++++++++++++++++++++++++++++++++++--
>  hw/spapr_pci.h        |    6 +++
>  hw/vfio_pci.c         |   64 +++++++++++++++++++++++++
>  hw/vfio_pci.h         |    2 +
>  trace-events          |    1 +
>  10 files changed, 320 insertions(+), 6 deletions(-)
>  create mode 100644 hw/spapr_iommu_vfio.h
> 
> diff --git a/hw/linux-vfio.h b/hw/linux-vfio.h
> index 300d49b..27a0501 100644
> --- a/hw/linux-vfio.h
> +++ b/hw/linux-vfio.h
> @@ -442,4 +442,30 @@ struct vfio_iommu_type1_dma_unmap {
>  
>  #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
>  
> +/*
> + * Interface to SPAPR TCE (POWERPC Book3S)
> + */
> +#define SPAPR_TCE_IOMMU         2
> +
> +struct tce_iommu_info {
> +    __u32 argsz;
> +    __u32 flags;
> +    __u32 dma32_window_start;
> +    __u32 dma32_window_size;
> +    __u64 dma64_window_start;
> +    __u64 dma64_window_size;
> +};
> +
> +#define SPAPR_TCE_IOMMU_GET_INFO        _IO(VFIO_TYPE, VFIO_BASE + 12)
> +
> +struct tce_iommu_dma_map {
> +    __u32 argsz;
> +    __u32 flags;
> +    __u64 va;
> +    __u64 dmaaddr;
> +};
> +
> +#define SPAPR_TCE_IOMMU_MAP_DMA         _IO(VFIO_TYPE, VFIO_BASE + 13)
> +#define SPAPR_TCE_IOMMU_UNMAP_DMA       _IO(VFIO_TYPE, VFIO_BASE + 14)
> +
>  #endif /* VFIO_H */
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index f573a95..c46a049 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
>  # Xilinx PPC peripherals
>  obj-y += xilinx_ethlite.o
>  
> +# VFIO PCI device assignment
> +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
> +
>  obj-y := $(addprefix ../,$(obj-y))
> diff --git a/hw/spapr.h b/hw/spapr.h
> index b37f337..0c15c88 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char *propname,
>  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
>                        DMAContext *dma);
>  
> +struct sPAPRVFIOData;
> +void spapr_vfio_init_dma(int group_id, uint32_t liobn,
> +                         struct sPAPRVFIOData *data);
> +
>  #endif /* !defined (__HW_SPAPR_H__) */
> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
> index 50c288d..0a82842 100644
> --- a/hw/spapr_iommu.c
> +++ b/hw/spapr_iommu.c
> @@ -23,6 +23,8 @@
>  #include "dma.h"
>  
>  #include "hw/spapr.h"
> +#include "hw/spapr_iommu_vfio.h"
> +#include "hw/vfio_pci.h"
>  
>  #include <libfdt.h>
>  
> @@ -183,6 +185,60 @@ static int put_tce_emu(target_ulong liobn, target_ulong ioba, target_ulong tce)
>      return 0;
>  }
>  
> +typedef struct sPAPRVFIOTable {
> +    struct sPAPRVFIOData *data;
> +    uint32_t liobn;
> +    QLIST_ENTRY(sPAPRVFIOTable) list;
> +} sPAPRVFIOTable;
> +
> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
> +
> +void spapr_vfio_init_dma(int group_id, uint32_t liobn,
> +                         struct sPAPRVFIOData *data)
> +{
> +    sPAPRVFIOTable *t;
> +
> +    t = g_malloc0(sizeof(*t));
> +    t->data = data;
> +    t->liobn = liobn;
> +
> +    QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
> +}
> +
> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
> +{
> +    sPAPRVFIOTable *t;
> +    struct tce_iommu_dma_map map = {
> +        .argsz = sizeof(map),
> +        .va = 0,
> +        .dmaaddr = ioba,
> +    };
> +
> +    QLIST_FOREACH(t, &vfio_tce_tables, list) {
> +        if (t->liobn != liobn) {
> +            continue;
> +        }
> +        if (!t->data) {
> +            return H_NO_MEM;
> +        }

Why would this ever happen?

> +        if (tce) {
> +            map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
> +
> +            if (t->data->map(t->data->groupid, &map)) {

Just pass t->data, this is why the VFIOContainer has a union.

> +                perror("TCE_MAP_DMA");
> +                return H_PARAMETER;
> +            }
> +        } else {
> +            if (t->data->unmap(t->data->groupid, &map)) {
> +                perror("TCE_UNMAP_DMA");
> +                return H_PARAMETER;
> +            }
> +        }
> +        return H_SUCCESS;
> +    }
> +    return H_CONTINUE; /* positive non-zero value */
> +}
> +
>  static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
>                                target_ulong opcode, target_ulong *args)
>  {
> @@ -200,7 +256,11 @@ static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
>      ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>  
>      ret = put_tce_emu(liobn, ioba, tce);
> -    if (0 >= ret) {
> +    if (ret <= 0) {
> +        return ret ? H_PARAMETER : H_SUCCESS;
> +    }
> +    ret = put_tce_vfio(liobn, ioba, tce);
> +    if (ret <= 0) {
>          return ret ? H_PARAMETER : H_SUCCESS;
>      }
>  #ifdef DEBUG_TCE
> diff --git a/hw/spapr_iommu_vfio.h b/hw/spapr_iommu_vfio.h
> new file mode 100644
> index 0000000..b3d6115
> --- /dev/null
> +++ b/hw/spapr_iommu_vfio.h
> @@ -0,0 +1,34 @@
> +/*
> + * Definitions for VFIO IOMMU implementation for SPAPR TCE.
> + *
> + * Copyright (c) 2012 Alexey Kardashevskiy <aik@olabs.ru>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#if !defined(__HW_SPAPR_IOMMU_VFIO_H__)
> +#define __HW_SPAPR_IOMMU_VFIO_H__
> +
> +#include "hw/linux-vfio.h"
> +
> +struct sPAPRVFIOData {
> +    int groupid;
> +    struct tce_iommu_info info;

Seems a little lazy to embed this whole thing here, what does anyone
need argsz and flags for later?

> +    int (*map)(int groupid, struct tce_iommu_dma_map *param);
> +    int (*unmap)(int groupid, struct tce_iommu_dma_map *param);

s/int groupid/struct sPAPRVFIOData */ in map/unmap

> +};
> +
> +void spapr_register_vfio_container(struct sPAPRVFIOData *data);
> +
> +#endif
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> index 88c8f2c..75743e7 100644
> --- a/hw/spapr_pci.c
> +++ b/hw/spapr_pci.c
> @@ -22,6 +22,9 @@
>   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>   * THE SOFTWARE.
>   */
> +#include <sys/types.h>
> +#include <dirent.h>
> +
>  #include "hw.h"
>  #include "pci.h"
>  #include "msi.h"
> @@ -32,7 +35,6 @@
>  #include "exec-memory.h"
>  #include <libfdt.h>
>  #include "trace.h"
> -
>  #include "hw/pci_internals.h"
>  
>  /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
> @@ -440,6 +442,12 @@ static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
>                   level);
>  }
>  
> +static int pci_spapr_get_irq(void *opaque, int irq_num)
> +{
> +    sPAPRPHBState *phb = opaque;
> +    return phb->lsi_table[irq_num].dt_irq;
> +}
> +
>  static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
>                                unsigned size)
>  {
> @@ -515,6 +523,93 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
>      return phb->dma;
>  }
>  
> +void spapr_register_vfio_container(struct sPAPRVFIOData *data)
> +{
> +    sPAPRPHBState *phb;
> +
> +    QLIST_FOREACH(phb, &spapr->phbs, list) {
> +        if (phb->iommugroupid == data->groupid) {
> +            phb->vfio_data = *data;
> +            phb->dma_window_start = phb->vfio_data.info.dma32_window_start;
> +            phb->dma_window_size = phb->vfio_data.info.dma32_window_size;
> +            return;
> +        }
> +    }
> +}
> +
> +static int spapr_pci_scan_vfio(sPAPRPHBState *phb)
> +{
> +    char iommupath[256];
> +    DIR *dirp;
> +    struct dirent *entry;
> +
> +    if (!phb->scan) {
> +        trace_spapr_pci("Autoscan disabled for ", phb->dtbusname);
> +        return 0;
> +    }
> +
> +    snprintf(iommupath, sizeof(iommupath),
> +             "/sys/kernel/iommu_groups/%d/devices/", phb->iommugroupid);
> +    dirp = opendir(iommupath);
> +
> +    while ((entry = readdir(dirp)) != NULL) {
> +        char *tmp;
> +        FILE *deviceclassfile;
> +        unsigned deviceclass = 0, domainid, busid, devid, fnid;
> +        char addr[32];
> +        DeviceState *dev;
> +
> +        if (sscanf(entry->d_name, "%X:%X:%X.%x",
> +                   &domainid, &busid, &devid, &fnid) != 4) {
> +            continue;
> +        }
> +
> +        tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
> +        trace_spapr_pci("Reading device class from ", tmp);
> +
> +        deviceclassfile = fopen(tmp, "r");
> +        if (deviceclassfile) {
> +            fscanf(deviceclassfile, "%x", &deviceclass);
> +            fclose(deviceclassfile);
> +        }
> +        g_free(tmp);
> +
> +        if (!deviceclass) {
> +            continue;
> +        }
> +        if ((phb->scan < 2) &&
> +            ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8))) {
> +            /* Skip _any_ bridge */
> +            continue;
> +        }
> +        if ((deviceclass == 0xc0310) || (deviceclass == 0xc0320)) {
> +            /* Tweak USB */
> +            phb->force_addr = 1;
> +            phb->enable_multifunction = 1;
> +        }
> +
> +        trace_spapr_pci("Creating devicei from ", entry->d_name);
> +
> +        dev = qdev_create(&phb->host_state.bus->qbus, "vfio-pci");
> +        if (!dev) {
> +            fprintf(stderr, "failed to create vfio-pci\n");
> +            continue;
> +        }
> +        qdev_prop_parse(dev, "host", entry->d_name);
> +        if (phb->force_addr) {
> +            snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
> +            qdev_prop_parse(dev, "addr", addr);
> +        }
> +        if (phb->enable_multifunction) {
> +            qdev_prop_set_bit(dev, "multifunction", 1);
> +        }
> +        qdev_init_nofail(dev);
> +    }
> +    closedir(dirp);
> +
> +    return 0;
> +}
> +
>  static int spapr_phb_init(SysBusDevice *s)
>  {
>      sPAPRPHBState *phb = DO_UPCAST(sPAPRPHBState, host_state.busdev, s);
> @@ -567,15 +662,13 @@ static int spapr_phb_init(SysBusDevice *s)
>  
>      bus = pci_register_bus(&phb->host_state.busdev.qdev,
>                             phb->busname ? phb->busname : phb->dtbusname,
> -                           pci_spapr_set_irq, NULL, pci_spapr_map_irq, phb,
> +                           pci_spapr_set_irq, pci_spapr_get_irq,
> +                           pci_spapr_map_irq, phb,
>                             &phb->memspace, &phb->iospace,
>                             PCI_DEVFN(0, 0), PCI_NUM_PINS);
>      phb->host_state.bus = bus;
>  
>      phb->dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
> -    phb->dma_window_start = 0;
> -    phb->dma_window_size = 0x40000000;
> -    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn, phb->dma_window_size);
>      pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
>  
>      QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
> @@ -588,6 +681,19 @@ static int spapr_phb_init(SysBusDevice *s)
>          }
>      }
>  
> +    if (phb->iommugroupid >= 0) {
> +        if (spapr_pci_scan_vfio(phb) < 0) {
> +            return -1;
> +        }
> +        spapr_vfio_init_dma(phb->iommugroupid, phb->dma_liobn, &phb->vfio_data);
> +        return 0;
> +    }
> +
> +    phb->dma_window_start = 0;
> +    phb->dma_window_size = 0x40000000;
> +    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
> +                                         phb->dma_window_size);
> +
>      return 0;
>  }
>  
> @@ -599,6 +705,10 @@ static Property spapr_phb_properties[] = {
>      DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
>      DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
>      DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
> +    DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
> +    DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1),
> +    DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
> +    DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -729,6 +839,10 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>      _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
>                       sizeof(interrupt_map)));
>  
> +    if (!phb->dma_window_size) {
> +        fprintf(stderr, "Unexpected error: DMA window is zero, exiting\n");
> +        exit(1);
> +    }
>      spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
>                   phb->dma_liobn, phb->dma_window_start,
>                   phb->dma_window_size);
> diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
> index 145071c..ed17053 100644
> --- a/hw/spapr_pci.h
> +++ b/hw/spapr_pci.h
> @@ -26,6 +26,7 @@
>  #include "hw/pci.h"
>  #include "hw/pci_host.h"
>  #include "hw/xics.h"
> +#include "hw/spapr_iommu_vfio.h"
>  
>  #define SPAPR_MSIX_MAX_DEVS 32
>  
> @@ -57,6 +58,11 @@ typedef struct sPAPRPHBState {
>          int nvec;
>      } msi_table[SPAPR_MSIX_MAX_DEVS];
>  
> +    struct sPAPRVFIOData vfio_data;
> +    int32_t iommugroupid;
> +    uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
> +    uint8_t enable_multifunction, force_addr;
> +
>      QLIST_ENTRY(sPAPRPHBState) list;
>  } sPAPRPHBState;
>  
> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> index 0ad4761..0023d41 100644
> --- a/hw/vfio_pci.c
> +++ b/hw/vfio_pci.c
> @@ -43,6 +43,7 @@
>  #include "range.h"
>  #include "vfio_pci.h"
>  #include "linux-vfio.h"
> +#include "hw/spapr_iommu_vfio.h"
>  
>  //#define DEBUG_VFIO
>  #ifdef DEBUG_VFIO
> @@ -55,6 +56,8 @@
>  
>  #define MSIX_CAP_LENGTH 12
>  
> +static VFIOGroup *vfio_get_group(int groupid);
> +

Unnecessary, because...

>  static QLIST_HEAD(, VFIOContainer)
>      container_list = QLIST_HEAD_INITIALIZER(container_list);
>  
> @@ -1088,6 +1091,31 @@ static void vfio_listener_release(VFIOContainer *container)
>  }
>  
>  /*
> + * sPAPR TCE DMA interface
> + */
> +static int spapr_tce_map(int groupid, struct tce_iommu_dma_map *param)
> +{
> +    VFIOGroup *group = vfio_get_group(groupid);
> +
> +    if (!group || !group->container) {
> +        return -1;
> +    }

This should be:

static int spapr_tce_map(sPAPRVFIOData *spapr_data, struct tce_iommu_dma_map *param)
{
    VFIOContainer *container;

    container = container_of(spapr_data, VFIOContainer, iommu_data.spapr);

    return ioctl(container->fd, ....
}


> +
> +    return ioctl(group->container->fd, SPAPR_TCE_IOMMU_MAP_DMA, param);
> +}
> +
> +static int spapr_tce_unmap(int groupid, struct tce_iommu_dma_map *param)
> +{
> +    VFIOGroup *group = vfio_get_group(groupid);
> +
> +    if (!group || !group->container) {
> +        return -1;
> +    }
> +
> +    return ioctl(group->container->fd, SPAPR_TCE_IOMMU_UNMAP_DMA, param);
> +}
> +
> +/*
>   * Interrupt setup
>   */
>  static void vfio_disable_interrupts(VFIODevice *vdev)
> @@ -1590,6 +1618,42 @@ static int vfio_connect_container(VFIOGroup *group)
>          memory_listener_register(&container->iommu_data.listener,
>                                   get_system_memory());
>  
> +    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, SPAPR_TCE_IOMMU)) {
> +        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
> +        if (ret) {
> +            error_report("vfio: failed to set group container: %s\n",
> +                         strerror(errno));
> +            g_free(container);
> +            close(fd);
> +            return -1;
> +        }
> +
> +        ret = ioctl(fd, VFIO_SET_IOMMU, SPAPR_TCE_IOMMU);
> +        if (ret) {
> +            error_report("vfio: failed to set iommu for container: %s\n",
> +                         strerror(errno));
> +            g_free(container);
> +            close(fd);
> +            return -1;
> +        }
> +
> +        container->iommu_data.spapr.info.argsz =
> +                sizeof(container->iommu_data.spapr.info);
> +        ret = ioctl(fd, SPAPR_TCE_IOMMU_GET_INFO,
> +                    &container->iommu_data.spapr.info);
> +        if (ret) {
> +            error_report("vfio: failed to get iommu info for container: %s\n",
> +                         strerror(errno));
> +            g_free(container);
> +            close(fd);
> +            return -1;
> +        }
> +
> +        container->iommu_data.spapr.map = spapr_tce_map;
> +        container->iommu_data.spapr.unmap = spapr_tce_unmap;
> +        container->iommu_data.spapr.groupid = group->groupid;

This at least deserves a comment because x86 doesn't have a 1:1 mapping
of container to groupid.  I really think it might make more sense to
pass a PCIBus here instead of a groupid.  Whatever you choose, this
shouldn't be part of sPAPRVFIOData, it should be another parameter to
spapr_register_vfio_container.

> +        spapr_register_vfio_container(&container->iommu_data.spapr);
> +
>      } else {
>          error_report("vfio: No available IOMMU models\n");
>          g_free(container);
> diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
> index 00bb3dd..ddb5332 100644
> --- a/hw/vfio_pci.h
> +++ b/hw/vfio_pci.h
> @@ -6,6 +6,7 @@
>  #include "pci.h"
>  #include "ioapic.h"
>  #include "event_notifier.h"
> +#include "hw/spapr_iommu_vfio.h"
>  
>  typedef struct VFIOPCIHostDevice {
>      uint16_t seg;
> @@ -59,6 +60,7 @@ typedef struct VFIOContainer {
>      struct {
>          union {
>              MemoryListener listener;
> +            struct sPAPRVFIOData spapr;
>          };
>          void (*release)(struct VFIOContainer *);
>      } iommu_data;
> diff --git a/trace-events b/trace-events
> index e548f86..9100591 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -848,6 +848,7 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
>  qxl_render_update_area_done(void *cookie) "%p"
>  
>  # hw/spapr_pci.c
> +spapr_pci(const char *msg1, const char *msg2) "%s%s"
>  spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
>  spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
>  spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"
Alexey Kardashevskiy - July 19, 2012, 4:01 a.m.
On 19/07/12 00:14, Alex Williamson wrote:
> On Wed, 2012-07-18 at 21:09 +1000, Alexey Kardashevskiy wrote:
>> It literally does the following:
>>
>> 1. POWERPC IOMMU support (the kernel counterpart is required)
>>
>> 2. The patch assumes that IOAPIC calls are going to be replaced
>> with something generic.
>>
>> 3. Added sPAPRVFIOData (hw/spapr_iommu_vfio.h) which describes
>> the interface between VFIO and sPAPR IOMMU.
>>
>> 4. Change sPAPR PHB to scan the PCI bus which is used for
>> the IOMMU-VFIO group. Now it is enough to add the following to
>> the QEMU command line to get VFIO up with all the devices from
>> IOMMU group with id=3:
>> -device spapr-pci-host-bridge,busname=E1000E,buid=0x3,iommu=3,\
>>  mem_win_addr=0x230000000000,io_win_addr=0x240000000000,msi_win_addr=0x250000000000
>>
>> WIth the pathes posted today a bit earlier, this patch fully supports
>> VFIO what includes MSIX as well.
>>
>> ps. yes, I know that linux_vfio.h has moved, will fix it later :)
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  hw/linux-vfio.h       |   26 +++++++++++
>>  hw/ppc/Makefile.objs  |    3 ++
>>  hw/spapr.h            |    4 ++
>>  hw/spapr_iommu.c      |   62 ++++++++++++++++++++++++-
>>  hw/spapr_iommu_vfio.h |   34 ++++++++++++++
>>  hw/spapr_pci.c        |  124 +++++++++++++++++++++++++++++++++++++++++++++++--
>>  hw/spapr_pci.h        |    6 +++
>>  hw/vfio_pci.c         |   64 +++++++++++++++++++++++++
>>  hw/vfio_pci.h         |    2 +
>>  trace-events          |    1 +
>>  10 files changed, 320 insertions(+), 6 deletions(-)
>>  create mode 100644 hw/spapr_iommu_vfio.h
>>
>> diff --git a/hw/linux-vfio.h b/hw/linux-vfio.h
>> index 300d49b..27a0501 100644
>> --- a/hw/linux-vfio.h
>> +++ b/hw/linux-vfio.h
>> @@ -442,4 +442,30 @@ struct vfio_iommu_type1_dma_unmap {
>>  
>>  #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
>>  
>> +/*
>> + * Interface to SPAPR TCE (POWERPC Book3S)
>> + */
>> +#define SPAPR_TCE_IOMMU         2
>> +
>> +struct tce_iommu_info {
>> +    __u32 argsz;
>> +    __u32 flags;
>> +    __u32 dma32_window_start;
>> +    __u32 dma32_window_size;
>> +    __u64 dma64_window_start;
>> +    __u64 dma64_window_size;
>> +};
>> +
>> +#define SPAPR_TCE_IOMMU_GET_INFO        _IO(VFIO_TYPE, VFIO_BASE + 12)
>> +
>> +struct tce_iommu_dma_map {
>> +    __u32 argsz;
>> +    __u32 flags;
>> +    __u64 va;
>> +    __u64 dmaaddr;
>> +};
>> +
>> +#define SPAPR_TCE_IOMMU_MAP_DMA         _IO(VFIO_TYPE, VFIO_BASE + 13)
>> +#define SPAPR_TCE_IOMMU_UNMAP_DMA       _IO(VFIO_TYPE, VFIO_BASE + 14)
>> +
>>  #endif /* VFIO_H */
>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>> index f573a95..c46a049 100644
>> --- a/hw/ppc/Makefile.objs
>> +++ b/hw/ppc/Makefile.objs
>> @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
>>  # Xilinx PPC peripherals
>>  obj-y += xilinx_ethlite.o
>>  
>> +# VFIO PCI device assignment
>> +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
>> +
>>  obj-y := $(addprefix ../,$(obj-y))
>> diff --git a/hw/spapr.h b/hw/spapr.h
>> index b37f337..0c15c88 100644
>> --- a/hw/spapr.h
>> +++ b/hw/spapr.h
>> @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char *propname,
>>  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
>>                        DMAContext *dma);
>>  
>> +struct sPAPRVFIOData;
>> +void spapr_vfio_init_dma(int group_id, uint32_t liobn,
>> +                         struct sPAPRVFIOData *data);
>> +
>>  #endif /* !defined (__HW_SPAPR_H__) */
>> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
>> index 50c288d..0a82842 100644
>> --- a/hw/spapr_iommu.c
>> +++ b/hw/spapr_iommu.c
>> @@ -23,6 +23,8 @@
>>  #include "dma.h"
>>  
>>  #include "hw/spapr.h"
>> +#include "hw/spapr_iommu_vfio.h"
>> +#include "hw/vfio_pci.h"
>>  
>>  #include <libfdt.h>
>>  
>> @@ -183,6 +185,60 @@ static int put_tce_emu(target_ulong liobn, target_ulong ioba, target_ulong tce)
>>      return 0;
>>  }
>>  
>> +typedef struct sPAPRVFIOTable {
>> +    struct sPAPRVFIOData *data;
>> +    uint32_t liobn;
>> +    QLIST_ENTRY(sPAPRVFIOTable) list;
>> +} sPAPRVFIOTable;
>> +
>> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
>> +
>> +void spapr_vfio_init_dma(int group_id, uint32_t liobn,
>> +                         struct sPAPRVFIOData *data)
>> +{
>> +    sPAPRVFIOTable *t;
>> +
>> +    t = g_malloc0(sizeof(*t));
>> +    t->data = data;
>> +    t->liobn = liobn;
>> +
>> +    QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
>> +}
>> +
>> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
>> +{
>> +    sPAPRVFIOTable *t;
>> +    struct tce_iommu_dma_map map = {
>> +        .argsz = sizeof(map),
>> +        .va = 0,
>> +        .dmaaddr = ioba,
>> +    };
>> +
>> +    QLIST_FOREACH(t, &vfio_tce_tables, list) {
>> +        if (t->liobn != liobn) {
>> +            continue;
>> +        }
>> +        if (!t->data) {
>> +            return H_NO_MEM;
>> +        }
> 
> Why would this ever happen?
> 
>> +        if (tce) {
>> +            map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
>> +
>> +            if (t->data->map(t->data->groupid, &map)) {
> 
> Just pass t->data, this is why the VFIOContainer has a union.
>
>> +                perror("TCE_MAP_DMA");
>> +                return H_PARAMETER;
>> +            }
>> +        } else {
>> +            if (t->data->unmap(t->data->groupid, &map)) {
>> +                perror("TCE_UNMAP_DMA");
>> +                return H_PARAMETER;
>> +            }
>> +        }
>> +        return H_SUCCESS;
>> +    }
>> +    return H_CONTINUE; /* positive non-zero value */
>> +}
>> +
>>  static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
>>                                target_ulong opcode, target_ulong *args)
>>  {
>> @@ -200,7 +256,11 @@ static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
>>      ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>>  
>>      ret = put_tce_emu(liobn, ioba, tce);
>> -    if (0 >= ret) {
>> +    if (ret <= 0) {
>> +        return ret ? H_PARAMETER : H_SUCCESS;
>> +    }
>> +    ret = put_tce_vfio(liobn, ioba, tce);
>> +    if (ret <= 0) {
>>          return ret ? H_PARAMETER : H_SUCCESS;
>>      }
>>  #ifdef DEBUG_TCE
>> diff --git a/hw/spapr_iommu_vfio.h b/hw/spapr_iommu_vfio.h
>> new file mode 100644
>> index 0000000..b3d6115
>> --- /dev/null
>> +++ b/hw/spapr_iommu_vfio.h
>> @@ -0,0 +1,34 @@
>> +/*
>> + * Definitions for VFIO IOMMU implementation for SPAPR TCE.
>> + *
>> + * Copyright (c) 2012 Alexey Kardashevskiy <aik@olabs.ru>
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#if !defined(__HW_SPAPR_IOMMU_VFIO_H__)
>> +#define __HW_SPAPR_IOMMU_VFIO_H__
>> +
>> +#include "hw/linux-vfio.h"
>> +
>> +struct sPAPRVFIOData {
>> +    int groupid;
>> +    struct tce_iommu_info info;
> 
> Seems a little lazy to embed this whole thing here, what does anyone
> need argsz and flags for later?


Trying to keep the same structures for both QEMU and the kernel as it is a SPAPR_TCE protocol. Otherwise I would have to copy the whole structure but argsz/flags.

 
>> +    int (*map)(int groupid, struct tce_iommu_dma_map *param);
>> +    int (*unmap)(int groupid, struct tce_iommu_dma_map *param);
> 
> s/int groupid/struct sPAPRVFIOData */ in map/unmap
> 
>> +};
>> +
>> +void spapr_register_vfio_container(struct sPAPRVFIOData *data);
>> +
>> +#endif
>> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
>> index 88c8f2c..75743e7 100644
>> --- a/hw/spapr_pci.c
>> +++ b/hw/spapr_pci.c
>> @@ -22,6 +22,9 @@
>>   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>>   * THE SOFTWARE.
>>   */
>> +#include <sys/types.h>
>> +#include <dirent.h>
>> +
>>  #include "hw.h"
>>  #include "pci.h"
>>  #include "msi.h"
>> @@ -32,7 +35,6 @@
>>  #include "exec-memory.h"
>>  #include <libfdt.h>
>>  #include "trace.h"
>> -
>>  #include "hw/pci_internals.h"
>>  
>>  /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
>> @@ -440,6 +442,12 @@ static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
>>                   level);
>>  }
>>  
>> +static int pci_spapr_get_irq(void *opaque, int irq_num)
>> +{
>> +    sPAPRPHBState *phb = opaque;
>> +    return phb->lsi_table[irq_num].dt_irq;
>> +}
>> +
>>  static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
>>                                unsigned size)
>>  {
>> @@ -515,6 +523,93 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
>>      return phb->dma;
>>  }
>>  
>> +void spapr_register_vfio_container(struct sPAPRVFIOData *data)
>> +{
>> +    sPAPRPHBState *phb;
>> +
>> +    QLIST_FOREACH(phb, &spapr->phbs, list) {
>> +        if (phb->iommugroupid == data->groupid) {
>> +            phb->vfio_data = *data;
>> +            phb->dma_window_start = phb->vfio_data.info.dma32_window_start;
>> +            phb->dma_window_size = phb->vfio_data.info.dma32_window_size;
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +static int spapr_pci_scan_vfio(sPAPRPHBState *phb)
>> +{
>> +    char iommupath[256];
>> +    DIR *dirp;
>> +    struct dirent *entry;
>> +
>> +    if (!phb->scan) {
>> +        trace_spapr_pci("Autoscan disabled for ", phb->dtbusname);
>> +        return 0;
>> +    }
>> +
>> +    snprintf(iommupath, sizeof(iommupath),
>> +             "/sys/kernel/iommu_groups/%d/devices/", phb->iommugroupid);
>> +    dirp = opendir(iommupath);
>> +
>> +    while ((entry = readdir(dirp)) != NULL) {
>> +        char *tmp;
>> +        FILE *deviceclassfile;
>> +        unsigned deviceclass = 0, domainid, busid, devid, fnid;
>> +        char addr[32];
>> +        DeviceState *dev;
>> +
>> +        if (sscanf(entry->d_name, "%X:%X:%X.%x",
>> +                   &domainid, &busid, &devid, &fnid) != 4) {
>> +            continue;
>> +        }
>> +
>> +        tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
>> +        trace_spapr_pci("Reading device class from ", tmp);
>> +
>> +        deviceclassfile = fopen(tmp, "r");
>> +        if (deviceclassfile) {
>> +            fscanf(deviceclassfile, "%x", &deviceclass);
>> +            fclose(deviceclassfile);
>> +        }
>> +        g_free(tmp);
>> +
>> +        if (!deviceclass) {
>> +            continue;
>> +        }
>> +        if ((phb->scan < 2) &&
>> +            ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8))) {
>> +            /* Skip _any_ bridge */
>> +            continue;
>> +        }
>> +        if ((deviceclass == 0xc0310) || (deviceclass == 0xc0320)) {
>> +            /* Tweak USB */
>> +            phb->force_addr = 1;
>> +            phb->enable_multifunction = 1;
>> +        }
>> +
>> +        trace_spapr_pci("Creating devicei from ", entry->d_name);
>> +
>> +        dev = qdev_create(&phb->host_state.bus->qbus, "vfio-pci");
>> +        if (!dev) {
>> +            fprintf(stderr, "failed to create vfio-pci\n");
>> +            continue;
>> +        }
>> +        qdev_prop_parse(dev, "host", entry->d_name);
>> +        if (phb->force_addr) {
>> +            snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
>> +            qdev_prop_parse(dev, "addr", addr);
>> +        }
>> +        if (phb->enable_multifunction) {
>> +            qdev_prop_set_bit(dev, "multifunction", 1);
>> +        }
>> +        qdev_init_nofail(dev);
>> +    }
>> +    closedir(dirp);
>> +
>> +    return 0;
>> +}
>> +
>>  static int spapr_phb_init(SysBusDevice *s)
>>  {
>>      sPAPRPHBState *phb = DO_UPCAST(sPAPRPHBState, host_state.busdev, s);
>> @@ -567,15 +662,13 @@ static int spapr_phb_init(SysBusDevice *s)
>>  
>>      bus = pci_register_bus(&phb->host_state.busdev.qdev,
>>                             phb->busname ? phb->busname : phb->dtbusname,
>> -                           pci_spapr_set_irq, NULL, pci_spapr_map_irq, phb,
>> +                           pci_spapr_set_irq, pci_spapr_get_irq,
>> +                           pci_spapr_map_irq, phb,
>>                             &phb->memspace, &phb->iospace,
>>                             PCI_DEVFN(0, 0), PCI_NUM_PINS);
>>      phb->host_state.bus = bus;
>>  
>>      phb->dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
>> -    phb->dma_window_start = 0;
>> -    phb->dma_window_size = 0x40000000;
>> -    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn, phb->dma_window_size);
>>      pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
>>  
>>      QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
>> @@ -588,6 +681,19 @@ static int spapr_phb_init(SysBusDevice *s)
>>          }
>>      }
>>  
>> +    if (phb->iommugroupid >= 0) {
>> +        if (spapr_pci_scan_vfio(phb) < 0) {
>> +            return -1;
>> +        }
>> +        spapr_vfio_init_dma(phb->iommugroupid, phb->dma_liobn, &phb->vfio_data);
>> +        return 0;
>> +    }
>> +
>> +    phb->dma_window_start = 0;
>> +    phb->dma_window_size = 0x40000000;
>> +    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
>> +                                         phb->dma_window_size);
>> +
>>      return 0;
>>  }
>>  
>> @@ -599,6 +705,10 @@ static Property spapr_phb_properties[] = {
>>      DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
>>      DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
>>      DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
>> +    DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
>> +    DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1),
>> +    DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
>> +    DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>  
>> @@ -729,6 +839,10 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>      _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
>>                       sizeof(interrupt_map)));
>>  
>> +    if (!phb->dma_window_size) {
>> +        fprintf(stderr, "Unexpected error: DMA window is zero, exiting\n");
>> +        exit(1);
>> +    }
>>      spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
>>                   phb->dma_liobn, phb->dma_window_start,
>>                   phb->dma_window_size);
>> diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
>> index 145071c..ed17053 100644
>> --- a/hw/spapr_pci.h
>> +++ b/hw/spapr_pci.h
>> @@ -26,6 +26,7 @@
>>  #include "hw/pci.h"
>>  #include "hw/pci_host.h"
>>  #include "hw/xics.h"
>> +#include "hw/spapr_iommu_vfio.h"
>>  
>>  #define SPAPR_MSIX_MAX_DEVS 32
>>  
>> @@ -57,6 +58,11 @@ typedef struct sPAPRPHBState {
>>          int nvec;
>>      } msi_table[SPAPR_MSIX_MAX_DEVS];
>>  
>> +    struct sPAPRVFIOData vfio_data;
>> +    int32_t iommugroupid;
>> +    uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
>> +    uint8_t enable_multifunction, force_addr;
>> +
>>      QLIST_ENTRY(sPAPRPHBState) list;
>>  } sPAPRPHBState;
>>  
>> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
>> index 0ad4761..0023d41 100644
>> --- a/hw/vfio_pci.c
>> +++ b/hw/vfio_pci.c
>> @@ -43,6 +43,7 @@
>>  #include "range.h"
>>  #include "vfio_pci.h"
>>  #include "linux-vfio.h"
>> +#include "hw/spapr_iommu_vfio.h"
>>  
>>  //#define DEBUG_VFIO
>>  #ifdef DEBUG_VFIO
>> @@ -55,6 +56,8 @@
>>  
>>  #define MSIX_CAP_LENGTH 12
>>  
>> +static VFIOGroup *vfio_get_group(int groupid);
>> +
> 
> Unnecessary, because...
> 
>>  static QLIST_HEAD(, VFIOContainer)
>>      container_list = QLIST_HEAD_INITIALIZER(container_list);
>>  
>> @@ -1088,6 +1091,31 @@ static void vfio_listener_release(VFIOContainer *container)
>>  }
>>  
>>  /*
>> + * sPAPR TCE DMA interface
>> + */
>> +static int spapr_tce_map(int groupid, struct tce_iommu_dma_map *param)
>> +{
>> +    VFIOGroup *group = vfio_get_group(groupid);
>> +
>> +    if (!group || !group->container) {
>> +        return -1;
>> +    }
> 
> This should be:
> 
> static int spapr_tce_map(sPAPRVFIOData *spapr_data, struct tce_iommu_dma_map *param)
> {
>     VFIOContainer *container;
> 
>     container = container_of(spapr_data, VFIOContainer, iommu_data.spapr);
> 
>     return ioctl(container->fd, ....
> }
> 
> 
>> +
>> +    return ioctl(group->container->fd, SPAPR_TCE_IOMMU_MAP_DMA, param);
>> +}
>> +
>> +static int spapr_tce_unmap(int groupid, struct tce_iommu_dma_map *param)
>> +{
>> +    VFIOGroup *group = vfio_get_group(groupid);
>> +
>> +    if (!group || !group->container) {
>> +        return -1;
>> +    }
>> +
>> +    return ioctl(group->container->fd, SPAPR_TCE_IOMMU_UNMAP_DMA, param);
>> +}
>> +
>> +/*
>>   * Interrupt setup
>>   */
>>  static void vfio_disable_interrupts(VFIODevice *vdev)
>> @@ -1590,6 +1618,42 @@ static int vfio_connect_container(VFIOGroup *group)
>>          memory_listener_register(&container->iommu_data.listener,
>>                                   get_system_memory());
>>  
>> +    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, SPAPR_TCE_IOMMU)) {
>> +        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
>> +        if (ret) {
>> +            error_report("vfio: failed to set group container: %s\n",
>> +                         strerror(errno));
>> +            g_free(container);
>> +            close(fd);
>> +            return -1;
>> +        }
>> +
>> +        ret = ioctl(fd, VFIO_SET_IOMMU, SPAPR_TCE_IOMMU);
>> +        if (ret) {
>> +            error_report("vfio: failed to set iommu for container: %s\n",
>> +                         strerror(errno));
>> +            g_free(container);
>> +            close(fd);
>> +            return -1;
>> +        }
>> +
>> +        container->iommu_data.spapr.info.argsz =
>> +                sizeof(container->iommu_data.spapr.info);
>> +        ret = ioctl(fd, SPAPR_TCE_IOMMU_GET_INFO,
>> +                    &container->iommu_data.spapr.info);
>> +        if (ret) {
>> +            error_report("vfio: failed to get iommu info for container: %s\n",
>> +                         strerror(errno));
>> +            g_free(container);
>> +            close(fd);
>> +            return -1;
>> +        }
>> +
>> +        container->iommu_data.spapr.map = spapr_tce_map;
>> +        container->iommu_data.spapr.unmap = spapr_tce_unmap;
>> +        container->iommu_data.spapr.groupid = group->groupid;
> 
> This at least deserves a comment because x86 doesn't have a 1:1 mapping
> of container to groupid.


Give me a good example and add a comment about why you need MemoryListener as powerpc does not need the whole RAM to be mapped as a DMA Window :)
Honestly, I do not know what to write there what would make sense, explain at least to a powerpc-familiar person what is going on and not to be a 100 lines essay. I will put some though.


> I really think it might make more sense to pass a PCIBus here instead of a groupid.

Or a PCIDevice if I understand SRIOV right. Many choices. Let's stay with groupid for now.


> Whatever you choose, this
> shouldn't be part of sPAPRVFIOData, it should be another parameter to
> spapr_register_vfio_container.


Done. I will resend the patch. Thanks for your comments.



>> +        spapr_register_vfio_container(&container->iommu_data.spapr);
>> +
>>      } else {
>>          error_report("vfio: No available IOMMU models\n");
>>          g_free(container);
>> diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
>> index 00bb3dd..ddb5332 100644
>> --- a/hw/vfio_pci.h
>> +++ b/hw/vfio_pci.h
>> @@ -6,6 +6,7 @@
>>  #include "pci.h"
>>  #include "ioapic.h"
>>  #include "event_notifier.h"
>> +#include "hw/spapr_iommu_vfio.h"
>>  
>>  typedef struct VFIOPCIHostDevice {
>>      uint16_t seg;
>> @@ -59,6 +60,7 @@ typedef struct VFIOContainer {
>>      struct {
>>          union {
>>              MemoryListener listener;
>> +            struct sPAPRVFIOData spapr;
>>          };
>>          void (*release)(struct VFIOContainer *);
>>      } iommu_data;
>> diff --git a/trace-events b/trace-events
>> index e548f86..9100591 100644
>> --- a/trace-events
>> +++ b/trace-events
>> @@ -848,6 +848,7 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
>>  qxl_render_update_area_done(void *cookie) "%p"
>>  
>>  # hw/spapr_pci.c
>> +spapr_pci(const char *msg1, const char *msg2) "%s%s"
>>  spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
>>  spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
>>  spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"
> 
> 
>

Patch

diff --git a/hw/linux-vfio.h b/hw/linux-vfio.h
index 300d49b..27a0501 100644
--- a/hw/linux-vfio.h
+++ b/hw/linux-vfio.h
@@ -442,4 +442,30 @@  struct vfio_iommu_type1_dma_unmap {
 
 #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
 
+/*
+ * Interface to SPAPR TCE (POWERPC Book3S)
+ */
+#define SPAPR_TCE_IOMMU         2
+
+struct tce_iommu_info {
+    __u32 argsz;
+    __u32 flags;
+    __u32 dma32_window_start;
+    __u32 dma32_window_size;
+    __u64 dma64_window_start;
+    __u64 dma64_window_size;
+};
+
+#define SPAPR_TCE_IOMMU_GET_INFO        _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+struct tce_iommu_dma_map {
+    __u32 argsz;
+    __u32 flags;
+    __u64 va;
+    __u64 dmaaddr;
+};
+
+#define SPAPR_TCE_IOMMU_MAP_DMA         _IO(VFIO_TYPE, VFIO_BASE + 13)
+#define SPAPR_TCE_IOMMU_UNMAP_DMA       _IO(VFIO_TYPE, VFIO_BASE + 14)
+
 #endif /* VFIO_H */
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index f573a95..c46a049 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -25,4 +25,7 @@  obj-$(CONFIG_FDT) += ../device_tree.o
 # Xilinx PPC peripherals
 obj-y += xilinx_ethlite.o
 
+# VFIO PCI device assignment
+obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
+
 obj-y := $(addprefix ../,$(obj-y))
diff --git a/hw/spapr.h b/hw/spapr.h
index b37f337..0c15c88 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -340,4 +340,8 @@  int spapr_dma_dt(void *fdt, int node_off, const char *propname,
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
                       DMAContext *dma);
 
+struct sPAPRVFIOData;
+void spapr_vfio_init_dma(int group_id, uint32_t liobn,
+                         struct sPAPRVFIOData *data);
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 50c288d..0a82842 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -23,6 +23,8 @@ 
 #include "dma.h"
 
 #include "hw/spapr.h"
+#include "hw/spapr_iommu_vfio.h"
+#include "hw/vfio_pci.h"
 
 #include <libfdt.h>
 
@@ -183,6 +185,60 @@  static int put_tce_emu(target_ulong liobn, target_ulong ioba, target_ulong tce)
     return 0;
 }
 
+typedef struct sPAPRVFIOTable {
+    struct sPAPRVFIOData *data;
+    uint32_t liobn;
+    QLIST_ENTRY(sPAPRVFIOTable) list;
+} sPAPRVFIOTable;
+
+QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
+
+void spapr_vfio_init_dma(int group_id, uint32_t liobn,
+                         struct sPAPRVFIOData *data)
+{
+    sPAPRVFIOTable *t;
+
+    t = g_malloc0(sizeof(*t));
+    t->data = data;
+    t->liobn = liobn;
+
+    QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
+}
+
+static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
+{
+    sPAPRVFIOTable *t;
+    struct tce_iommu_dma_map map = {
+        .argsz = sizeof(map),
+        .va = 0,
+        .dmaaddr = ioba,
+    };
+
+    QLIST_FOREACH(t, &vfio_tce_tables, list) {
+        if (t->liobn != liobn) {
+            continue;
+        }
+        if (!t->data) {
+            return H_NO_MEM;
+        }
+        if (tce) {
+            map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
+
+            if (t->data->map(t->data->groupid, &map)) {
+                perror("TCE_MAP_DMA");
+                return H_PARAMETER;
+            }
+        } else {
+            if (t->data->unmap(t->data->groupid, &map)) {
+                perror("TCE_UNMAP_DMA");
+                return H_PARAMETER;
+            }
+        }
+        return H_SUCCESS;
+    }
+    return H_CONTINUE; /* positive non-zero value */
+}
+
 static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
                               target_ulong opcode, target_ulong *args)
 {
@@ -200,7 +256,11 @@  static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
     ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
 
     ret = put_tce_emu(liobn, ioba, tce);
-    if (0 >= ret) {
+    if (ret <= 0) {
+        return ret ? H_PARAMETER : H_SUCCESS;
+    }
+    ret = put_tce_vfio(liobn, ioba, tce);
+    if (ret <= 0) {
         return ret ? H_PARAMETER : H_SUCCESS;
     }
 #ifdef DEBUG_TCE
diff --git a/hw/spapr_iommu_vfio.h b/hw/spapr_iommu_vfio.h
new file mode 100644
index 0000000..b3d6115
--- /dev/null
+++ b/hw/spapr_iommu_vfio.h
@@ -0,0 +1,34 @@ 
+/*
+ * Definitions for VFIO IOMMU implementation for SPAPR TCE.
+ *
+ * Copyright (c) 2012 Alexey Kardashevskiy <aik@olabs.ru>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if !defined(__HW_SPAPR_IOMMU_VFIO_H__)
+#define __HW_SPAPR_IOMMU_VFIO_H__
+
+#include "hw/linux-vfio.h"
+
+struct sPAPRVFIOData {
+    int groupid;
+    struct tce_iommu_info info;
+    int (*map)(int groupid, struct tce_iommu_dma_map *param);
+    int (*unmap)(int groupid, struct tce_iommu_dma_map *param);
+};
+
+void spapr_register_vfio_container(struct sPAPRVFIOData *data);
+
+#endif
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 88c8f2c..75743e7 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -22,6 +22,9 @@ 
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include <sys/types.h>
+#include <dirent.h>
+
 #include "hw.h"
 #include "pci.h"
 #include "msi.h"
@@ -32,7 +35,6 @@ 
 #include "exec-memory.h"
 #include <libfdt.h>
 #include "trace.h"
-
 #include "hw/pci_internals.h"
 
 /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
@@ -440,6 +442,12 @@  static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
                  level);
 }
 
+static int pci_spapr_get_irq(void *opaque, int irq_num)
+{
+    sPAPRPHBState *phb = opaque;
+    return phb->lsi_table[irq_num].dt_irq;
+}
+
 static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
                               unsigned size)
 {
@@ -515,6 +523,93 @@  static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
     return phb->dma;
 }
 
+void spapr_register_vfio_container(struct sPAPRVFIOData *data)
+{
+    sPAPRPHBState *phb;
+
+    QLIST_FOREACH(phb, &spapr->phbs, list) {
+        if (phb->iommugroupid == data->groupid) {
+            phb->vfio_data = *data;
+            phb->dma_window_start = phb->vfio_data.info.dma32_window_start;
+            phb->dma_window_size = phb->vfio_data.info.dma32_window_size;
+            return;
+        }
+    }
+}
+
+static int spapr_pci_scan_vfio(sPAPRPHBState *phb)
+{
+    char iommupath[256];
+    DIR *dirp;
+    struct dirent *entry;
+
+    if (!phb->scan) {
+        trace_spapr_pci("Autoscan disabled for ", phb->dtbusname);
+        return 0;
+    }
+
+    snprintf(iommupath, sizeof(iommupath),
+             "/sys/kernel/iommu_groups/%d/devices/", phb->iommugroupid);
+    dirp = opendir(iommupath);
+
+    while ((entry = readdir(dirp)) != NULL) {
+        char *tmp;
+        FILE *deviceclassfile;
+        unsigned deviceclass = 0, domainid, busid, devid, fnid;
+        char addr[32];
+        DeviceState *dev;
+
+        if (sscanf(entry->d_name, "%X:%X:%X.%x",
+                   &domainid, &busid, &devid, &fnid) != 4) {
+            continue;
+        }
+
+        tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
+        trace_spapr_pci("Reading device class from ", tmp);
+
+        deviceclassfile = fopen(tmp, "r");
+        if (deviceclassfile) {
+            fscanf(deviceclassfile, "%x", &deviceclass);
+            fclose(deviceclassfile);
+        }
+        g_free(tmp);
+
+        if (!deviceclass) {
+            continue;
+        }
+        if ((phb->scan < 2) &&
+            ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8))) {
+            /* Skip _any_ bridge */
+            continue;
+        }
+        if ((deviceclass == 0xc0310) || (deviceclass == 0xc0320)) {
+            /* Tweak USB */
+            phb->force_addr = 1;
+            phb->enable_multifunction = 1;
+        }
+
+        trace_spapr_pci("Creating devicei from ", entry->d_name);
+
+        dev = qdev_create(&phb->host_state.bus->qbus, "vfio-pci");
+        if (!dev) {
+            fprintf(stderr, "failed to create vfio-pci\n");
+            continue;
+        }
+        qdev_prop_parse(dev, "host", entry->d_name);
+        if (phb->force_addr) {
+            snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
+            qdev_prop_parse(dev, "addr", addr);
+        }
+        if (phb->enable_multifunction) {
+            qdev_prop_set_bit(dev, "multifunction", 1);
+        }
+        qdev_init_nofail(dev);
+    }
+    closedir(dirp);
+
+    return 0;
+}
+
 static int spapr_phb_init(SysBusDevice *s)
 {
     sPAPRPHBState *phb = DO_UPCAST(sPAPRPHBState, host_state.busdev, s);
@@ -567,15 +662,13 @@  static int spapr_phb_init(SysBusDevice *s)
 
     bus = pci_register_bus(&phb->host_state.busdev.qdev,
                            phb->busname ? phb->busname : phb->dtbusname,
-                           pci_spapr_set_irq, NULL, pci_spapr_map_irq, phb,
+                           pci_spapr_set_irq, pci_spapr_get_irq,
+                           pci_spapr_map_irq, phb,
                            &phb->memspace, &phb->iospace,
                            PCI_DEVFN(0, 0), PCI_NUM_PINS);
     phb->host_state.bus = bus;
 
     phb->dma_liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
-    phb->dma_window_start = 0;
-    phb->dma_window_size = 0x40000000;
-    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn, phb->dma_window_size);
     pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
 
     QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
@@ -588,6 +681,19 @@  static int spapr_phb_init(SysBusDevice *s)
         }
     }
 
+    if (phb->iommugroupid >= 0) {
+        if (spapr_pci_scan_vfio(phb) < 0) {
+            return -1;
+        }
+        spapr_vfio_init_dma(phb->iommugroupid, phb->dma_liobn, &phb->vfio_data);
+        return 0;
+    }
+
+    phb->dma_window_start = 0;
+    phb->dma_window_size = 0x40000000;
+    phb->dma = spapr_tce_new_dma_context(phb->dma_liobn,
+                                         phb->dma_window_size);
+
     return 0;
 }
 
@@ -599,6 +705,10 @@  static Property spapr_phb_properties[] = {
     DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, 0),
     DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, 0x10000),
     DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, 0),
+    DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
+    DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1),
+    DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),
+    DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -729,6 +839,10 @@  int spapr_populate_pci_dt(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                      sizeof(interrupt_map)));
 
+    if (!phb->dma_window_size) {
+        fprintf(stderr, "Unexpected error: DMA window is zero, exiting\n");
+        exit(1);
+    }
     spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
                  phb->dma_liobn, phb->dma_window_start,
                  phb->dma_window_size);
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 145071c..ed17053 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -26,6 +26,7 @@ 
 #include "hw/pci.h"
 #include "hw/pci_host.h"
 #include "hw/xics.h"
+#include "hw/spapr_iommu_vfio.h"
 
 #define SPAPR_MSIX_MAX_DEVS 32
 
@@ -57,6 +58,11 @@  typedef struct sPAPRPHBState {
         int nvec;
     } msi_table[SPAPR_MSIX_MAX_DEVS];
 
+    struct sPAPRVFIOData vfio_data;
+    int32_t iommugroupid;
+    uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */
+    uint8_t enable_multifunction, force_addr;
+
     QLIST_ENTRY(sPAPRPHBState) list;
 } sPAPRPHBState;
 
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 0ad4761..0023d41 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -43,6 +43,7 @@ 
 #include "range.h"
 #include "vfio_pci.h"
 #include "linux-vfio.h"
+#include "hw/spapr_iommu_vfio.h"
 
 //#define DEBUG_VFIO
 #ifdef DEBUG_VFIO
@@ -55,6 +56,8 @@ 
 
 #define MSIX_CAP_LENGTH 12
 
+static VFIOGroup *vfio_get_group(int groupid);
+
 static QLIST_HEAD(, VFIOContainer)
     container_list = QLIST_HEAD_INITIALIZER(container_list);
 
@@ -1088,6 +1091,31 @@  static void vfio_listener_release(VFIOContainer *container)
 }
 
 /*
+ * sPAPR TCE DMA interface
+ */
+static int spapr_tce_map(int groupid, struct tce_iommu_dma_map *param)
+{
+    VFIOGroup *group = vfio_get_group(groupid);
+
+    if (!group || !group->container) {
+        return -1;
+    }
+
+    return ioctl(group->container->fd, SPAPR_TCE_IOMMU_MAP_DMA, param);
+}
+
+static int spapr_tce_unmap(int groupid, struct tce_iommu_dma_map *param)
+{
+    VFIOGroup *group = vfio_get_group(groupid);
+
+    if (!group || !group->container) {
+        return -1;
+    }
+
+    return ioctl(group->container->fd, SPAPR_TCE_IOMMU_UNMAP_DMA, param);
+}
+
+/*
  * Interrupt setup
  */
 static void vfio_disable_interrupts(VFIODevice *vdev)
@@ -1590,6 +1618,42 @@  static int vfio_connect_container(VFIOGroup *group)
         memory_listener_register(&container->iommu_data.listener,
                                  get_system_memory());
 
+    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, SPAPR_TCE_IOMMU)) {
+        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+        if (ret) {
+            error_report("vfio: failed to set group container: %s\n",
+                         strerror(errno));
+            g_free(container);
+            close(fd);
+            return -1;
+        }
+
+        ret = ioctl(fd, VFIO_SET_IOMMU, SPAPR_TCE_IOMMU);
+        if (ret) {
+            error_report("vfio: failed to set iommu for container: %s\n",
+                         strerror(errno));
+            g_free(container);
+            close(fd);
+            return -1;
+        }
+
+        container->iommu_data.spapr.info.argsz =
+                sizeof(container->iommu_data.spapr.info);
+        ret = ioctl(fd, SPAPR_TCE_IOMMU_GET_INFO,
+                    &container->iommu_data.spapr.info);
+        if (ret) {
+            error_report("vfio: failed to get iommu info for container: %s\n",
+                         strerror(errno));
+            g_free(container);
+            close(fd);
+            return -1;
+        }
+
+        container->iommu_data.spapr.map = spapr_tce_map;
+        container->iommu_data.spapr.unmap = spapr_tce_unmap;
+        container->iommu_data.spapr.groupid = group->groupid;
+        spapr_register_vfio_container(&container->iommu_data.spapr);
+
     } else {
         error_report("vfio: No available IOMMU models\n");
         g_free(container);
diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
index 00bb3dd..ddb5332 100644
--- a/hw/vfio_pci.h
+++ b/hw/vfio_pci.h
@@ -6,6 +6,7 @@ 
 #include "pci.h"
 #include "ioapic.h"
 #include "event_notifier.h"
+#include "hw/spapr_iommu_vfio.h"
 
 typedef struct VFIOPCIHostDevice {
     uint16_t seg;
@@ -59,6 +60,7 @@  typedef struct VFIOContainer {
     struct {
         union {
             MemoryListener listener;
+            struct sPAPRVFIOData spapr;
         };
         void (*release)(struct VFIOContainer *);
     } iommu_data;
diff --git a/trace-events b/trace-events
index e548f86..9100591 100644
--- a/trace-events
+++ b/trace-events
@@ -848,6 +848,7 @@  qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
 qxl_render_update_area_done(void *cookie) "%p"
 
 # hw/spapr_pci.c
+spapr_pci(const char *msg1, const char *msg2) "%s%s"
 spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
 spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
 spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"