diff mbox

[v2,1/1] KVM s390 pci infrastructure modelling

Message ID 1435569893-27996-2-git-send-email-lihbbj@linux.vnet.ibm.com
State New
Headers show

Commit Message

Hong Bo Li June 29, 2015, 9:24 a.m. UTC
This patch introduces a new facility (and bus)
to hold devices representing information actually
provided by s390 firmware and I/O configuration.
Usage example:
-device s390-pcihost
-device vfio-pci,host=0000:00:00.0,id=vpci1
-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1

The first line will create an s390 pci host bridge
and init the root bus. The second line will create
a standard vfio pci device, and attach it to the
root bus. These are similar to the standard process
to define a pci device on other platforms.

The third line will create an s390 pci device to
store s390-specific information, and reference
the corresponding vfio pci device via device id.
We create an s390 pci facility bus to hold all the
zpci devices.

Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
---
 hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
 hw/s390x/s390-pci-bus.h    |  48 ++++++-
 hw/s390x/s390-pci-inst.c   |   4 +-
 hw/s390x/s390-virtio-ccw.c |   5 +-
 4 files changed, 283 insertions(+), 88 deletions(-)

Comments

Michael S. Tsirkin June 29, 2015, 10:01 a.m. UTC | #1
On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> This patch introduce a new facility(and bus)
> to hold devices representing information actually
> provided by s390 firmware and I/O configuration.
> usage example:
> -device s390-pcihost
> -device vfio-pci,host=0000:00:00.0,id=vpci1
> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> 
> The first line will create a s390 pci host bridge
> and init the root bus. The second line will create
> a standard vfio pci device, and attach it to the
> root bus. These are similiar to the standard process
> to define a pci device on other platform.
> 
> The third line will create a s390 pci device to
> store s390 specific information, and references
> the corresponding vfio pci device via device id.
> We create a s390 pci facility bus to hold all the
> zpci devices.
> 
> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>

It's mostly up to the s390 maintainers, but I'd like to note
one thing below.

> ---
>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
>  hw/s390x/s390-pci-inst.c   |   4 +-
>  hw/s390x/s390-virtio-ccw.c |   5 +-
>  4 files changed, 283 insertions(+), 88 deletions(-)
> 
> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> index 560b66a..d5e7b2e 100644
> --- a/hw/s390x/s390-pci-bus.c
> +++ b/hw/s390x/s390-pci-bus.c
> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>      PciCcdfErr *eccdf;
>      int rc = 1;
>      SeiContainer *sei_cont;
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> +    S390PCIFacility *s = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>  
>      if (!s) {
>          return rc;
> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>  
>  int chsc_sei_nt2_have_event(void)
>  {
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> +    S390PCIFacility *s = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>  
>      if (!s) {
>          return 0;
> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>      return !QTAILQ_EMPTY(&s->pending_sei);
>  }
>  
> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
> +{
> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> +}
> +
> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
> +{
> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> +    if (zpci->is_unplugged)
> +        object_unparent(OBJECT(zpci));
> +}
> +
>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>  {
>      S390PCIBusDevice *pbdev;
> -    int i;
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> +    BusChild *kid;
> +    S390PCIFacility *s = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>  
>      if (!s) {
>          return NULL;
>      }
>  
> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
> -        pbdev = &s->pbdev[i];
> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> +        pbdev = (S390PCIBusDevice *)kid->child;
> +        if (pbdev->fid == fid) {
>              return pbdev;
>          }
>      }
> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>      return;
>  }
>  
> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> -{
> -    return PCI_SLOT(pdev->devfn);
> -}
> -
> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> -{
> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> -}
> -
>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>  {
>      S390PCIBusDevice *pbdev;
> -    int i;
> -    int j = 0;
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> +    BusChild *kid;
> +    int i = 0;
> +    S390PCIFacility *s = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>  
>      if (!s) {
>          return NULL;
>      }
>  
> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
> -        pbdev = &s->pbdev[i];
> -
> -        if (pbdev->fh == 0) {
> -            continue;
> -        }
> -
> -        if (j == idx) {
> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> +        pbdev = (S390PCIBusDevice *)kid->child;
> +        if (i == idx) {
>              return pbdev;
>          }
> -        j++;
> +        i++;
>      }
>  
>      return NULL;

This relies on the order of children on the qbus, which I think is wrong.
Generally, I'm not sure why you convert all slot lookups to child
lookups: more code to achieve the same effect?

> @@ -167,16 +164,16 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>  S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>  {
>      S390PCIBusDevice *pbdev;
> -    int i;
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> +    BusChild *kid;
> +    S390PCIFacility *s = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>  
>      if (!s || !fh) {
>          return NULL;
>      }
>  
> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
> -        pbdev = &s->pbdev[i];
> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> +        pbdev = (S390PCIBusDevice *)kid->child;
>          if (pbdev->fh == fh) {
>              return pbdev;
>          }
> @@ -185,12 +182,33 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>      return NULL;
>  }
>  
> +static S390PCIBusDevice *s390_pci_find_dev_by_pdev(PCIDevice *pdev)
> +{
> +    S390PCIBusDevice *pbdev;
> +    BusChild *kid;
> +    S390PCIFacility *s = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> +
> +    if (!s || !pdev) {
> +        return NULL;
> +    }
> +
> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> +        pbdev = (S390PCIBusDevice *)kid->child;
> +        if (pbdev->pdev == pdev) {
> +            return pbdev;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
>  static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
>                                      uint32_t fid, uint64_t faddr, uint32_t e)
>  {
>      SeiContainer *sei_cont;
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> +    S390PCIFacility *s = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>  
>      if (!s) {
>          return;
> @@ -308,7 +326,10 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>  {
>      uint64_t pte;
>      uint32_t flags;
> -    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
> +    S390PCIDeviceConn *conn = container_of(iommu, S390PCIDeviceConn,
> +                                           iommu_mr);
> +    S390PCIBusDevice *pbdev = conn->zpci;
> +
>      S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
>                                             ->qbus.parent);
>      IOMMUTLBEntry ret = {
> @@ -319,8 +340,14 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>          .perm = IOMMU_NONE,
>      };
>  
> +    if (!pbdev) {
> +        return ret;
> +    }
> +
>      DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
>  
> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent);
> +
>      /* s390 does not have an APIC mapped to main storage so we use
>       * a separate AddressSpace only for msix notifications
>       */
> @@ -382,7 +409,7 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>  {
>      S390pciState *s = opaque;
>  
> -    return &s->pbdev[PCI_SLOT(devfn)].as;
> +    return &s->conn[PCI_SLOT(devfn)].iommu_as;
>  }
>  
>  static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
> @@ -455,9 +482,10 @@ static void s390_pcihost_init_as(S390pciState *s)
>      int i;
>  
>      for (i = 0; i < PCI_SLOT_MAX; i++) {
> -        memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s),
> +        memory_region_init_iommu(&s->conn[i].iommu_mr, OBJECT(s),
>                                   &s390_iommu_ops, "iommu-s390", UINT64_MAX);
> -        address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci");
> +        address_space_init(&s->conn[i].iommu_as, &s->conn[i].iommu_mr,
> +                           "iommu-pci");
>      }
>  
>      memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
> @@ -484,7 +512,7 @@ static int s390_pcihost_init(SysBusDevice *dev)
>      bus = BUS(b);
>      qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
>      phb->bus = b;
> -    QTAILQ_INIT(&s->pending_sei);
> +
>      return 0;
>  }
>  
> @@ -519,26 +547,6 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
>  static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
>                                    DeviceState *dev, Error **errp)
>  {
> -    PCIDevice *pci_dev = PCI_DEVICE(dev);
> -    S390PCIBusDevice *pbdev;
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
> -                                           ->qbus.parent);
> -
> -    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
> -
> -    pbdev->fid = s390_pci_get_pfid(pci_dev);
> -    pbdev->pdev = pci_dev;
> -    pbdev->configured = true;
> -    pbdev->fh = s390_pci_get_pfh(pci_dev);
> -
> -    s390_pcihost_setup_msix(pbdev);
> -
> -    if (dev->hotplugged) {
> -        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
> -                                     pbdev->fh, pbdev->fid);
> -        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
> -                                     pbdev->fh, pbdev->fid);
> -    }
>      return;
>  }
>  
> @@ -546,31 +554,30 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
>                                      DeviceState *dev, Error **errp)
>  {
>      PCIDevice *pci_dev = PCI_DEVICE(dev);
> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
> -                                           ->qbus.parent);
> -    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
> -
> -    if (pbdev->configured) {
> -        pbdev->configured = false;
> -        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
> -                                     pbdev->fh, pbdev->fid);
> +    S390PCIBusDevice *pbdev;
> +    HotplugHandler *hotplug_ctrl;
> +    S390PCIFacility *f = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_GET_CLASS(f);
> +    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_CLASS(k);
> +
> +    /* unplug corresponding zpci device */
> +    pbdev = s390_pci_find_dev_by_pdev(pci_dev);
> +    if (pbdev) {
> +        hotplug_ctrl = pbdev->qdev.parent_bus->hotplug_handler;
> +        if (hdc->unplug_request) {
> +            hdc->unplug_request(hotplug_ctrl, &pbdev->qdev, errp);
> +        }
>      }
>  
> -    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
> -                                 pbdev->fh, pbdev->fid);
> -    pbdev->fh = 0;
> -    pbdev->fid = 0;
> -    pbdev->pdev = NULL;
>      object_unparent(OBJECT(pci_dev));
>  }
>  
>  static void s390_pcihost_class_init(ObjectClass *klass, void *data)
>  {
>      SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
> -    DeviceClass *dc = DEVICE_CLASS(klass);
>      HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
>  
> -    dc->cannot_instantiate_with_device_add_yet = true;
>      k->init = s390_pcihost_init;
>      hc->plug = s390_pcihost_hot_plug;
>      hc->unplug = s390_pcihost_hot_unplug;
> @@ -588,9 +595,156 @@ static const TypeInfo s390_pcihost_info = {
>      }
>  };
>  
> +static void s390_pci_device_hot_plug(HotplugHandler *hotplug_dev,
> +                                     DeviceState *dev, Error **errp)
> +{
> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> +
> +    zpci->configured = true;
> +
> +    if (dev->hotplugged) {
> +        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
> +                                     zpci->fh, zpci->fid);
> +        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
> +                                     zpci->fh, zpci->fid);
> +    }
> +}
> +
> +static void s390_pci_device_hot_unplug_request(HotplugHandler *hotplug_dev,
> +                                       DeviceState *dev, Error **errp)
> +{
> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> +
> +    if (zpci->configured) {
> +        zpci->configured = false;
> +        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
> +                                     zpci->fh, zpci->fid);
> +    }
> +
> +    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
> +                                 zpci->fh, zpci->fid);
> +
> +    zpci->is_unplugged = true;
> +}
> +
> +static const TypeInfo s390_pci_fac_bus_info = {
> +    .name = TYPE_S390_PCI_FAC_BUS,
> +    .parent = TYPE_BUS,
> +    .instance_size = sizeof(S390PCIFacBus),
> +};
> +
> +static int s390_pci_facility_init(S390PCIFacility *f)
> +{
> +    DeviceState *dev = DEVICE(f);
> +
> +    QTAILQ_INIT(&f->pending_sei);
> +    msi_supported = true;
> +    f->fbus = S390_PCI_FAC_BUS(qbus_create(TYPE_S390_PCI_FAC_BUS, dev, NULL));
> +    qbus_set_hotplug_handler(BUS(&f->fbus->qbus), DEVICE(dev), NULL);
> +
> +    return 0;
> +}
> +
> +static void s390_pci_facility_class_init(ObjectClass *klass, void *data)
> +{
> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_CLASS(klass);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(k);
> +
> +    k->init = s390_pci_facility_init;
> +    hc->plug = s390_pci_device_hot_plug;
> +    hc->unplug_request = s390_pci_device_hot_unplug_request;
> +}
> +
> +static const TypeInfo s390_pci_facility_info = {
> +    .name          = TYPE_S390_PCI_FACILITY,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(S390PCIFacility),
> +    .class_init    = s390_pci_facility_class_init,
> +    .class_size    = sizeof(S390PCIFacilityClass),
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_HOTPLUG_HANDLER },
> +        { }
> +    }
> +};
> +
> +static void s390_pci_device_realize(DeviceState *dev, Error **errp)
> +{
> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> +    S390PCIBusDevice *tmp;
> +    S390pciState *s;
> +    BusChild *kid;
> +    PCIDevice *pdev;
> +    int ret;
> +    S390PCIFacility *f = S390_PCI_FACILITY(
> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> +
> +    ret = pci_qdev_find_device(zpci->pci_id, &pdev);
> +    if (ret < 0) {
> +        error_setg(errp, "vfio pci device %s not found", zpci->pci_id);
> +        return;
> +    }
> +
> +    QTAILQ_FOREACH(kid, &f->fbus->qbus.children, sibling) {
> +        tmp = (S390PCIBusDevice *)kid->child;
> +        if (tmp == zpci) {
> +            continue;
> +        }
> +
> +        if (tmp->fid == zpci->fid || tmp->uid == zpci->uid ||
> +            !strcmp(tmp->pci_id, zpci->pci_id)) {
> +            error_setg(errp, "zpci needs unique fid, uid and pci_id");
> +            return;
> +        }
> +    }
> +
> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pdev)->qbus.parent);
> +    s->conn[PCI_SLOT(pdev->devfn)].zpci = zpci;
> +
> +    zpci->pdev = pdev;
> +    zpci->fh = zpci->fid | FH_VIRT;
> +    s390_pcihost_setup_msix(zpci);
> +}
> +
> +static void s390_pci_device_unrealize(DeviceState *dev, Error **errp)
> +{
> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> +
> +    zpci->fh = 0;
> +    zpci->fid = 0;
> +    zpci->pdev = NULL;
> +}
> +
> +static Property s390_pci_device_properties[] = {
> +    DEFINE_PROP_UINT32("fid", S390PCIBusDevice, fid, 0),
> +    DEFINE_PROP_UINT32("uid", S390PCIBusDevice, uid, 0),
> +    DEFINE_PROP_STRING("pci_id", S390PCIBusDevice, pci_id),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void s390_pci_device_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->desc = "s390 pci device";
> +    dc->bus_type = TYPE_S390_PCI_FAC_BUS;
> +    dc->realize = s390_pci_device_realize;
> +    dc->unrealize = s390_pci_device_unrealize;
> +    dc->props = s390_pci_device_properties;
> +}
> +
> +static const TypeInfo s390_pci_device_type_info = {
> +    .name = TYPE_S390_PCI_DEVICE,
> +    .parent = TYPE_DEVICE,
> +    .instance_size = sizeof(S390PCIBusDevice),
> +    .class_init = s390_pci_device_class_init,
> +};
> +
>  static void s390_pci_register_types(void)
>  {
>      type_register_static(&s390_pcihost_info);
> +    type_register_static(&s390_pci_facility_info);
> +    type_register_static(&s390_pci_fac_bus_info);
> +    type_register_static(&s390_pci_device_type_info);
>  }
>  
>  type_init(s390_pci_register_types)
> diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
> index 464a92e..5bf3913 100644
> --- a/hw/s390x/s390-pci-bus.h
> +++ b/hw/s390x/s390-pci-bus.h
> @@ -149,6 +149,21 @@ enum ZpciIoatDtype {
>  #define ZPCI_TABLE_VALID_MASK           0x20
>  #define ZPCI_TABLE_PROT_MASK            0x200
>  
> +#define TYPE_S390_PCI_FACILITY "s390-pci-facility"
> +#define TYPE_S390_PCI_FAC_BUS "s390-pci-fac-bus"
> +#define TYPE_S390_PCI_DEVICE "zpci"
> +
> +#define S390_PCI_FACILITY(obj) \
> +    OBJECT_CHECK(S390PCIFacility, (obj), TYPE_S390_PCI_FACILITY)
> +#define S390_PCI_FAC_BUS(obj) \
> +    OBJECT_CHECK(S390PCIFacBus, (obj), TYPE_S390_PCI_FAC_BUS)
> +#define S390_PCI_FACILITY_CLASS(klass) \
> +    OBJECT_CLASS_CHECK(S390PCIFacilityClass, (klass), TYPE_S390_PCI_FACILITY)
> +#define S390_PCI_DEVICE(obj) \
> +    OBJECT_CHECK(S390PCIBusDevice, (obj), TYPE_S390_PCI_DEVICE)
> +#define S390_PCI_FACILITY_GET_CLASS(obj) \
> +    OBJECT_GET_CLASS(S390PCIFacilityClass, (obj), TYPE_S390_PCI_FACILITY)
> +
>  typedef struct SeiContainer {
>      QTAILQ_ENTRY(SeiContainer) link;
>      uint32_t fid;
> @@ -214,12 +229,16 @@ typedef struct S390MsixInfo {
>  } S390MsixInfo;
>  
>  typedef struct S390PCIBusDevice {
> +    DeviceState qdev;
>      PCIDevice *pdev;
>      bool configured;
> +    bool is_unplugged;
>      bool error_state;
>      bool lgstg_blocked;
>      uint32_t fh;
>      uint32_t fid;
> +    uint32_t uid;
> +    char *pci_id;
>      uint64_t g_iota;
>      uint64_t pba;
>      uint64_t pal;
> @@ -229,21 +248,42 @@ typedef struct S390PCIBusDevice {
>      uint8_t sum;
>      S390MsixInfo msix;
>      AdapterRoutes routes;
> -    AddressSpace as;
> -    MemoryRegion mr;
> +    QLIST_ENTRY(S390PCIDevice) entry;
>  } S390PCIBusDevice;
>  
> +typedef struct S390PCIDeviceConn {
> +    S390PCIBusDevice *zpci;
> +    AddressSpace iommu_as;
> +    MemoryRegion iommu_mr;
> +} S390PCIDeviceConn;
> +
>  typedef struct S390pciState {
>      PCIHostState parent_obj;
> -    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
> +    S390PCIDeviceConn conn[PCI_SLOT_MAX];
>      AddressSpace msix_notify_as;
>      MemoryRegion msix_notify_mr;
> -    QTAILQ_HEAD(, SeiContainer) pending_sei;
>  } S390pciState;
>  
> +typedef struct S390PCIFacBus {
> +    BusState qbus;
> +} S390PCIFacBus;
> +
> +typedef struct S390PCIFacility {
> +    SysBusDevice parent_obj;
> +    S390PCIFacBus *fbus;
> +    QTAILQ_HEAD(, SeiContainer) pending_sei;
> +} S390PCIFacility;
> +
> +typedef struct S390PCIFacilityClass {
> +    DeviceClass parent_class;
> +    int (*init)(S390PCIFacility *f);
> +} S390PCIFacilityClass;
> +
>  int chsc_sei_nt2_get_event(void *res);
>  int chsc_sei_nt2_have_event(void);
>  void s390_pci_sclp_configure(int configure, SCCB *sccb);
> +void s390_pci_device_enable(S390PCIBusDevice *zpci);
> +void s390_pci_device_disable(S390PCIBusDevice *zpci);
>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
>  S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
> index f9151a9..2977e9c 100644
> --- a/hw/s390x/s390-pci-inst.c
> +++ b/hw/s390x/s390-pci-inst.c
> @@ -208,12 +208,12 @@ int clp_service_call(S390CPU *cpu, uint8_t r2)
>  
>          switch (reqsetpci->oc) {
>          case CLP_SET_ENABLE_PCI_FN:
> -            pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET;
> +            s390_pci_device_enable(pbdev);
>              stl_p(&ressetpci->fh, pbdev->fh);
>              stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
>              break;
>          case CLP_SET_DISABLE_PCI_FN:
> -            pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET);
> +            s390_pci_device_disable(pbdev);
>              pbdev->error_state = false;
>              pbdev->lgstg_blocked = false;
>              stl_p(&ressetpci->fh, pbdev->fh);
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index a3b14b5..56940e8 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -125,8 +125,8 @@ static void ccw_init(MachineState *machine)
>                        machine->initrd_filename, "s390-ccw.img", true);
>      s390_flic_init();
>  
> -    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
> -    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
> +    dev = qdev_create(NULL, TYPE_S390_PCI_FACILITY);
> +    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_FACILITY,
>                                OBJECT(dev), NULL);
>      qdev_init_nofail(dev);
>  
> @@ -173,6 +173,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
>      mc->max_cpus = 255;
>      mc->hot_add_cpu = ccw_hot_add_cpu;
>      mc->is_default = 1;
> +    mc->has_dynamic_sysbus = true;
>      nc->nmi_monitor_handler = s390_nmi;
>  }
>  
> -- 
> 1.9.3
> 
>
Hong Bo Li June 30, 2015, 6:16 a.m. UTC | #2
On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>> This patch introduce a new facility(and bus)
>> to hold devices representing information actually
>> provided by s390 firmware and I/O configuration.
>> usage example:
>> -device s390-pcihost
>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>
>> The first line will create a s390 pci host bridge
>> and init the root bus. The second line will create
>> a standard vfio pci device, and attach it to the
>> root bus. These are similiar to the standard process
>> to define a pci device on other platform.
>>
>> The third line will create a s390 pci device to
>> store s390 specific information, and references
>> the corresponding vfio pci device via device id.
>> We create a s390 pci facility bus to hold all the
>> zpci devices.
>>
>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> It's mostly up to s390 maintainers, but I'd like to note
> one thing below
>
>> ---
>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>
>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>> index 560b66a..d5e7b2e 100644
>> --- a/hw/s390x/s390-pci-bus.c
>> +++ b/hw/s390x/s390-pci-bus.c
>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>       PciCcdfErr *eccdf;
>>       int rc = 1;
>>       SeiContainer *sei_cont;
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>   
>>       if (!s) {
>>           return rc;
>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>   
>>   int chsc_sei_nt2_have_event(void)
>>   {
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>   
>>       if (!s) {
>>           return 0;
>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>   }
>>   
>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>> +{
>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>> +}
>> +
>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>> +{
>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>> +    if (zpci->is_unplugged)
>> +        object_unparent(OBJECT(zpci));
>> +}
>> +
>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>   {
>>       S390PCIBusDevice *pbdev;
>> -    int i;
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>> +    BusChild *kid;
>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>   
>>       if (!s) {
>>           return NULL;
>>       }
>>   
>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>> -        pbdev = &s->pbdev[i];
>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>> +        pbdev = (S390PCIBusDevice *)kid->child;
>> +        if (pbdev->fid == fid) {
>>               return pbdev;
>>           }
>>       }
>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>       return;
>>   }
>>   
>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>> -{
>> -    return PCI_SLOT(pdev->devfn);
>> -}
>> -
>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>> -{
>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>> -}
>> -
>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>   {
>>       S390PCIBusDevice *pbdev;
>> -    int i;
>> -    int j = 0;
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>> +    BusChild *kid;
>> +    int i = 0;
>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>   
>>       if (!s) {
>>           return NULL;
>>       }
>>   
>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>> -        pbdev = &s->pbdev[i];
>> -
>> -        if (pbdev->fh == 0) {
>> -            continue;
>> -        }
>> -
>> -        if (j == idx) {
>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>> +        pbdev = (S390PCIBusDevice *)kid->child;
>> +        if (i == idx) {
>>               return pbdev;
>>           }
>> -        j++;
>> +        i++;
>>       }
>>   
>>       return NULL;
> This relies on the order of children on the qbus, that's wrong I think.
> Generally I'm not sure why do you convert all slot lookups to child
> lookups: more code to achieve the same effect?

Thank you Michael.
I made the change for two reasons:
1. The old implementation only supports one s390 pci root bus, and 32 (PCI_SLOT_MAX)
slots at most. So when it comes to multiple s390 pci root buses, the old code
does not work.
2. Now the zpci device "S390PCIBusDevice" is only a structure to store
s390-specific information, so we can attach all the zpci devices to an
s390 pci facility bus. Since these zpci devices have no relation to the "slot",
their order does not matter.

>> @@ -167,16 +164,16 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>   S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>>   {
>>       S390PCIBusDevice *pbdev;
>> -    int i;
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>> +    BusChild *kid;
>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>   
>>       if (!s || !fh) {
>>           return NULL;
>>       }
>>   
>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>> -        pbdev = &s->pbdev[i];
>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>           if (pbdev->fh == fh) {
>>               return pbdev;
>>           }
>> @@ -185,12 +182,33 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>>       return NULL;
>>   }
>>   
>> +static S390PCIBusDevice *s390_pci_find_dev_by_pdev(PCIDevice *pdev)
>> +{
>> +    S390PCIBusDevice *pbdev;
>> +    BusChild *kid;
>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>> +
>> +    if (!s || !pdev) {
>> +        return NULL;
>> +    }
>> +
>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>> +        pbdev = (S390PCIBusDevice *)kid->child;
>> +        if (pbdev->pdev == pdev) {
>> +            return pbdev;
>> +        }
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>>   static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
>>                                       uint32_t fid, uint64_t faddr, uint32_t e)
>>   {
>>       SeiContainer *sei_cont;
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>   
>>       if (!s) {
>>           return;
>> @@ -308,7 +326,10 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>   {
>>       uint64_t pte;
>>       uint32_t flags;
>> -    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
>> +    S390PCIDeviceConn *conn = container_of(iommu, S390PCIDeviceConn,
>> +                                           iommu_mr);
>> +    S390PCIBusDevice *pbdev = conn->zpci;
>> +
>>       S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
>>                                              ->qbus.parent);
>>       IOMMUTLBEntry ret = {
>> @@ -319,8 +340,14 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>           .perm = IOMMU_NONE,
>>       };
>>   
>> +    if (!pbdev) {
>> +        return ret;
>> +    }
>> +
>>       DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
>>   
>> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent);
>> +
>>       /* s390 does not have an APIC mapped to main storage so we use
>>        * a separate AddressSpace only for msix notifications
>>        */
>> @@ -382,7 +409,7 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>   {
>>       S390pciState *s = opaque;
>>   
>> -    return &s->pbdev[PCI_SLOT(devfn)].as;
>> +    return &s->conn[PCI_SLOT(devfn)].iommu_as;
>>   }
>>   
>>   static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
>> @@ -455,9 +482,10 @@ static void s390_pcihost_init_as(S390pciState *s)
>>       int i;
>>   
>>       for (i = 0; i < PCI_SLOT_MAX; i++) {
>> -        memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s),
>> +        memory_region_init_iommu(&s->conn[i].iommu_mr, OBJECT(s),
>>                                    &s390_iommu_ops, "iommu-s390", UINT64_MAX);
>> -        address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci");
>> +        address_space_init(&s->conn[i].iommu_as, &s->conn[i].iommu_mr,
>> +                           "iommu-pci");
>>       }
>>   
>>       memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
>> @@ -484,7 +512,7 @@ static int s390_pcihost_init(SysBusDevice *dev)
>>       bus = BUS(b);
>>       qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
>>       phb->bus = b;
>> -    QTAILQ_INIT(&s->pending_sei);
>> +
>>       return 0;
>>   }
>>   
>> @@ -519,26 +547,6 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
>>   static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
>>                                     DeviceState *dev, Error **errp)
>>   {
>> -    PCIDevice *pci_dev = PCI_DEVICE(dev);
>> -    S390PCIBusDevice *pbdev;
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
>> -                                           ->qbus.parent);
>> -
>> -    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
>> -
>> -    pbdev->fid = s390_pci_get_pfid(pci_dev);
>> -    pbdev->pdev = pci_dev;
>> -    pbdev->configured = true;
>> -    pbdev->fh = s390_pci_get_pfh(pci_dev);
>> -
>> -    s390_pcihost_setup_msix(pbdev);
>> -
>> -    if (dev->hotplugged) {
>> -        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
>> -                                     pbdev->fh, pbdev->fid);
>> -        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
>> -                                     pbdev->fh, pbdev->fid);
>> -    }
>>       return;
>>   }
>>   
>> @@ -546,31 +554,30 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
>>                                       DeviceState *dev, Error **errp)
>>   {
>>       PCIDevice *pci_dev = PCI_DEVICE(dev);
>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
>> -                                           ->qbus.parent);
>> -    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
>> -
>> -    if (pbdev->configured) {
>> -        pbdev->configured = false;
>> -        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
>> -                                     pbdev->fh, pbdev->fid);
>> +    S390PCIBusDevice *pbdev;
>> +    HotplugHandler *hotplug_ctrl;
>> +    S390PCIFacility *f = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_GET_CLASS(f);
>> +    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_CLASS(k);
>> +
>> +    /* unplug corresponding zpci device */
>> +    pbdev = s390_pci_find_dev_by_pdev(pci_dev);
>> +    if (pbdev) {
>> +        hotplug_ctrl = pbdev->qdev.parent_bus->hotplug_handler;
>> +        if (hdc->unplug_request) {
>> +            hdc->unplug_request(hotplug_ctrl, &pbdev->qdev, errp);
>> +        }
>>       }
>>   
>> -    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
>> -                                 pbdev->fh, pbdev->fid);
>> -    pbdev->fh = 0;
>> -    pbdev->fid = 0;
>> -    pbdev->pdev = NULL;
>>       object_unparent(OBJECT(pci_dev));
>>   }
>>   
>>   static void s390_pcihost_class_init(ObjectClass *klass, void *data)
>>   {
>>       SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
>> -    DeviceClass *dc = DEVICE_CLASS(klass);
>>       HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
>>   
>> -    dc->cannot_instantiate_with_device_add_yet = true;
>>       k->init = s390_pcihost_init;
>>       hc->plug = s390_pcihost_hot_plug;
>>       hc->unplug = s390_pcihost_hot_unplug;
>> @@ -588,9 +595,156 @@ static const TypeInfo s390_pcihost_info = {
>>       }
>>   };
>>   
>> +static void s390_pci_device_hot_plug(HotplugHandler *hotplug_dev,
>> +                                     DeviceState *dev, Error **errp)
>> +{
>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>> +
>> +    zpci->configured = true;
>> +
>> +    if (dev->hotplugged) {
>> +        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
>> +                                     zpci->fh, zpci->fid);
>> +        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
>> +                                     zpci->fh, zpci->fid);
>> +    }
>> +}
>> +
>> +static void s390_pci_device_hot_unplug_request(HotplugHandler *hotplug_dev,
>> +                                       DeviceState *dev, Error **errp)
>> +{
>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>> +
>> +    if (zpci->configured) {
>> +        zpci->configured = false;
>> +        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
>> +                                     zpci->fh, zpci->fid);
>> +    }
>> +
>> +    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
>> +                                 zpci->fh, zpci->fid);
>> +
>> +    zpci->is_unplugged = true;
>> +}
>> +
>> +static const TypeInfo s390_pci_fac_bus_info = {
>> +    .name = TYPE_S390_PCI_FAC_BUS,
>> +    .parent = TYPE_BUS,
>> +    .instance_size = sizeof(S390PCIFacBus),
>> +};
>> +
>> +static int s390_pci_facility_init(S390PCIFacility *f)
>> +{
>> +    DeviceState *dev = DEVICE(f);
>> +
>> +    QTAILQ_INIT(&f->pending_sei);
>> +    msi_supported = true;
>> +    f->fbus = S390_PCI_FAC_BUS(qbus_create(TYPE_S390_PCI_FAC_BUS, dev, NULL));
>> +    qbus_set_hotplug_handler(BUS(&f->fbus->qbus), DEVICE(dev), NULL);
>> +
>> +    return 0;
>> +}
>> +
>> +static void s390_pci_facility_class_init(ObjectClass *klass, void *data)
>> +{
>> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_CLASS(klass);
>> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(k);
>> +
>> +    k->init = s390_pci_facility_init;
>> +    hc->plug = s390_pci_device_hot_plug;
>> +    hc->unplug_request = s390_pci_device_hot_unplug_request;
>> +}
>> +
>> +static const TypeInfo s390_pci_facility_info = {
>> +    .name          = TYPE_S390_PCI_FACILITY,
>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>> +    .instance_size = sizeof(S390PCIFacility),
>> +    .class_init    = s390_pci_facility_class_init,
>> +    .class_size    = sizeof(S390PCIFacilityClass),
>> +    .interfaces = (InterfaceInfo[]) {
>> +        { TYPE_HOTPLUG_HANDLER },
>> +        { }
>> +    }
>> +};
>> +
>> +static void s390_pci_device_realize(DeviceState *dev, Error **errp)
>> +{
>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>> +    S390PCIBusDevice *tmp;
>> +    S390pciState *s;
>> +    BusChild *kid;
>> +    PCIDevice *pdev;
>> +    int ret;
>> +    S390PCIFacility *f = S390_PCI_FACILITY(
>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>> +
>> +    ret = pci_qdev_find_device(zpci->pci_id, &pdev);
>> +    if (ret < 0) {
>> +        error_setg(errp, "vfio pci device %s not found", zpci->pci_id);
>> +        return;
>> +    }
>> +
>> +    QTAILQ_FOREACH(kid, &f->fbus->qbus.children, sibling) {
>> +        tmp = (S390PCIBusDevice *)kid->child;
>> +        if (tmp == zpci) {
>> +            continue;
>> +        }
>> +
>> +        if (tmp->fid == zpci->fid || tmp->uid == zpci->uid ||
>> +            !strcmp(tmp->pci_id, zpci->pci_id)) {
>> +            error_setg(errp, "zpci needs unique fid, uid and pci_id");
>> +            return;
>> +        }
>> +    }
>> +
>> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pdev)->qbus.parent);
>> +    s->conn[PCI_SLOT(pdev->devfn)].zpci = zpci;
>> +
>> +    zpci->pdev = pdev;
>> +    zpci->fh = zpci->fid | FH_VIRT;
>> +    s390_pcihost_setup_msix(zpci);
>> +}
>> +
>> +static void s390_pci_device_unrealize(DeviceState *dev, Error **errp)
>> +{
>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>> +
>> +    zpci->fh = 0;
>> +    zpci->fid = 0;
>> +    zpci->pdev = NULL;
>> +}
>> +
>> +static Property s390_pci_device_properties[] = {
>> +    DEFINE_PROP_UINT32("fid", S390PCIBusDevice, fid, 0),
>> +    DEFINE_PROP_UINT32("uid", S390PCIBusDevice, uid, 0),
>> +    DEFINE_PROP_STRING("pci_id", S390PCIBusDevice, pci_id),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void s390_pci_device_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> +    dc->desc = "s390 pci device";
>> +    dc->bus_type = TYPE_S390_PCI_FAC_BUS;
>> +    dc->realize = s390_pci_device_realize;
>> +    dc->unrealize = s390_pci_device_unrealize;
>> +    dc->props = s390_pci_device_properties;
>> +}
>> +
>> +static const TypeInfo s390_pci_device_type_info = {
>> +    .name = TYPE_S390_PCI_DEVICE,
>> +    .parent = TYPE_DEVICE,
>> +    .instance_size = sizeof(S390PCIBusDevice),
>> +    .class_init = s390_pci_device_class_init,
>> +};
>> +
>>   static void s390_pci_register_types(void)
>>   {
>>       type_register_static(&s390_pcihost_info);
>> +    type_register_static(&s390_pci_facility_info);
>> +    type_register_static(&s390_pci_fac_bus_info);
>> +    type_register_static(&s390_pci_device_type_info);
>>   }
>>   
>>   type_init(s390_pci_register_types)
>> diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
>> index 464a92e..5bf3913 100644
>> --- a/hw/s390x/s390-pci-bus.h
>> +++ b/hw/s390x/s390-pci-bus.h
>> @@ -149,6 +149,21 @@ enum ZpciIoatDtype {
>>   #define ZPCI_TABLE_VALID_MASK           0x20
>>   #define ZPCI_TABLE_PROT_MASK            0x200
>>   
>> +#define TYPE_S390_PCI_FACILITY "s390-pci-facility"
>> +#define TYPE_S390_PCI_FAC_BUS "s390-pci-fac-bus"
>> +#define TYPE_S390_PCI_DEVICE "zpci"
>> +
>> +#define S390_PCI_FACILITY(obj) \
>> +    OBJECT_CHECK(S390PCIFacility, (obj), TYPE_S390_PCI_FACILITY)
>> +#define S390_PCI_FAC_BUS(obj) \
>> +    OBJECT_CHECK(S390PCIFacBus, (obj), TYPE_S390_PCI_FAC_BUS)
>> +#define S390_PCI_FACILITY_CLASS(klass) \
>> +    OBJECT_CLASS_CHECK(S390PCIFacilityClass, (klass), TYPE_S390_PCI_FACILITY)
>> +#define S390_PCI_DEVICE(obj) \
>> +    OBJECT_CHECK(S390PCIBusDevice, (obj), TYPE_S390_PCI_DEVICE)
>> +#define S390_PCI_FACILITY_GET_CLASS(obj) \
>> +    OBJECT_GET_CLASS(S390PCIFacilityClass, (obj), TYPE_S390_PCI_FACILITY)
>> +
>>   typedef struct SeiContainer {
>>       QTAILQ_ENTRY(SeiContainer) link;
>>       uint32_t fid;
>> @@ -214,12 +229,16 @@ typedef struct S390MsixInfo {
>>   } S390MsixInfo;
>>   
>>   typedef struct S390PCIBusDevice {
>> +    DeviceState qdev;
>>       PCIDevice *pdev;
>>       bool configured;
>> +    bool is_unplugged;
>>       bool error_state;
>>       bool lgstg_blocked;
>>       uint32_t fh;
>>       uint32_t fid;
>> +    uint32_t uid;
>> +    char *pci_id;
>>       uint64_t g_iota;
>>       uint64_t pba;
>>       uint64_t pal;
>> @@ -229,21 +248,42 @@ typedef struct S390PCIBusDevice {
>>       uint8_t sum;
>>       S390MsixInfo msix;
>>       AdapterRoutes routes;
>> -    AddressSpace as;
>> -    MemoryRegion mr;
>> +    QLIST_ENTRY(S390PCIDevice) entry;
>>   } S390PCIBusDevice;
>>   
>> +typedef struct S390PCIDeviceConn {
>> +    S390PCIBusDevice *zpci;
>> +    AddressSpace iommu_as;
>> +    MemoryRegion iommu_mr;
>> +} S390PCIDeviceConn;
>> +
>>   typedef struct S390pciState {
>>       PCIHostState parent_obj;
>> -    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
>> +    S390PCIDeviceConn conn[PCI_SLOT_MAX];
>>       AddressSpace msix_notify_as;
>>       MemoryRegion msix_notify_mr;
>> -    QTAILQ_HEAD(, SeiContainer) pending_sei;
>>   } S390pciState;
>>   
>> +typedef struct S390PCIFacBus {
>> +    BusState qbus;
>> +} S390PCIFacBus;
>> +
>> +typedef struct S390PCIFacility {
>> +    SysBusDevice parent_obj;
>> +    S390PCIFacBus *fbus;
>> +    QTAILQ_HEAD(, SeiContainer) pending_sei;
>> +} S390PCIFacility;
>> +
>> +typedef struct S390PCIFacilityClass {
>> +    DeviceClass parent_class;
>> +    int (*init)(S390PCIFacility *f);
>> +} S390PCIFacilityClass;
>> +
>>   int chsc_sei_nt2_get_event(void *res);
>>   int chsc_sei_nt2_have_event(void);
>>   void s390_pci_sclp_configure(int configure, SCCB *sccb);
>> +void s390_pci_device_enable(S390PCIBusDevice *zpci);
>> +void s390_pci_device_disable(S390PCIBusDevice *zpci);
>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
>>   S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
>> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
>> index f9151a9..2977e9c 100644
>> --- a/hw/s390x/s390-pci-inst.c
>> +++ b/hw/s390x/s390-pci-inst.c
>> @@ -208,12 +208,12 @@ int clp_service_call(S390CPU *cpu, uint8_t r2)
>>   
>>           switch (reqsetpci->oc) {
>>           case CLP_SET_ENABLE_PCI_FN:
>> -            pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET;
>> +            s390_pci_device_enable(pbdev);
>>               stl_p(&ressetpci->fh, pbdev->fh);
>>               stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
>>               break;
>>           case CLP_SET_DISABLE_PCI_FN:
>> -            pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET);
>> +            s390_pci_device_disable(pbdev);
>>               pbdev->error_state = false;
>>               pbdev->lgstg_blocked = false;
>>               stl_p(&ressetpci->fh, pbdev->fh);
>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>> index a3b14b5..56940e8 100644
>> --- a/hw/s390x/s390-virtio-ccw.c
>> +++ b/hw/s390x/s390-virtio-ccw.c
>> @@ -125,8 +125,8 @@ static void ccw_init(MachineState *machine)
>>                         machine->initrd_filename, "s390-ccw.img", true);
>>       s390_flic_init();
>>   
>> -    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
>> -    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
>> +    dev = qdev_create(NULL, TYPE_S390_PCI_FACILITY);
>> +    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_FACILITY,
>>                                 OBJECT(dev), NULL);
>>       qdev_init_nofail(dev);
>>   
>> @@ -173,6 +173,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
>>       mc->max_cpus = 255;
>>       mc->hot_add_cpu = ccw_hot_add_cpu;
>>       mc->is_default = 1;
>> +    mc->has_dynamic_sysbus = true;
>>       nc->nmi_monitor_handler = s390_nmi;
>>   }
>>   
>> -- 
>> 1.9.3
>>
>>
Michael S. Tsirkin July 1, 2015, 6:22 a.m. UTC | #3
On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> 
> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>This patch introduces a new facility (and bus)
> >>to hold devices representing information actually
> >>provided by s390 firmware and I/O configuration.
> >>usage example:
> >>-device s390-pcihost
> >>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>
> >>The first line will create a s390 pci host bridge
> >>and init the root bus. The second line will create
> >>a standard vfio pci device, and attach it to the
> >>root bus. These are similar to the standard process
> >>to define a pci device on other platforms.
> >>
> >>The third line will create a s390 pci device to
> >>store s390 specific information, and references
> >>the corresponding vfio pci device via device id.
> >>We create a s390 pci facility bus to hold all the
> >>zpci devices.
> >>
> >>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >It's mostly up to s390 maintainers, but I'd like to note
> >one thing below
> >
> >>---
> >>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>
> >>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>index 560b66a..d5e7b2e 100644
> >>--- a/hw/s390x/s390-pci-bus.c
> >>+++ b/hw/s390x/s390-pci-bus.c
> >>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>      PciCcdfErr *eccdf;
> >>      int rc = 1;
> >>      SeiContainer *sei_cont;
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>      if (!s) {
> >>          return rc;
> >>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>  int chsc_sei_nt2_have_event(void)
> >>  {
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>      if (!s) {
> >>          return 0;
> >>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>  }
> >>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>+{
> >>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>+}
> >>+
> >>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>+{
> >>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>+    if (zpci->is_unplugged)
> >>+        object_unparent(OBJECT(zpci));
> >>+}
> >>+
> >>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>  {
> >>      S390PCIBusDevice *pbdev;
> >>-    int i;
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>+    BusChild *kid;
> >>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>      if (!s) {
> >>          return NULL;
> >>      }
> >>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>-        pbdev = &s->pbdev[i];
> >>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>+        if (pbdev->fid == fid) {
> >>              return pbdev;
> >>          }
> >>      }
> >>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>      return;
> >>  }
> >>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>-{
> >>-    return PCI_SLOT(pdev->devfn);
> >>-}
> >>-
> >>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>-{
> >>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>-}
> >>-
> >>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>  {
> >>      S390PCIBusDevice *pbdev;
> >>-    int i;
> >>-    int j = 0;
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>+    BusChild *kid;
> >>+    int i = 0;
> >>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>      if (!s) {
> >>          return NULL;
> >>      }
> >>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>-        pbdev = &s->pbdev[i];
> >>-
> >>-        if (pbdev->fh == 0) {
> >>-            continue;
> >>-        }
> >>-
> >>-        if (j == idx) {
> >>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>+        if (i == idx) {
> >>              return pbdev;
> >>          }
> >>-        j++;
> >>+        i++;
> >>      }
> >>      return NULL;
> >This relies on the order of children on the qbus; that's wrong, I think.
> >Generally, I'm not sure why you convert all slot lookups to child
> >lookups: more code to achieve the same effect?
> 
> Thank you Michael.
> I made the change for two reasons:
> 1. The old implementation only supports one s390 pci root bus, and 32 (PCI_SLOT_MAX)
> slots at most. So when it comes to multiple s390 pci root buses, the old code
> does not work.
> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> s390-specific information, so we can attach all the zpci devices to a
> s390 pci facility bus. Since these zpci devices have no relation to the "slot",
> the order of them does not matter.

But you make this order guest-visible, which seems wrong.



> >>@@ -167,16 +164,16 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>  S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
> >>  {
> >>      S390PCIBusDevice *pbdev;
> >>-    int i;
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>+    BusChild *kid;
> >>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>      if (!s || !fh) {
> >>          return NULL;
> >>      }
> >>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>-        pbdev = &s->pbdev[i];
> >>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>          if (pbdev->fh == fh) {
> >>              return pbdev;
> >>          }
> >>@@ -185,12 +182,33 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
> >>      return NULL;
> >>  }
> >>+static S390PCIBusDevice *s390_pci_find_dev_by_pdev(PCIDevice *pdev)
> >>+{
> >>+    S390PCIBusDevice *pbdev;
> >>+    BusChild *kid;
> >>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>+
> >>+    if (!s || !pdev) {
> >>+        return NULL;
> >>+    }
> >>+
> >>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>+        if (pbdev->pdev == pdev) {
> >>+            return pbdev;
> >>+        }
> >>+    }
> >>+
> >>+    return NULL;
> >>+}
> >>+
> >>  static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
> >>                                      uint32_t fid, uint64_t faddr, uint32_t e)
> >>  {
> >>      SeiContainer *sei_cont;
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>      if (!s) {
> >>          return;
> >>@@ -308,7 +326,10 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
> >>  {
> >>      uint64_t pte;
> >>      uint32_t flags;
> >>-    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
> >>+    S390PCIDeviceConn *conn = container_of(iommu, S390PCIDeviceConn,
> >>+                                           iommu_mr);
> >>+    S390PCIBusDevice *pbdev = conn->zpci;
> >>+
> >>      S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
> >>                                             ->qbus.parent);
> >>      IOMMUTLBEntry ret = {
> >>@@ -319,8 +340,14 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
> >>          .perm = IOMMU_NONE,
> >>      };
> >>+    if (!pbdev) {
> >>+        return ret;
> >>+    }
> >>+
> >>      DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
> >>+    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent);
> >>+
> >>      /* s390 does not have an APIC mapped to main storage so we use
> >>       * a separate AddressSpace only for msix notifications
> >>       */
> >>@@ -382,7 +409,7 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
> >>  {
> >>      S390pciState *s = opaque;
> >>-    return &s->pbdev[PCI_SLOT(devfn)].as;
> >>+    return &s->conn[PCI_SLOT(devfn)].iommu_as;
> >>  }
> >>  static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
> >>@@ -455,9 +482,10 @@ static void s390_pcihost_init_as(S390pciState *s)
> >>      int i;
> >>      for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>-        memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s),
> >>+        memory_region_init_iommu(&s->conn[i].iommu_mr, OBJECT(s),
> >>                                   &s390_iommu_ops, "iommu-s390", UINT64_MAX);
> >>-        address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci");
> >>+        address_space_init(&s->conn[i].iommu_as, &s->conn[i].iommu_mr,
> >>+                           "iommu-pci");
> >>      }
> >>      memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
> >>@@ -484,7 +512,7 @@ static int s390_pcihost_init(SysBusDevice *dev)
> >>      bus = BUS(b);
> >>      qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
> >>      phb->bus = b;
> >>-    QTAILQ_INIT(&s->pending_sei);
> >>+
> >>      return 0;
> >>  }
> >>@@ -519,26 +547,6 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
> >>  static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
> >>                                    DeviceState *dev, Error **errp)
> >>  {
> >>-    PCIDevice *pci_dev = PCI_DEVICE(dev);
> >>-    S390PCIBusDevice *pbdev;
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
> >>-                                           ->qbus.parent);
> >>-
> >>-    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
> >>-
> >>-    pbdev->fid = s390_pci_get_pfid(pci_dev);
> >>-    pbdev->pdev = pci_dev;
> >>-    pbdev->configured = true;
> >>-    pbdev->fh = s390_pci_get_pfh(pci_dev);
> >>-
> >>-    s390_pcihost_setup_msix(pbdev);
> >>-
> >>-    if (dev->hotplugged) {
> >>-        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
> >>-                                     pbdev->fh, pbdev->fid);
> >>-        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
> >>-                                     pbdev->fh, pbdev->fid);
> >>-    }
> >>      return;
> >>  }
> >>@@ -546,31 +554,30 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
> >>                                      DeviceState *dev, Error **errp)
> >>  {
> >>      PCIDevice *pci_dev = PCI_DEVICE(dev);
> >>-    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
> >>-                                           ->qbus.parent);
> >>-    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
> >>-
> >>-    if (pbdev->configured) {
> >>-        pbdev->configured = false;
> >>-        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
> >>-                                     pbdev->fh, pbdev->fid);
> >>+    S390PCIBusDevice *pbdev;
> >>+    HotplugHandler *hotplug_ctrl;
> >>+    S390PCIFacility *f = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>+    S390PCIFacilityClass *k = S390_PCI_FACILITY_GET_CLASS(f);
> >>+    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_CLASS(k);
> >>+
> >>+    /* unplug corresponding zpci device */
> >>+    pbdev = s390_pci_find_dev_by_pdev(pci_dev);
> >>+    if (pbdev) {
> >>+        hotplug_ctrl = pbdev->qdev.parent_bus->hotplug_handler;
> >>+        if (hdc->unplug_request) {
> >>+            hdc->unplug_request(hotplug_ctrl, &pbdev->qdev, errp);
> >>+        }
> >>      }
> >>-    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
> >>-                                 pbdev->fh, pbdev->fid);
> >>-    pbdev->fh = 0;
> >>-    pbdev->fid = 0;
> >>-    pbdev->pdev = NULL;
> >>      object_unparent(OBJECT(pci_dev));
> >>  }
> >>  static void s390_pcihost_class_init(ObjectClass *klass, void *data)
> >>  {
> >>      SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
> >>-    DeviceClass *dc = DEVICE_CLASS(klass);
> >>      HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
> >>-    dc->cannot_instantiate_with_device_add_yet = true;
> >>      k->init = s390_pcihost_init;
> >>      hc->plug = s390_pcihost_hot_plug;
> >>      hc->unplug = s390_pcihost_hot_unplug;
> >>@@ -588,9 +595,156 @@ static const TypeInfo s390_pcihost_info = {
> >>      }
> >>  };
> >>+static void s390_pci_device_hot_plug(HotplugHandler *hotplug_dev,
> >>+                                     DeviceState *dev, Error **errp)
> >>+{
> >>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>+
> >>+    zpci->configured = true;
> >>+
> >>+    if (dev->hotplugged) {
> >>+        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
> >>+                                     zpci->fh, zpci->fid);
> >>+        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
> >>+                                     zpci->fh, zpci->fid);
> >>+    }
> >>+}
> >>+
> >>+static void s390_pci_device_hot_unplug_request(HotplugHandler *hotplug_dev,
> >>+                                       DeviceState *dev, Error **errp)
> >>+{
> >>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>+
> >>+    if (zpci->configured) {
> >>+        zpci->configured = false;
> >>+        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
> >>+                                     zpci->fh, zpci->fid);
> >>+    }
> >>+
> >>+    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
> >>+                                 zpci->fh, zpci->fid);
> >>+
> >>+    zpci->is_unplugged = true;
> >>+}
> >>+
> >>+static const TypeInfo s390_pci_fac_bus_info = {
> >>+    .name = TYPE_S390_PCI_FAC_BUS,
> >>+    .parent = TYPE_BUS,
> >>+    .instance_size = sizeof(S390PCIFacBus),
> >>+};
> >>+
> >>+static int s390_pci_facility_init(S390PCIFacility *f)
> >>+{
> >>+    DeviceState *dev = DEVICE(f);
> >>+
> >>+    QTAILQ_INIT(&f->pending_sei);
> >>+    msi_supported = true;
> >>+    f->fbus = S390_PCI_FAC_BUS(qbus_create(TYPE_S390_PCI_FAC_BUS, dev, NULL));
> >>+    qbus_set_hotplug_handler(BUS(&f->fbus->qbus), DEVICE(dev), NULL);
> >>+
> >>+    return 0;
> >>+}
> >>+
> >>+static void s390_pci_facility_class_init(ObjectClass *klass, void *data)
> >>+{
> >>+    S390PCIFacilityClass *k = S390_PCI_FACILITY_CLASS(klass);
> >>+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(k);
> >>+
> >>+    k->init = s390_pci_facility_init;
> >>+    hc->plug = s390_pci_device_hot_plug;
> >>+    hc->unplug_request = s390_pci_device_hot_unplug_request;
> >>+}
> >>+
> >>+static const TypeInfo s390_pci_facility_info = {
> >>+    .name          = TYPE_S390_PCI_FACILITY,
> >>+    .parent        = TYPE_SYS_BUS_DEVICE,
> >>+    .instance_size = sizeof(S390PCIFacility),
> >>+    .class_init    = s390_pci_facility_class_init,
> >>+    .class_size    = sizeof(S390PCIFacilityClass),
> >>+    .interfaces = (InterfaceInfo[]) {
> >>+        { TYPE_HOTPLUG_HANDLER },
> >>+        { }
> >>+    }
> >>+};
> >>+
> >>+static void s390_pci_device_realize(DeviceState *dev, Error **errp)
> >>+{
> >>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>+    S390PCIBusDevice *tmp;
> >>+    S390pciState *s;
> >>+    BusChild *kid;
> >>+    PCIDevice *pdev;
> >>+    int ret;
> >>+    S390PCIFacility *f = S390_PCI_FACILITY(
> >>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>+
> >>+    ret = pci_qdev_find_device(zpci->pci_id, &pdev);
> >>+    if (ret < 0) {
> >>+        error_setg(errp, "vfio pci device %s not found", zpci->pci_id);
> >>+        return;
> >>+    }
> >>+
> >>+    QTAILQ_FOREACH(kid, &f->fbus->qbus.children, sibling) {
> >>+        tmp = (S390PCIBusDevice *)kid->child;
> >>+        if (tmp == zpci) {
> >>+            continue;
> >>+        }
> >>+
> >>+        if (tmp->fid == zpci->fid || tmp->uid == zpci->uid ||
> >>+            !strcmp(tmp->pci_id, zpci->pci_id)) {
> >>+            error_setg(errp, "zpci needs unique fid, uid and pci_id");
> >>+            return;
> >>+        }
> >>+    }
> >>+
> >>+    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pdev)->qbus.parent);
> >>+    s->conn[PCI_SLOT(pdev->devfn)].zpci = zpci;
> >>+
> >>+    zpci->pdev = pdev;
> >>+    zpci->fh = zpci->fid | FH_VIRT;
> >>+    s390_pcihost_setup_msix(zpci);
> >>+}
> >>+
> >>+static void s390_pci_device_unrealize(DeviceState *dev, Error **errp)
> >>+{
> >>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>+
> >>+    zpci->fh = 0;
> >>+    zpci->fid = 0;
> >>+    zpci->pdev = NULL;
> >>+}
> >>+
> >>+static Property s390_pci_device_properties[] = {
> >>+    DEFINE_PROP_UINT32("fid", S390PCIBusDevice, fid, 0),
> >>+    DEFINE_PROP_UINT32("uid", S390PCIBusDevice, uid, 0),
> >>+    DEFINE_PROP_STRING("pci_id", S390PCIBusDevice, pci_id),
> >>+    DEFINE_PROP_END_OF_LIST(),
> >>+};
> >>+
> >>+static void s390_pci_device_class_init(ObjectClass *klass, void *data)
> >>+{
> >>+    DeviceClass *dc = DEVICE_CLASS(klass);
> >>+
> >>+    dc->desc = "s390 pci device";
> >>+    dc->bus_type = TYPE_S390_PCI_FAC_BUS;
> >>+    dc->realize = s390_pci_device_realize;
> >>+    dc->unrealize = s390_pci_device_unrealize;
> >>+    dc->props = s390_pci_device_properties;
> >>+}
> >>+
> >>+static const TypeInfo s390_pci_device_type_info = {
> >>+    .name = TYPE_S390_PCI_DEVICE,
> >>+    .parent = TYPE_DEVICE,
> >>+    .instance_size = sizeof(S390PCIBusDevice),
> >>+    .class_init = s390_pci_device_class_init,
> >>+};
> >>+
> >>  static void s390_pci_register_types(void)
> >>  {
> >>      type_register_static(&s390_pcihost_info);
> >>+    type_register_static(&s390_pci_facility_info);
> >>+    type_register_static(&s390_pci_fac_bus_info);
> >>+    type_register_static(&s390_pci_device_type_info);
> >>  }
> >>  type_init(s390_pci_register_types)
> >>diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
> >>index 464a92e..5bf3913 100644
> >>--- a/hw/s390x/s390-pci-bus.h
> >>+++ b/hw/s390x/s390-pci-bus.h
> >>@@ -149,6 +149,21 @@ enum ZpciIoatDtype {
> >>  #define ZPCI_TABLE_VALID_MASK           0x20
> >>  #define ZPCI_TABLE_PROT_MASK            0x200
> >>+#define TYPE_S390_PCI_FACILITY "s390-pci-facility"
> >>+#define TYPE_S390_PCI_FAC_BUS "s390-pci-fac-bus"
> >>+#define TYPE_S390_PCI_DEVICE "zpci"
> >>+
> >>+#define S390_PCI_FACILITY(obj) \
> >>+    OBJECT_CHECK(S390PCIFacility, (obj), TYPE_S390_PCI_FACILITY)
> >>+#define S390_PCI_FAC_BUS(obj) \
> >>+    OBJECT_CHECK(S390PCIFacBus, (obj), TYPE_S390_PCI_FAC_BUS)
> >>+#define S390_PCI_FACILITY_CLASS(klass) \
> >>+    OBJECT_CLASS_CHECK(S390PCIFacilityClass, (klass), TYPE_S390_PCI_FACILITY)
> >>+#define S390_PCI_DEVICE(obj) \
> >>+    OBJECT_CHECK(S390PCIBusDevice, (obj), TYPE_S390_PCI_DEVICE)
> >>+#define S390_PCI_FACILITY_GET_CLASS(obj) \
> >>+    OBJECT_GET_CLASS(S390PCIFacilityClass, (obj), TYPE_S390_PCI_FACILITY)
> >>+
> >>  typedef struct SeiContainer {
> >>      QTAILQ_ENTRY(SeiContainer) link;
> >>      uint32_t fid;
> >>@@ -214,12 +229,16 @@ typedef struct S390MsixInfo {
> >>  } S390MsixInfo;
> >>  typedef struct S390PCIBusDevice {
> >>+    DeviceState qdev;
> >>      PCIDevice *pdev;
> >>      bool configured;
> >>+    bool is_unplugged;
> >>      bool error_state;
> >>      bool lgstg_blocked;
> >>      uint32_t fh;
> >>      uint32_t fid;
> >>+    uint32_t uid;
> >>+    char *pci_id;
> >>      uint64_t g_iota;
> >>      uint64_t pba;
> >>      uint64_t pal;
> >>@@ -229,21 +248,42 @@ typedef struct S390PCIBusDevice {
> >>      uint8_t sum;
> >>      S390MsixInfo msix;
> >>      AdapterRoutes routes;
> >>-    AddressSpace as;
> >>-    MemoryRegion mr;
> >>+    QLIST_ENTRY(S390PCIDevice) entry;
> >>  } S390PCIBusDevice;
> >>+typedef struct S390PCIDeviceConn {
> >>+    S390PCIBusDevice *zpci;
> >>+    AddressSpace iommu_as;
> >>+    MemoryRegion iommu_mr;
> >>+} S390PCIDeviceConn;
> >>+
> >>  typedef struct S390pciState {
> >>      PCIHostState parent_obj;
> >>-    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
> >>+    S390PCIDeviceConn conn[PCI_SLOT_MAX];
> >>      AddressSpace msix_notify_as;
> >>      MemoryRegion msix_notify_mr;
> >>-    QTAILQ_HEAD(, SeiContainer) pending_sei;
> >>  } S390pciState;
> >>+typedef struct S390PCIFacBus {
> >>+    BusState qbus;
> >>+} S390PCIFacBus;
> >>+
> >>+typedef struct S390PCIFacility {
> >>+    SysBusDevice parent_obj;
> >>+    S390PCIFacBus *fbus;
> >>+    QTAILQ_HEAD(, SeiContainer) pending_sei;
> >>+} S390PCIFacility;
> >>+
> >>+typedef struct S390PCIFacilityClass {
> >>+    DeviceClass parent_class;
> >>+    int (*init)(S390PCIFacility *f);
> >>+} S390PCIFacilityClass;
> >>+
> >>  int chsc_sei_nt2_get_event(void *res);
> >>  int chsc_sei_nt2_have_event(void);
> >>  void s390_pci_sclp_configure(int configure, SCCB *sccb);
> >>+void s390_pci_device_enable(S390PCIBusDevice *zpci);
> >>+void s390_pci_device_disable(S390PCIBusDevice *zpci);
> >>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
> >>  S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
> >>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
> >>diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
> >>index f9151a9..2977e9c 100644
> >>--- a/hw/s390x/s390-pci-inst.c
> >>+++ b/hw/s390x/s390-pci-inst.c
> >>@@ -208,12 +208,12 @@ int clp_service_call(S390CPU *cpu, uint8_t r2)
> >>          switch (reqsetpci->oc) {
> >>          case CLP_SET_ENABLE_PCI_FN:
> >>-            pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET;
> >>+            s390_pci_device_enable(pbdev);
> >>              stl_p(&ressetpci->fh, pbdev->fh);
> >>              stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
> >>              break;
> >>          case CLP_SET_DISABLE_PCI_FN:
> >>-            pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>+            s390_pci_device_disable(pbdev);
> >>              pbdev->error_state = false;
> >>              pbdev->lgstg_blocked = false;
> >>              stl_p(&ressetpci->fh, pbdev->fh);
> >>diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> >>index a3b14b5..56940e8 100644
> >>--- a/hw/s390x/s390-virtio-ccw.c
> >>+++ b/hw/s390x/s390-virtio-ccw.c
> >>@@ -125,8 +125,8 @@ static void ccw_init(MachineState *machine)
> >>                        machine->initrd_filename, "s390-ccw.img", true);
> >>      s390_flic_init();
> >>-    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
> >>-    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
> >>+    dev = qdev_create(NULL, TYPE_S390_PCI_FACILITY);
> >>+    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_FACILITY,
> >>                                OBJECT(dev), NULL);
> >>      qdev_init_nofail(dev);
> >>@@ -173,6 +173,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
> >>      mc->max_cpus = 255;
> >>      mc->hot_add_cpu = ccw_hot_add_cpu;
> >>      mc->is_default = 1;
> >>+    mc->has_dynamic_sysbus = true;
> >>      nc->nmi_monitor_handler = s390_nmi;
> >>  }
> >>-- 
> >>1.9.3
> >>
> >>
Hong Bo Li July 1, 2015, 7:56 a.m. UTC | #4
On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>> This patch introduce a new facility(and bus)
>>>> to hold devices representing information actually
>>>> provided by s390 firmware and I/O configuration.
>>>> usage example:
>>>> -device s390-pcihost
>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>
>>>> The first line will create a s390 pci host bridge
>>>> and init the root bus. The second line will create
>>>> a standard vfio pci device, and attach it to the
>>>> root bus. These are similiar to the standard process
>>>> to define a pci device on other platform.
>>>>
>>>> The third line will create a s390 pci device to
>>>> store s390 specific information, and references
>>>> the corresponding vfio pci device via device id.
>>>> We create a s390 pci facility bus to hold all the
>>>> zpci devices.
>>>>
>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>> It's mostly up to s390 maintainers, but I'd like to note
>>> one thing below
>>>
>>>> ---
>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>
>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>> index 560b66a..d5e7b2e 100644
>>>> --- a/hw/s390x/s390-pci-bus.c
>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>       PciCcdfErr *eccdf;
>>>>       int rc = 1;
>>>>       SeiContainer *sei_cont;
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>       if (!s) {
>>>>           return rc;
>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>   int chsc_sei_nt2_have_event(void)
>>>>   {
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>       if (!s) {
>>>>           return 0;
>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>   }
>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>> +{
>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>> +}
>>>> +
>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>> +{
>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>> +    if (zpci->is_unplugged)
>>>> +        object_unparent(OBJECT(zpci));
>>>> +}
>>>> +
>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>   {
>>>>       S390PCIBusDevice *pbdev;
>>>> -    int i;
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>> +    BusChild *kid;
>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>       if (!s) {
>>>>           return NULL;
>>>>       }
>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>> -        pbdev = &s->pbdev[i];
>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>> +        if (pbdev->fid == fid) {
>>>>               return pbdev;
>>>>           }
>>>>       }
>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>       return;
>>>>   }
>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>> -{
>>>> -    return PCI_SLOT(pdev->devfn);
>>>> -}
>>>> -
>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>> -{
>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>> -}
>>>> -
>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>   {
>>>>       S390PCIBusDevice *pbdev;
>>>> -    int i;
>>>> -    int j = 0;
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>> +    BusChild *kid;
>>>> +    int i = 0;
>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>       if (!s) {
>>>>           return NULL;
>>>>       }
>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>> -        pbdev = &s->pbdev[i];
>>>> -
>>>> -        if (pbdev->fh == 0) {
>>>> -            continue;
>>>> -        }
>>>> -
>>>> -        if (j == idx) {
>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>> +        if (i == idx) {
>>>>               return pbdev;
>>>>           }
>>>> -        j++;
>>>> +        i++;
>>>>       }
>>>>       return NULL;
>>> This relies on the order of children on the qbus, that's wrong I think.
>>> Generally I'm not sure why do you convert all slot lookups to child
>>> lookups: more code to achieve the same effect?
>> Thank you Michael.
>> I do the change due to two reasons:
>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>> does not work.
>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>> s390 specific information, so we can attach all the zpci devices to a
>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>> so the order of them does not matter.
> But you make this order guest-visible which seems wrong.
>
The guest uses an s390-specific "list pci" instruction to get all the
zpci devices, and will create a root s390 pci bus for each device. So
the order has no relation to the pci topology in the guest.

If we assign too many zpci devices to one guest, the "list pci"
instruction will use a resume token to get all the zpci devices. For
example, the first time we return 32 zpci devices to the guest; the
next time we return another 32 zpci devices. The resume token records
where the batch of zpci devices returned to the guest next time begins.

So, if we change the order of the zpci devices on the s390 facility bus,
it may change the "batch" in which a given device is returned to the
guest. But this will not change the pci topology in the guest.

>
>>>> @@ -167,16 +164,16 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>>>>   {
>>>>       S390PCIBusDevice *pbdev;
>>>> -    int i;
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>> +    BusChild *kid;
>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>       if (!s || !fh) {
>>>>           return NULL;
>>>>       }
>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>> -        pbdev = &s->pbdev[i];
>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>           if (pbdev->fh == fh) {
>>>>               return pbdev;
>>>>           }
>>>> @@ -185,12 +182,33 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>>>>       return NULL;
>>>>   }
>>>> +static S390PCIBusDevice *s390_pci_find_dev_by_pdev(PCIDevice *pdev)
>>>> +{
>>>> +    S390PCIBusDevice *pbdev;
>>>> +    BusChild *kid;
>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>> +
>>>> +    if (!s || !pdev) {
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>> +        if (pbdev->pdev == pdev) {
>>>> +            return pbdev;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return NULL;
>>>> +}
>>>> +
>>>>   static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
>>>>                                       uint32_t fid, uint64_t faddr, uint32_t e)
>>>>   {
>>>>       SeiContainer *sei_cont;
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>       if (!s) {
>>>>           return;
>>>> @@ -308,7 +326,10 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>>>   {
>>>>       uint64_t pte;
>>>>       uint32_t flags;
>>>> -    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
>>>> +    S390PCIDeviceConn *conn = container_of(iommu, S390PCIDeviceConn,
>>>> +                                           iommu_mr);
>>>> +    S390PCIBusDevice *pbdev = conn->zpci;
>>>> +
>>>>       S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
>>>>                                              ->qbus.parent);
>>>>       IOMMUTLBEntry ret = {
>>>> @@ -319,8 +340,14 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>>>           .perm = IOMMU_NONE,
>>>>       };
>>>> +    if (!pbdev) {
>>>> +        return ret;
>>>> +    }
>>>> +
>>>>       DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
>>>> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent);
>>>> +
>>>>       /* s390 does not have an APIC mapped to main storage so we use
>>>>        * a separate AddressSpace only for msix notifications
>>>>        */
>>>> @@ -382,7 +409,7 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>>   {
>>>>       S390pciState *s = opaque;
>>>> -    return &s->pbdev[PCI_SLOT(devfn)].as;
>>>> +    return &s->conn[PCI_SLOT(devfn)].iommu_as;
>>>>   }
>>>>   static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
>>>> @@ -455,9 +482,10 @@ static void s390_pcihost_init_as(S390pciState *s)
>>>>       int i;
>>>>       for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>> -        memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s),
>>>> +        memory_region_init_iommu(&s->conn[i].iommu_mr, OBJECT(s),
>>>>                                    &s390_iommu_ops, "iommu-s390", UINT64_MAX);
>>>> -        address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci");
>>>> +        address_space_init(&s->conn[i].iommu_as, &s->conn[i].iommu_mr,
>>>> +                           "iommu-pci");
>>>>       }
>>>>       memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
>>>> @@ -484,7 +512,7 @@ static int s390_pcihost_init(SysBusDevice *dev)
>>>>       bus = BUS(b);
>>>>       qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
>>>>       phb->bus = b;
>>>> -    QTAILQ_INIT(&s->pending_sei);
>>>> +
>>>>       return 0;
>>>>   }
>>>> @@ -519,26 +547,6 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
>>>>   static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
>>>>                                     DeviceState *dev, Error **errp)
>>>>   {
>>>> -    PCIDevice *pci_dev = PCI_DEVICE(dev);
>>>> -    S390PCIBusDevice *pbdev;
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
>>>> -                                           ->qbus.parent);
>>>> -
>>>> -    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
>>>> -
>>>> -    pbdev->fid = s390_pci_get_pfid(pci_dev);
>>>> -    pbdev->pdev = pci_dev;
>>>> -    pbdev->configured = true;
>>>> -    pbdev->fh = s390_pci_get_pfh(pci_dev);
>>>> -
>>>> -    s390_pcihost_setup_msix(pbdev);
>>>> -
>>>> -    if (dev->hotplugged) {
>>>> -        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
>>>> -                                     pbdev->fh, pbdev->fid);
>>>> -        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
>>>> -                                     pbdev->fh, pbdev->fid);
>>>> -    }
>>>>       return;
>>>>   }
>>>> @@ -546,31 +554,30 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
>>>>                                       DeviceState *dev, Error **errp)
>>>>   {
>>>>       PCIDevice *pci_dev = PCI_DEVICE(dev);
>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
>>>> -                                           ->qbus.parent);
>>>> -    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
>>>> -
>>>> -    if (pbdev->configured) {
>>>> -        pbdev->configured = false;
>>>> -        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
>>>> -                                     pbdev->fh, pbdev->fid);
>>>> +    S390PCIBusDevice *pbdev;
>>>> +    HotplugHandler *hotplug_ctrl;
>>>> +    S390PCIFacility *f = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_GET_CLASS(f);
>>>> +    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_CLASS(k);
>>>> +
>>>> +    /* unplug corresponding zpci device */
>>>> +    pbdev = s390_pci_find_dev_by_pdev(pci_dev);
>>>> +    if (pbdev) {
>>>> +        hotplug_ctrl = pbdev->qdev.parent_bus->hotplug_handler;
>>>> +        if (hdc->unplug_request) {
>>>> +            hdc->unplug_request(hotplug_ctrl, &pbdev->qdev, errp);
>>>> +        }
>>>>       }
>>>> -    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
>>>> -                                 pbdev->fh, pbdev->fid);
>>>> -    pbdev->fh = 0;
>>>> -    pbdev->fid = 0;
>>>> -    pbdev->pdev = NULL;
>>>>       object_unparent(OBJECT(pci_dev));
>>>>   }
>>>>   static void s390_pcihost_class_init(ObjectClass *klass, void *data)
>>>>   {
>>>>       SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
>>>> -    DeviceClass *dc = DEVICE_CLASS(klass);
>>>>       HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
>>>> -    dc->cannot_instantiate_with_device_add_yet = true;
>>>>       k->init = s390_pcihost_init;
>>>>       hc->plug = s390_pcihost_hot_plug;
>>>>       hc->unplug = s390_pcihost_hot_unplug;
>>>> @@ -588,9 +595,156 @@ static const TypeInfo s390_pcihost_info = {
>>>>       }
>>>>   };
>>>> +static void s390_pci_device_hot_plug(HotplugHandler *hotplug_dev,
>>>> +                                     DeviceState *dev, Error **errp)
>>>> +{
>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>> +
>>>> +    zpci->configured = true;
>>>> +
>>>> +    if (dev->hotplugged) {
>>>> +        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
>>>> +                                     zpci->fh, zpci->fid);
>>>> +        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
>>>> +                                     zpci->fh, zpci->fid);
>>>> +    }
>>>> +}
>>>> +
>>>> +static void s390_pci_device_hot_unplug_request(HotplugHandler *hotplug_dev,
>>>> +                                       DeviceState *dev, Error **errp)
>>>> +{
>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>> +
>>>> +    if (zpci->configured) {
>>>> +        zpci->configured = false;
>>>> +        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
>>>> +                                     zpci->fh, zpci->fid);
>>>> +    }
>>>> +
>>>> +    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
>>>> +                                 zpci->fh, zpci->fid);
>>>> +
>>>> +    zpci->is_unplugged = true;
>>>> +}
>>>> +
>>>> +static const TypeInfo s390_pci_fac_bus_info = {
>>>> +    .name = TYPE_S390_PCI_FAC_BUS,
>>>> +    .parent = TYPE_BUS,
>>>> +    .instance_size = sizeof(S390PCIFacBus),
>>>> +};
>>>> +
>>>> +static int s390_pci_facility_init(S390PCIFacility *f)
>>>> +{
>>>> +    DeviceState *dev = DEVICE(f);
>>>> +
>>>> +    QTAILQ_INIT(&f->pending_sei);
>>>> +    msi_supported = true;
>>>> +    f->fbus = S390_PCI_FAC_BUS(qbus_create(TYPE_S390_PCI_FAC_BUS, dev, NULL));
>>>> +    qbus_set_hotplug_handler(BUS(&f->fbus->qbus), DEVICE(dev), NULL);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static void s390_pci_facility_class_init(ObjectClass *klass, void *data)
>>>> +{
>>>> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_CLASS(klass);
>>>> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(k);
>>>> +
>>>> +    k->init = s390_pci_facility_init;
>>>> +    hc->plug = s390_pci_device_hot_plug;
>>>> +    hc->unplug_request = s390_pci_device_hot_unplug_request;
>>>> +}
>>>> +
>>>> +static const TypeInfo s390_pci_facility_info = {
>>>> +    .name          = TYPE_S390_PCI_FACILITY,
>>>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>>>> +    .instance_size = sizeof(S390PCIFacility),
>>>> +    .class_init    = s390_pci_facility_class_init,
>>>> +    .class_size    = sizeof(S390PCIFacilityClass),
>>>> +    .interfaces = (InterfaceInfo[]) {
>>>> +        { TYPE_HOTPLUG_HANDLER },
>>>> +        { }
>>>> +    }
>>>> +};
>>>> +
>>>> +static void s390_pci_device_realize(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>> +    S390PCIBusDevice *tmp;
>>>> +    S390pciState *s;
>>>> +    BusChild *kid;
>>>> +    PCIDevice *pdev;
>>>> +    int ret;
>>>> +    S390PCIFacility *f = S390_PCI_FACILITY(
>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>> +
>>>> +    ret = pci_qdev_find_device(zpci->pci_id, &pdev);
>>>> +    if (ret < 0) {
>>>> +        error_setg(errp, "vfio pci device %s not found", zpci->pci_id);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    QTAILQ_FOREACH(kid, &f->fbus->qbus.children, sibling) {
>>>> +        tmp = (S390PCIBusDevice *)kid->child;
>>>> +        if (tmp == zpci) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        if (tmp->fid == zpci->fid || tmp->uid == zpci->uid ||
>>>> +            !strcmp(tmp->pci_id, zpci->pci_id)) {
>>>> +            error_setg(errp, "zpci needs unique fid, uid and pci_id");
>>>> +            return;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pdev)->qbus.parent);
>>>> +    s->conn[PCI_SLOT(pdev->devfn)].zpci = zpci;
>>>> +
>>>> +    zpci->pdev = pdev;
>>>> +    zpci->fh = zpci->fid | FH_VIRT;
>>>> +    s390_pcihost_setup_msix(zpci);
>>>> +}
>>>> +
>>>> +static void s390_pci_device_unrealize(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>> +
>>>> +    zpci->fh = 0;
>>>> +    zpci->fid = 0;
>>>> +    zpci->pdev = NULL;
>>>> +}
>>>> +
>>>> +static Property s390_pci_device_properties[] = {
>>>> +    DEFINE_PROP_UINT32("fid", S390PCIBusDevice, fid, 0),
>>>> +    DEFINE_PROP_UINT32("uid", S390PCIBusDevice, uid, 0),
>>>> +    DEFINE_PROP_STRING("pci_id", S390PCIBusDevice, pci_id),
>>>> +    DEFINE_PROP_END_OF_LIST(),
>>>> +};
>>>> +
>>>> +static void s390_pci_device_class_init(ObjectClass *klass, void *data)
>>>> +{
>>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>>> +
>>>> +    dc->desc = "s390 pci device";
>>>> +    dc->bus_type = TYPE_S390_PCI_FAC_BUS;
>>>> +    dc->realize = s390_pci_device_realize;
>>>> +    dc->unrealize = s390_pci_device_unrealize;
>>>> +    dc->props = s390_pci_device_properties;
>>>> +}
>>>> +
>>>> +static const TypeInfo s390_pci_device_type_info = {
>>>> +    .name = TYPE_S390_PCI_DEVICE,
>>>> +    .parent = TYPE_DEVICE,
>>>> +    .instance_size = sizeof(S390PCIBusDevice),
>>>> +    .class_init = s390_pci_device_class_init,
>>>> +};
>>>> +
>>>>   static void s390_pci_register_types(void)
>>>>   {
>>>>       type_register_static(&s390_pcihost_info);
>>>> +    type_register_static(&s390_pci_facility_info);
>>>> +    type_register_static(&s390_pci_fac_bus_info);
>>>> +    type_register_static(&s390_pci_device_type_info);
>>>>   }
>>>>   type_init(s390_pci_register_types)
>>>> diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
>>>> index 464a92e..5bf3913 100644
>>>> --- a/hw/s390x/s390-pci-bus.h
>>>> +++ b/hw/s390x/s390-pci-bus.h
>>>> @@ -149,6 +149,21 @@ enum ZpciIoatDtype {
>>>>   #define ZPCI_TABLE_VALID_MASK           0x20
>>>>   #define ZPCI_TABLE_PROT_MASK            0x200
>>>> +#define TYPE_S390_PCI_FACILITY "s390-pci-facility"
>>>> +#define TYPE_S390_PCI_FAC_BUS "s390-pci-fac-bus"
>>>> +#define TYPE_S390_PCI_DEVICE "zpci"
>>>> +
>>>> +#define S390_PCI_FACILITY(obj) \
>>>> +    OBJECT_CHECK(S390PCIFacility, (obj), TYPE_S390_PCI_FACILITY)
>>>> +#define S390_PCI_FAC_BUS(obj) \
>>>> +    OBJECT_CHECK(S390PCIFacBus, (obj), TYPE_S390_PCI_FAC_BUS)
>>>> +#define S390_PCI_FACILITY_CLASS(klass) \
>>>> +    OBJECT_CLASS_CHECK(S390PCIFacilityClass, (klass), TYPE_S390_PCI_FACILITY)
>>>> +#define S390_PCI_DEVICE(obj) \
>>>> +    OBJECT_CHECK(S390PCIBusDevice, (obj), TYPE_S390_PCI_DEVICE)
>>>> +#define S390_PCI_FACILITY_GET_CLASS(obj) \
>>>> +    OBJECT_GET_CLASS(S390PCIFacilityClass, (obj), TYPE_S390_PCI_FACILITY)
>>>> +
>>>>   typedef struct SeiContainer {
>>>>       QTAILQ_ENTRY(SeiContainer) link;
>>>>       uint32_t fid;
>>>> @@ -214,12 +229,16 @@ typedef struct S390MsixInfo {
>>>>   } S390MsixInfo;
>>>>   typedef struct S390PCIBusDevice {
>>>> +    DeviceState qdev;
>>>>       PCIDevice *pdev;
>>>>       bool configured;
>>>> +    bool is_unplugged;
>>>>       bool error_state;
>>>>       bool lgstg_blocked;
>>>>       uint32_t fh;
>>>>       uint32_t fid;
>>>> +    uint32_t uid;
>>>> +    char *pci_id;
>>>>       uint64_t g_iota;
>>>>       uint64_t pba;
>>>>       uint64_t pal;
>>>> @@ -229,21 +248,42 @@ typedef struct S390PCIBusDevice {
>>>>       uint8_t sum;
>>>>       S390MsixInfo msix;
>>>>       AdapterRoutes routes;
>>>> -    AddressSpace as;
>>>> -    MemoryRegion mr;
>>>> +    QLIST_ENTRY(S390PCIDevice) entry;
>>>>   } S390PCIBusDevice;
>>>> +typedef struct S390PCIDeviceConn {
>>>> +    S390PCIBusDevice *zpci;
>>>> +    AddressSpace iommu_as;
>>>> +    MemoryRegion iommu_mr;
>>>> +} S390PCIDeviceConn;
>>>> +
>>>>   typedef struct S390pciState {
>>>>       PCIHostState parent_obj;
>>>> -    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
>>>> +    S390PCIDeviceConn conn[PCI_SLOT_MAX];
>>>>       AddressSpace msix_notify_as;
>>>>       MemoryRegion msix_notify_mr;
>>>> -    QTAILQ_HEAD(, SeiContainer) pending_sei;
>>>>   } S390pciState;
>>>> +typedef struct S390PCIFacBus {
>>>> +    BusState qbus;
>>>> +} S390PCIFacBus;
>>>> +
>>>> +typedef struct S390PCIFacility {
>>>> +    SysBusDevice parent_obj;
>>>> +    S390PCIFacBus *fbus;
>>>> +    QTAILQ_HEAD(, SeiContainer) pending_sei;
>>>> +} S390PCIFacility;
>>>> +
>>>> +typedef struct S390PCIFacilityClass {
>>>> +    DeviceClass parent_class;
>>>> +    int (*init)(S390PCIFacility *f);
>>>> +} S390PCIFacilityClass;
>>>> +
>>>>   int chsc_sei_nt2_get_event(void *res);
>>>>   int chsc_sei_nt2_have_event(void);
>>>>   void s390_pci_sclp_configure(int configure, SCCB *sccb);
>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci);
>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci);
>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
>>>> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
>>>> index f9151a9..2977e9c 100644
>>>> --- a/hw/s390x/s390-pci-inst.c
>>>> +++ b/hw/s390x/s390-pci-inst.c
>>>> @@ -208,12 +208,12 @@ int clp_service_call(S390CPU *cpu, uint8_t r2)
>>>>           switch (reqsetpci->oc) {
>>>>           case CLP_SET_ENABLE_PCI_FN:
>>>> -            pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET;
>>>> +            s390_pci_device_enable(pbdev);
>>>>               stl_p(&ressetpci->fh, pbdev->fh);
>>>>               stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
>>>>               break;
>>>>           case CLP_SET_DISABLE_PCI_FN:
>>>> -            pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>> +            s390_pci_device_disable(pbdev);
>>>>               pbdev->error_state = false;
>>>>               pbdev->lgstg_blocked = false;
>>>>               stl_p(&ressetpci->fh, pbdev->fh);
>>>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>>>> index a3b14b5..56940e8 100644
>>>> --- a/hw/s390x/s390-virtio-ccw.c
>>>> +++ b/hw/s390x/s390-virtio-ccw.c
>>>> @@ -125,8 +125,8 @@ static void ccw_init(MachineState *machine)
>>>>                         machine->initrd_filename, "s390-ccw.img", true);
>>>>       s390_flic_init();
>>>> -    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
>>>> -    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
>>>> +    dev = qdev_create(NULL, TYPE_S390_PCI_FACILITY);
>>>> +    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_FACILITY,
>>>>                                 OBJECT(dev), NULL);
>>>>       qdev_init_nofail(dev);
>>>> @@ -173,6 +173,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
>>>>       mc->max_cpus = 255;
>>>>       mc->hot_add_cpu = ccw_hot_add_cpu;
>>>>       mc->is_default = 1;
>>>> +    mc->has_dynamic_sysbus = true;
>>>>       nc->nmi_monitor_handler = s390_nmi;
>>>>   }
>>>> -- 
>>>> 1.9.3
>>>>
>>>>
Michael S. Tsirkin July 1, 2015, 8:05 a.m. UTC | #5
On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
> 
> 
> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> >On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> >>On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >>>On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>>>This patch introduce a new facility(and bus)
> >>>>to hold devices representing information actually
> >>>>provided by s390 firmware and I/O configuration.
> >>>>usage example:
> >>>>-device s390-pcihost
> >>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>>>
> >>>>The first line will create a s390 pci host bridge
> >>>>and init the root bus. The second line will create
> >>>>a standard vfio pci device, and attach it to the
> >>>>root bus. These are similiar to the standard process
> >>>>to define a pci device on other platform.
> >>>>
> >>>>The third line will create a s390 pci device to
> >>>>store s390 specific information, and references
> >>>>the corresponding vfio pci device via device id.
> >>>>We create a s390 pci facility bus to hold all the
> >>>>zpci devices.
> >>>>
> >>>>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >>>It's mostly up to s390 maintainers, but I'd like to note
> >>>one thing below
> >>>
> >>>>---
> >>>>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>>>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>>>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>>>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>>>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>>>
> >>>>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>>>index 560b66a..d5e7b2e 100644
> >>>>--- a/hw/s390x/s390-pci-bus.c
> >>>>+++ b/hw/s390x/s390-pci-bus.c
> >>>>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>      PciCcdfErr *eccdf;
> >>>>      int rc = 1;
> >>>>      SeiContainer *sei_cont;
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>      if (!s) {
> >>>>          return rc;
> >>>>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>  int chsc_sei_nt2_have_event(void)
> >>>>  {
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>      if (!s) {
> >>>>          return 0;
> >>>>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>>>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>>>  }
> >>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>>>+{
> >>>>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>+}
> >>>>+
> >>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>>>+{
> >>>>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>+    if (zpci->is_unplugged)
> >>>>+        object_unparent(OBJECT(zpci));
> >>>>+}
> >>>>+
> >>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>>>  {
> >>>>      S390PCIBusDevice *pbdev;
> >>>>-    int i;
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>+    BusChild *kid;
> >>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>      if (!s) {
> >>>>          return NULL;
> >>>>      }
> >>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>-        pbdev = &s->pbdev[i];
> >>>>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>+        if (pbdev->fid == fid) {
> >>>>              return pbdev;
> >>>>          }
> >>>>      }
> >>>>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>>>      return;
> >>>>  }
> >>>>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>>>-{
> >>>>-    return PCI_SLOT(pdev->devfn);
> >>>>-}
> >>>>-
> >>>>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>>>-{
> >>>>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>>>-}
> >>>>-
> >>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>  {
> >>>>      S390PCIBusDevice *pbdev;
> >>>>-    int i;
> >>>>-    int j = 0;
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>+    BusChild *kid;
> >>>>+    int i = 0;
> >>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>      if (!s) {
> >>>>          return NULL;
> >>>>      }
> >>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>-        pbdev = &s->pbdev[i];
> >>>>-
> >>>>-        if (pbdev->fh == 0) {
> >>>>-            continue;
> >>>>-        }
> >>>>-
> >>>>-        if (j == idx) {
> >>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>+        if (i == idx) {
> >>>>              return pbdev;
> >>>>          }
> >>>>-        j++;
> >>>>+        i++;
> >>>>      }
> >>>>      return NULL;
> >>>This relies on the order of children on the qbus, that's wrong I think.
> >>>Generally I'm not sure why do you convert all slot lookups to child
> >>>lookups: more code to achieve the same effect?
> >>Thank you Michael.
> >>I do the change due to two reasons:
> >>1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
> >>slots at most. So when it comes to multiple s390 pci root buses, the old code
> >>does not work.
> >>2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> >>s390 specific information, so we can attach all the zpci devices to a
> >>s390 pci facility bus. Since these zpci device has no relation with the "slot",
> >>so the order of them does not matter.
> >But you make this order guest-visible which seems wrong.
> >
> The guest uses a s390 specific "list pci" instruction to get all the zpci
> devices, and will
> create a root s390 pci bus for each device.  So the order has no relation
> with the pci
> topology on guest.
> 
> If we assign  too many zpci devices to one guest, the "list pci" instruction
> will use a
> resume token to get all the zpci devices. For example, first time we return
> 32 zpci
> devices to guest. Next time we'll return another 32 zpci devices. The resume
> token
> is used to store the beginning of zpci devices that will be returned to
> guest at next time.
> 
> So, if we change the order of the zpci device on s390 facility bus, it may
> change the
> "batch" in which this device be returned to guest. But this will not change
> the  pci
> topology on guest.

Yes, but that's still guest-visible, and it will break, for example,
if the guest is migrated between QEMU instances where the list order
is different at precisely the moment when it is enumerating the bus.



> >
> >>>>@@ -167,16 +164,16 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>  S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
> >>>>  {
> >>>>      S390PCIBusDevice *pbdev;
> >>>>-    int i;
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>+    BusChild *kid;
> >>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>      if (!s || !fh) {
> >>>>          return NULL;
> >>>>      }
> >>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>-        pbdev = &s->pbdev[i];
> >>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>          if (pbdev->fh == fh) {
> >>>>              return pbdev;
> >>>>          }
> >>>>@@ -185,12 +182,33 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
> >>>>      return NULL;
> >>>>  }
> >>>>+static S390PCIBusDevice *s390_pci_find_dev_by_pdev(PCIDevice *pdev)
> >>>>+{
> >>>>+    S390PCIBusDevice *pbdev;
> >>>>+    BusChild *kid;
> >>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>+
> >>>>+    if (!s || !pdev) {
> >>>>+        return NULL;
> >>>>+    }
> >>>>+
> >>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>+        if (pbdev->pdev == pdev) {
> >>>>+            return pbdev;
> >>>>+        }
> >>>>+    }
> >>>>+
> >>>>+    return NULL;
> >>>>+}
> >>>>+
> >>>>  static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
> >>>>                                      uint32_t fid, uint64_t faddr, uint32_t e)
> >>>>  {
> >>>>      SeiContainer *sei_cont;
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>      if (!s) {
> >>>>          return;
> >>>>@@ -308,7 +326,10 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
> >>>>  {
> >>>>      uint64_t pte;
> >>>>      uint32_t flags;
> >>>>-    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
> >>>>+    S390PCIDeviceConn *conn = container_of(iommu, S390PCIDeviceConn,
> >>>>+                                           iommu_mr);
> >>>>+    S390PCIBusDevice *pbdev = conn->zpci;
> >>>>+
> >>>>      S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
> >>>>                                             ->qbus.parent);
> >>>>      IOMMUTLBEntry ret = {
> >>>>@@ -319,8 +340,14 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
> >>>>          .perm = IOMMU_NONE,
> >>>>      };
> >>>>+    if (!pbdev) {
> >>>>+        return ret;
> >>>>+    }
> >>>>+
> >>>>      DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
> >>>>+    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent);
> >>>>+
> >>>>      /* s390 does not have an APIC mapped to main storage so we use
> >>>>       * a separate AddressSpace only for msix notifications
> >>>>       */
> >>>>@@ -382,7 +409,7 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
> >>>>  {
> >>>>      S390pciState *s = opaque;
> >>>>-    return &s->pbdev[PCI_SLOT(devfn)].as;
> >>>>+    return &s->conn[PCI_SLOT(devfn)].iommu_as;
> >>>>  }
> >>>>  static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
> >>>>@@ -455,9 +482,10 @@ static void s390_pcihost_init_as(S390pciState *s)
> >>>>      int i;
> >>>>      for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>-        memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s),
> >>>>+        memory_region_init_iommu(&s->conn[i].iommu_mr, OBJECT(s),
> >>>>                                   &s390_iommu_ops, "iommu-s390", UINT64_MAX);
> >>>>-        address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci");
> >>>>+        address_space_init(&s->conn[i].iommu_as, &s->conn[i].iommu_mr,
> >>>>+                           "iommu-pci");
> >>>>      }
> >>>>      memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
> >>>>@@ -484,7 +512,7 @@ static int s390_pcihost_init(SysBusDevice *dev)
> >>>>      bus = BUS(b);
> >>>>      qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
> >>>>      phb->bus = b;
> >>>>-    QTAILQ_INIT(&s->pending_sei);
> >>>>+
> >>>>      return 0;
> >>>>  }
> >>>>@@ -519,26 +547,6 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
> >>>>  static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
> >>>>                                    DeviceState *dev, Error **errp)
> >>>>  {
> >>>>-    PCIDevice *pci_dev = PCI_DEVICE(dev);
> >>>>-    S390PCIBusDevice *pbdev;
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
> >>>>-                                           ->qbus.parent);
> >>>>-
> >>>>-    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
> >>>>-
> >>>>-    pbdev->fid = s390_pci_get_pfid(pci_dev);
> >>>>-    pbdev->pdev = pci_dev;
> >>>>-    pbdev->configured = true;
> >>>>-    pbdev->fh = s390_pci_get_pfh(pci_dev);
> >>>>-
> >>>>-    s390_pcihost_setup_msix(pbdev);
> >>>>-
> >>>>-    if (dev->hotplugged) {
> >>>>-        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
> >>>>-                                     pbdev->fh, pbdev->fid);
> >>>>-        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
> >>>>-                                     pbdev->fh, pbdev->fid);
> >>>>-    }
> >>>>      return;
> >>>>  }
> >>>>@@ -546,31 +554,30 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
> >>>>                                      DeviceState *dev, Error **errp)
> >>>>  {
> >>>>      PCIDevice *pci_dev = PCI_DEVICE(dev);
> >>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
> >>>>-                                           ->qbus.parent);
> >>>>-    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
> >>>>-
> >>>>-    if (pbdev->configured) {
> >>>>-        pbdev->configured = false;
> >>>>-        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
> >>>>-                                     pbdev->fh, pbdev->fid);
> >>>>+    S390PCIBusDevice *pbdev;
> >>>>+    HotplugHandler *hotplug_ctrl;
> >>>>+    S390PCIFacility *f = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>+    S390PCIFacilityClass *k = S390_PCI_FACILITY_GET_CLASS(f);
> >>>>+    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_CLASS(k);
> >>>>+
> >>>>+    /* unplug corresponding zpci device */
> >>>>+    pbdev = s390_pci_find_dev_by_pdev(pci_dev);
> >>>>+    if (pbdev) {
> >>>>+        hotplug_ctrl = pbdev->qdev.parent_bus->hotplug_handler;
> >>>>+        if (hdc->unplug_request) {
> >>>>+            hdc->unplug_request(hotplug_ctrl, &pbdev->qdev, errp);
> >>>>+        }
> >>>>      }
> >>>>-    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
> >>>>-                                 pbdev->fh, pbdev->fid);
> >>>>-    pbdev->fh = 0;
> >>>>-    pbdev->fid = 0;
> >>>>-    pbdev->pdev = NULL;
> >>>>      object_unparent(OBJECT(pci_dev));
> >>>>  }
> >>>>  static void s390_pcihost_class_init(ObjectClass *klass, void *data)
> >>>>  {
> >>>>      SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
> >>>>-    DeviceClass *dc = DEVICE_CLASS(klass);
> >>>>      HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
> >>>>-    dc->cannot_instantiate_with_device_add_yet = true;
> >>>>      k->init = s390_pcihost_init;
> >>>>      hc->plug = s390_pcihost_hot_plug;
> >>>>      hc->unplug = s390_pcihost_hot_unplug;
> >>>>@@ -588,9 +595,156 @@ static const TypeInfo s390_pcihost_info = {
> >>>>      }
> >>>>  };
> >>>>+static void s390_pci_device_hot_plug(HotplugHandler *hotplug_dev,
> >>>>+                                     DeviceState *dev, Error **errp)
> >>>>+{
> >>>>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>>>+
> >>>>+    zpci->configured = true;
> >>>>+
> >>>>+    if (dev->hotplugged) {
> >>>>+        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
> >>>>+                                     zpci->fh, zpci->fid);
> >>>>+        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
> >>>>+                                     zpci->fh, zpci->fid);
> >>>>+    }
> >>>>+}
> >>>>+
> >>>>+static void s390_pci_device_hot_unplug_request(HotplugHandler *hotplug_dev,
> >>>>+                                       DeviceState *dev, Error **errp)
> >>>>+{
> >>>>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>>>+
> >>>>+    if (zpci->configured) {
> >>>>+        zpci->configured = false;
> >>>>+        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
> >>>>+                                     zpci->fh, zpci->fid);
> >>>>+    }
> >>>>+
> >>>>+    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
> >>>>+                                 zpci->fh, zpci->fid);
> >>>>+
> >>>>+    zpci->is_unplugged = true;
> >>>>+}
> >>>>+
> >>>>+static const TypeInfo s390_pci_fac_bus_info = {
> >>>>+    .name = TYPE_S390_PCI_FAC_BUS,
> >>>>+    .parent = TYPE_BUS,
> >>>>+    .instance_size = sizeof(S390PCIFacBus),
> >>>>+};
> >>>>+
> >>>>+static int s390_pci_facility_init(S390PCIFacility *f)
> >>>>+{
> >>>>+    DeviceState *dev = DEVICE(f);
> >>>>+
> >>>>+    QTAILQ_INIT(&f->pending_sei);
> >>>>+    msi_supported = true;
> >>>>+    f->fbus = S390_PCI_FAC_BUS(qbus_create(TYPE_S390_PCI_FAC_BUS, dev, NULL));
> >>>>+    qbus_set_hotplug_handler(BUS(&f->fbus->qbus), DEVICE(dev), NULL);
> >>>>+
> >>>>+    return 0;
> >>>>+}
> >>>>+
> >>>>+static void s390_pci_facility_class_init(ObjectClass *klass, void *data)
> >>>>+{
> >>>>+    S390PCIFacilityClass *k = S390_PCI_FACILITY_CLASS(klass);
> >>>>+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(k);
> >>>>+
> >>>>+    k->init = s390_pci_facility_init;
> >>>>+    hc->plug = s390_pci_device_hot_plug;
> >>>>+    hc->unplug_request = s390_pci_device_hot_unplug_request;
> >>>>+}
> >>>>+
> >>>>+static const TypeInfo s390_pci_facility_info = {
> >>>>+    .name          = TYPE_S390_PCI_FACILITY,
> >>>>+    .parent        = TYPE_SYS_BUS_DEVICE,
> >>>>+    .instance_size = sizeof(S390PCIFacility),
> >>>>+    .class_init    = s390_pci_facility_class_init,
> >>>>+    .class_size    = sizeof(S390PCIFacilityClass),
> >>>>+    .interfaces = (InterfaceInfo[]) {
> >>>>+        { TYPE_HOTPLUG_HANDLER },
> >>>>+        { }
> >>>>+    }
> >>>>+};
> >>>>+
> >>>>+static void s390_pci_device_realize(DeviceState *dev, Error **errp)
> >>>>+{
> >>>>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>>>+    S390PCIBusDevice *tmp;
> >>>>+    S390pciState *s;
> >>>>+    BusChild *kid;
> >>>>+    PCIDevice *pdev;
> >>>>+    int ret;
> >>>>+    S390PCIFacility *f = S390_PCI_FACILITY(
> >>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>+
> >>>>+    ret = pci_qdev_find_device(zpci->pci_id, &pdev);
> >>>>+    if (ret < 0) {
> >>>>+        error_setg(errp, "vfio pci device %s not found", zpci->pci_id);
> >>>>+        return;
> >>>>+    }
> >>>>+
> >>>>+    QTAILQ_FOREACH(kid, &f->fbus->qbus.children, sibling) {
> >>>>+        tmp = (S390PCIBusDevice *)kid->child;
> >>>>+        if (tmp == zpci) {
> >>>>+            continue;
> >>>>+        }
> >>>>+
> >>>>+        if (tmp->fid == zpci->fid || tmp->uid == zpci->uid ||
> >>>>+            !strcmp(tmp->pci_id, zpci->pci_id)) {
> >>>>+            error_setg(errp, "zpci needs unique fid, uid and pci_id");
> >>>>+            return;
> >>>>+        }
> >>>>+    }
> >>>>+
> >>>>+    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pdev)->qbus.parent);
> >>>>+    s->conn[PCI_SLOT(pdev->devfn)].zpci = zpci;
> >>>>+
> >>>>+    zpci->pdev = pdev;
> >>>>+    zpci->fh = zpci->fid | FH_VIRT;
> >>>>+    s390_pcihost_setup_msix(zpci);
> >>>>+}
> >>>>+
> >>>>+static void s390_pci_device_unrealize(DeviceState *dev, Error **errp)
> >>>>+{
> >>>>+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
> >>>>+
> >>>>+    zpci->fh = 0;
> >>>>+    zpci->fid = 0;
> >>>>+    zpci->pdev = NULL;
> >>>>+}
> >>>>+
> >>>>+static Property s390_pci_device_properties[] = {
> >>>>+    DEFINE_PROP_UINT32("fid", S390PCIBusDevice, fid, 0),
> >>>>+    DEFINE_PROP_UINT32("uid", S390PCIBusDevice, uid, 0),
> >>>>+    DEFINE_PROP_STRING("pci_id", S390PCIBusDevice, pci_id),
> >>>>+    DEFINE_PROP_END_OF_LIST(),
> >>>>+};
> >>>>+
> >>>>+static void s390_pci_device_class_init(ObjectClass *klass, void *data)
> >>>>+{
> >>>>+    DeviceClass *dc = DEVICE_CLASS(klass);
> >>>>+
> >>>>+    dc->desc = "s390 pci device";
> >>>>+    dc->bus_type = TYPE_S390_PCI_FAC_BUS;
> >>>>+    dc->realize = s390_pci_device_realize;
> >>>>+    dc->unrealize = s390_pci_device_unrealize;
> >>>>+    dc->props = s390_pci_device_properties;
> >>>>+}
> >>>>+
> >>>>+static const TypeInfo s390_pci_device_type_info = {
> >>>>+    .name = TYPE_S390_PCI_DEVICE,
> >>>>+    .parent = TYPE_DEVICE,
> >>>>+    .instance_size = sizeof(S390PCIBusDevice),
> >>>>+    .class_init = s390_pci_device_class_init,
> >>>>+};
> >>>>+
> >>>>  static void s390_pci_register_types(void)
> >>>>  {
> >>>>      type_register_static(&s390_pcihost_info);
> >>>>+    type_register_static(&s390_pci_facility_info);
> >>>>+    type_register_static(&s390_pci_fac_bus_info);
> >>>>+    type_register_static(&s390_pci_device_type_info);
> >>>>  }
> >>>>  type_init(s390_pci_register_types)
> >>>>diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
> >>>>index 464a92e..5bf3913 100644
> >>>>--- a/hw/s390x/s390-pci-bus.h
> >>>>+++ b/hw/s390x/s390-pci-bus.h
> >>>>@@ -149,6 +149,21 @@ enum ZpciIoatDtype {
> >>>>  #define ZPCI_TABLE_VALID_MASK           0x20
> >>>>  #define ZPCI_TABLE_PROT_MASK            0x200
> >>>>+#define TYPE_S390_PCI_FACILITY "s390-pci-facility"
> >>>>+#define TYPE_S390_PCI_FAC_BUS "s390-pci-fac-bus"
> >>>>+#define TYPE_S390_PCI_DEVICE "zpci"
> >>>>+
> >>>>+#define S390_PCI_FACILITY(obj) \
> >>>>+    OBJECT_CHECK(S390PCIFacility, (obj), TYPE_S390_PCI_FACILITY)
> >>>>+#define S390_PCI_FAC_BUS(obj) \
> >>>>+    OBJECT_CHECK(S390PCIFacBus, (obj), TYPE_S390_PCI_FAC_BUS)
> >>>>+#define S390_PCI_FACILITY_CLASS(klass) \
> >>>>+    OBJECT_CLASS_CHECK(S390PCIFacilityClass, (klass), TYPE_S390_PCI_FACILITY)
> >>>>+#define S390_PCI_DEVICE(obj) \
> >>>>+    OBJECT_CHECK(S390PCIBusDevice, (obj), TYPE_S390_PCI_DEVICE)
> >>>>+#define S390_PCI_FACILITY_GET_CLASS(obj) \
> >>>>+    OBJECT_GET_CLASS(S390PCIFacilityClass, (obj), TYPE_S390_PCI_FACILITY)
> >>>>+
> >>>>  typedef struct SeiContainer {
> >>>>      QTAILQ_ENTRY(SeiContainer) link;
> >>>>      uint32_t fid;
> >>>>@@ -214,12 +229,16 @@ typedef struct S390MsixInfo {
> >>>>  } S390MsixInfo;
> >>>>  typedef struct S390PCIBusDevice {
> >>>>+    DeviceState qdev;
> >>>>      PCIDevice *pdev;
> >>>>      bool configured;
> >>>>+    bool is_unplugged;
> >>>>      bool error_state;
> >>>>      bool lgstg_blocked;
> >>>>      uint32_t fh;
> >>>>      uint32_t fid;
> >>>>+    uint32_t uid;
> >>>>+    char *pci_id;
> >>>>      uint64_t g_iota;
> >>>>      uint64_t pba;
> >>>>      uint64_t pal;
> >>>>@@ -229,21 +248,42 @@ typedef struct S390PCIBusDevice {
> >>>>      uint8_t sum;
> >>>>      S390MsixInfo msix;
> >>>>      AdapterRoutes routes;
> >>>>-    AddressSpace as;
> >>>>-    MemoryRegion mr;
> >>>>+    QLIST_ENTRY(S390PCIDevice) entry;
> >>>>  } S390PCIBusDevice;
> >>>>+typedef struct S390PCIDeviceConn {
> >>>>+    S390PCIBusDevice *zpci;
> >>>>+    AddressSpace iommu_as;
> >>>>+    MemoryRegion iommu_mr;
> >>>>+} S390PCIDeviceConn;
> >>>>+
> >>>>  typedef struct S390pciState {
> >>>>      PCIHostState parent_obj;
> >>>>-    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
> >>>>+    S390PCIDeviceConn conn[PCI_SLOT_MAX];
> >>>>      AddressSpace msix_notify_as;
> >>>>      MemoryRegion msix_notify_mr;
> >>>>-    QTAILQ_HEAD(, SeiContainer) pending_sei;
> >>>>  } S390pciState;
> >>>>+typedef struct S390PCIFacBus {
> >>>>+    BusState qbus;
> >>>>+} S390PCIFacBus;
> >>>>+
> >>>>+typedef struct S390PCIFacility {
> >>>>+    SysBusDevice parent_obj;
> >>>>+    S390PCIFacBus *fbus;
> >>>>+    QTAILQ_HEAD(, SeiContainer) pending_sei;
> >>>>+} S390PCIFacility;
> >>>>+
> >>>>+typedef struct S390PCIFacilityClass {
> >>>>+    DeviceClass parent_class;
> >>>>+    int (*init)(S390PCIFacility *f);
> >>>>+} S390PCIFacilityClass;
> >>>>+
> >>>>  int chsc_sei_nt2_get_event(void *res);
> >>>>  int chsc_sei_nt2_have_event(void);
> >>>>  void s390_pci_sclp_configure(int configure, SCCB *sccb);
> >>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci);
> >>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci);
> >>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
> >>>>  S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
> >>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
> >>>>diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
> >>>>index f9151a9..2977e9c 100644
> >>>>--- a/hw/s390x/s390-pci-inst.c
> >>>>+++ b/hw/s390x/s390-pci-inst.c
> >>>>@@ -208,12 +208,12 @@ int clp_service_call(S390CPU *cpu, uint8_t r2)
> >>>>          switch (reqsetpci->oc) {
> >>>>          case CLP_SET_ENABLE_PCI_FN:
> >>>>-            pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>+            s390_pci_device_enable(pbdev);
> >>>>              stl_p(&ressetpci->fh, pbdev->fh);
> >>>>              stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
> >>>>              break;
> >>>>          case CLP_SET_DISABLE_PCI_FN:
> >>>>-            pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>+            s390_pci_device_disable(pbdev);
> >>>>              pbdev->error_state = false;
> >>>>              pbdev->lgstg_blocked = false;
> >>>>              stl_p(&ressetpci->fh, pbdev->fh);
> >>>>diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> >>>>index a3b14b5..56940e8 100644
> >>>>--- a/hw/s390x/s390-virtio-ccw.c
> >>>>+++ b/hw/s390x/s390-virtio-ccw.c
> >>>>@@ -125,8 +125,8 @@ static void ccw_init(MachineState *machine)
> >>>>                        machine->initrd_filename, "s390-ccw.img", true);
> >>>>      s390_flic_init();
> >>>>-    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
> >>>>-    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
> >>>>+    dev = qdev_create(NULL, TYPE_S390_PCI_FACILITY);
> >>>>+    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_FACILITY,
> >>>>                                OBJECT(dev), NULL);
> >>>>      qdev_init_nofail(dev);
> >>>>@@ -173,6 +173,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
> >>>>      mc->max_cpus = 255;
> >>>>      mc->hot_add_cpu = ccw_hot_add_cpu;
> >>>>      mc->is_default = 1;
> >>>>+    mc->has_dynamic_sysbus = true;
> >>>>      nc->nmi_monitor_handler = s390_nmi;
> >>>>  }
> >>>>-- 
> >>>>1.9.3
> >>>>
> >>>>
Hong Bo Li July 1, 2015, 9:13 a.m. UTC | #6
On 7/1/2015 16:05, Michael S. Tsirkin wrote:
> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>> This patch introduce a new facility(and bus)
>>>>>> to hold devices representing information actually
>>>>>> provided by s390 firmware and I/O configuration.
>>>>>> usage example:
>>>>>> -device s390-pcihost
>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>
>>>>>> The first line will create a s390 pci host bridge
>>>>>> and init the root bus. The second line will create
>>>>>> a standard vfio pci device, and attach it to the
>>>>>> root bus. These are similiar to the standard process
>>>>>> to define a pci device on other platform.
>>>>>>
>>>>>> The third line will create a s390 pci device to
>>>>>> store s390 specific information, and references
>>>>>> the corresponding vfio pci device via device id.
>>>>>> We create a s390 pci facility bus to hold all the
>>>>>> zpci devices.
>>>>>>
>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>> one thing below
>>>>>
>>>>>> ---
>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>> index 560b66a..d5e7b2e 100644
>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>       PciCcdfErr *eccdf;
>>>>>>       int rc = 1;
>>>>>>       SeiContainer *sei_cont;
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>       if (!s) {
>>>>>>           return rc;
>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>   {
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>       if (!s) {
>>>>>>           return 0;
>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>   }
>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>> +{
>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>> +}
>>>>>> +
>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>> +{
>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>> +    if (zpci->is_unplugged)
>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>> +}
>>>>>> +
>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>   {
>>>>>>       S390PCIBusDevice *pbdev;
>>>>>> -    int i;
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>> +    BusChild *kid;
>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>       if (!s) {
>>>>>>           return NULL;
>>>>>>       }
>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>> -        pbdev = &s->pbdev[i];
>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>> +        if (pbdev->fid == fid) {
>>>>>>               return pbdev;
>>>>>>           }
>>>>>>       }
>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>       return;
>>>>>>   }
>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>> -{
>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>> -}
>>>>>> -
>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>> -{
>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>> -}
>>>>>> -
>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>   {
>>>>>>       S390PCIBusDevice *pbdev;
>>>>>> -    int i;
>>>>>> -    int j = 0;
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>> +    BusChild *kid;
>>>>>> +    int i = 0;
>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>       if (!s) {
>>>>>>           return NULL;
>>>>>>       }
>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>> -        pbdev = &s->pbdev[i];
>>>>>> -
>>>>>> -        if (pbdev->fh == 0) {
>>>>>> -            continue;
>>>>>> -        }
>>>>>> -
>>>>>> -        if (j == idx) {
>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>> +        if (i == idx) {
>>>>>>               return pbdev;
>>>>>>           }
>>>>>> -        j++;
>>>>>> +        i++;
>>>>>>       }
>>>>>>       return NULL;
>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>> lookups: more code to achieve the same effect?
>>>> Thank you Michael.
>>>> I do the change due to two reasons:
>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>> does not work.
>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>> s390 specific information, so we can attach all the zpci devices to a
>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>> so the order of them does not matter.
>>> But you make this order guest-visible which seems wrong.
>>>
>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>> devices, and will
>> create a root s390 pci bus for each device.  So the order has no relation
>> with the pci
>> topology on guest.
>>
>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>> will use a
>> resume token to get all the zpci devices. For example, first time we return
>> 32 zpci
>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>> token
>> is used to store the beginning of zpci devices that will be returned to
>> guest at next time.
>>
>> So, if we change the order of the zpci device on s390 facility bus, it may
>> change the
>> "batch" in which this device be returned to guest. But this will not change
>> the  pci
>> topology on guest.
> Yes but that's still guest visible, and will break
> for example if guest is migrated between qemu instances
> where list order is different precisely when
> it's enumerating the bus.
>
Yes, and the list order is not the only s390-specific information that
is exposed to the guest. Besides that, we would need to migrate all the
other zpci information as well. For now, we have no plan to support
zpci migration.

>
>>>>>> @@ -167,16 +164,16 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>>>>>>   {
>>>>>>       S390PCIBusDevice *pbdev;
>>>>>> -    int i;
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>> +    BusChild *kid;
>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>       if (!s || !fh) {
>>>>>>           return NULL;
>>>>>>       }
>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>> -        pbdev = &s->pbdev[i];
>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>           if (pbdev->fh == fh) {
>>>>>>               return pbdev;
>>>>>>           }
>>>>>> @@ -185,12 +182,33 @@ S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
>>>>>>       return NULL;
>>>>>>   }
>>>>>> +static S390PCIBusDevice *s390_pci_find_dev_by_pdev(PCIDevice *pdev)
>>>>>> +{
>>>>>> +    S390PCIBusDevice *pbdev;
>>>>>> +    BusChild *kid;
>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>> +
>>>>>> +    if (!s || !pdev) {
>>>>>> +        return NULL;
>>>>>> +    }
>>>>>> +
>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>> +        if (pbdev->pdev == pdev) {
>>>>>> +            return pbdev;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    return NULL;
>>>>>> +}
>>>>>> +
>>>>>>   static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
>>>>>>                                       uint32_t fid, uint64_t faddr, uint32_t e)
>>>>>>   {
>>>>>>       SeiContainer *sei_cont;
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>       if (!s) {
>>>>>>           return;
>>>>>> @@ -308,7 +326,10 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>>>>>   {
>>>>>>       uint64_t pte;
>>>>>>       uint32_t flags;
>>>>>> -    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
>>>>>> +    S390PCIDeviceConn *conn = container_of(iommu, S390PCIDeviceConn,
>>>>>> +                                           iommu_mr);
>>>>>> +    S390PCIBusDevice *pbdev = conn->zpci;
>>>>>> +
>>>>>>       S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
>>>>>>                                              ->qbus.parent);
>>>>>>       IOMMUTLBEntry ret = {
>>>>>> @@ -319,8 +340,14 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>>>>>           .perm = IOMMU_NONE,
>>>>>>       };
>>>>>> +    if (!pbdev) {
>>>>>> +        return ret;
>>>>>> +    }
>>>>>> +
>>>>>>       DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
>>>>>> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent);
>>>>>> +
>>>>>>       /* s390 does not have an APIC mapped to main storage so we use
>>>>>>        * a separate AddressSpace only for msix notifications
>>>>>>        */
>>>>>> @@ -382,7 +409,7 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>>>>   {
>>>>>>       S390pciState *s = opaque;
>>>>>> -    return &s->pbdev[PCI_SLOT(devfn)].as;
>>>>>> +    return &s->conn[PCI_SLOT(devfn)].iommu_as;
>>>>>>   }
>>>>>>   static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
>>>>>> @@ -455,9 +482,10 @@ static void s390_pcihost_init_as(S390pciState *s)
>>>>>>       int i;
>>>>>>       for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>> -        memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s),
>>>>>> +        memory_region_init_iommu(&s->conn[i].iommu_mr, OBJECT(s),
>>>>>>                                    &s390_iommu_ops, "iommu-s390", UINT64_MAX);
>>>>>> -        address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci");
>>>>>> +        address_space_init(&s->conn[i].iommu_as, &s->conn[i].iommu_mr,
>>>>>> +                           "iommu-pci");
>>>>>>       }
>>>>>>       memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
>>>>>> @@ -484,7 +512,7 @@ static int s390_pcihost_init(SysBusDevice *dev)
>>>>>>       bus = BUS(b);
>>>>>>       qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
>>>>>>       phb->bus = b;
>>>>>> -    QTAILQ_INIT(&s->pending_sei);
>>>>>> +
>>>>>>       return 0;
>>>>>>   }
>>>>>> @@ -519,26 +547,6 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
>>>>>>   static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
>>>>>>                                     DeviceState *dev, Error **errp)
>>>>>>   {
>>>>>> -    PCIDevice *pci_dev = PCI_DEVICE(dev);
>>>>>> -    S390PCIBusDevice *pbdev;
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
>>>>>> -                                           ->qbus.parent);
>>>>>> -
>>>>>> -    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
>>>>>> -
>>>>>> -    pbdev->fid = s390_pci_get_pfid(pci_dev);
>>>>>> -    pbdev->pdev = pci_dev;
>>>>>> -    pbdev->configured = true;
>>>>>> -    pbdev->fh = s390_pci_get_pfh(pci_dev);
>>>>>> -
>>>>>> -    s390_pcihost_setup_msix(pbdev);
>>>>>> -
>>>>>> -    if (dev->hotplugged) {
>>>>>> -        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
>>>>>> -                                     pbdev->fh, pbdev->fid);
>>>>>> -        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
>>>>>> -                                     pbdev->fh, pbdev->fid);
>>>>>> -    }
>>>>>>       return;
>>>>>>   }
>>>>>> @@ -546,31 +554,30 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
>>>>>>                                       DeviceState *dev, Error **errp)
>>>>>>   {
>>>>>>       PCIDevice *pci_dev = PCI_DEVICE(dev);
>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
>>>>>> -                                           ->qbus.parent);
>>>>>> -    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
>>>>>> -
>>>>>> -    if (pbdev->configured) {
>>>>>> -        pbdev->configured = false;
>>>>>> -        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
>>>>>> -                                     pbdev->fh, pbdev->fid);
>>>>>> +    S390PCIBusDevice *pbdev;
>>>>>> +    HotplugHandler *hotplug_ctrl;
>>>>>> +    S390PCIFacility *f = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_GET_CLASS(f);
>>>>>> +    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_CLASS(k);
>>>>>> +
>>>>>> +    /* unplug corresponding zpci device */
>>>>>> +    pbdev = s390_pci_find_dev_by_pdev(pci_dev);
>>>>>> +    if (pbdev) {
>>>>>> +        hotplug_ctrl = pbdev->qdev.parent_bus->hotplug_handler;
>>>>>> +        if (hdc->unplug_request) {
>>>>>> +            hdc->unplug_request(hotplug_ctrl, &pbdev->qdev, errp);
>>>>>> +        }
>>>>>>       }
>>>>>> -    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
>>>>>> -                                 pbdev->fh, pbdev->fid);
>>>>>> -    pbdev->fh = 0;
>>>>>> -    pbdev->fid = 0;
>>>>>> -    pbdev->pdev = NULL;
>>>>>>       object_unparent(OBJECT(pci_dev));
>>>>>>   }
>>>>>>   static void s390_pcihost_class_init(ObjectClass *klass, void *data)
>>>>>>   {
>>>>>>       SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
>>>>>> -    DeviceClass *dc = DEVICE_CLASS(klass);
>>>>>>       HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
>>>>>> -    dc->cannot_instantiate_with_device_add_yet = true;
>>>>>>       k->init = s390_pcihost_init;
>>>>>>       hc->plug = s390_pcihost_hot_plug;
>>>>>>       hc->unplug = s390_pcihost_hot_unplug;
>>>>>> @@ -588,9 +595,156 @@ static const TypeInfo s390_pcihost_info = {
>>>>>>       }
>>>>>>   };
>>>>>> +static void s390_pci_device_hot_plug(HotplugHandler *hotplug_dev,
>>>>>> +                                     DeviceState *dev, Error **errp)
>>>>>> +{
>>>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>>>> +
>>>>>> +    zpci->configured = true;
>>>>>> +
>>>>>> +    if (dev->hotplugged) {
>>>>>> +        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
>>>>>> +                                     zpci->fh, zpci->fid);
>>>>>> +        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
>>>>>> +                                     zpci->fh, zpci->fid);
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +static void s390_pci_device_hot_unplug_request(HotplugHandler *hotplug_dev,
>>>>>> +                                       DeviceState *dev, Error **errp)
>>>>>> +{
>>>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>>>> +
>>>>>> +    if (zpci->configured) {
>>>>>> +        zpci->configured = false;
>>>>>> +        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
>>>>>> +                                     zpci->fh, zpci->fid);
>>>>>> +    }
>>>>>> +
>>>>>> +    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
>>>>>> +                                 zpci->fh, zpci->fid);
>>>>>> +
>>>>>> +    zpci->is_unplugged = true;
>>>>>> +}
>>>>>> +
>>>>>> +static const TypeInfo s390_pci_fac_bus_info = {
>>>>>> +    .name = TYPE_S390_PCI_FAC_BUS,
>>>>>> +    .parent = TYPE_BUS,
>>>>>> +    .instance_size = sizeof(S390PCIFacBus),
>>>>>> +};
>>>>>> +
>>>>>> +static int s390_pci_facility_init(S390PCIFacility *f)
>>>>>> +{
>>>>>> +    DeviceState *dev = DEVICE(f);
>>>>>> +
>>>>>> +    QTAILQ_INIT(&f->pending_sei);
>>>>>> +    msi_supported = true;
>>>>>> +    f->fbus = S390_PCI_FAC_BUS(qbus_create(TYPE_S390_PCI_FAC_BUS, dev, NULL));
>>>>>> +    qbus_set_hotplug_handler(BUS(&f->fbus->qbus), DEVICE(dev), NULL);
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static void s390_pci_facility_class_init(ObjectClass *klass, void *data)
>>>>>> +{
>>>>>> +    S390PCIFacilityClass *k = S390_PCI_FACILITY_CLASS(klass);
>>>>>> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(k);
>>>>>> +
>>>>>> +    k->init = s390_pci_facility_init;
>>>>>> +    hc->plug = s390_pci_device_hot_plug;
>>>>>> +    hc->unplug_request = s390_pci_device_hot_unplug_request;
>>>>>> +}
>>>>>> +
>>>>>> +static const TypeInfo s390_pci_facility_info = {
>>>>>> +    .name          = TYPE_S390_PCI_FACILITY,
>>>>>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>>>>>> +    .instance_size = sizeof(S390PCIFacility),
>>>>>> +    .class_init    = s390_pci_facility_class_init,
>>>>>> +    .class_size    = sizeof(S390PCIFacilityClass),
>>>>>> +    .interfaces = (InterfaceInfo[]) {
>>>>>> +        { TYPE_HOTPLUG_HANDLER },
>>>>>> +        { }
>>>>>> +    }
>>>>>> +};
>>>>>> +
>>>>>> +static void s390_pci_device_realize(DeviceState *dev, Error **errp)
>>>>>> +{
>>>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>>>> +    S390PCIBusDevice *tmp;
>>>>>> +    S390pciState *s;
>>>>>> +    BusChild *kid;
>>>>>> +    PCIDevice *pdev;
>>>>>> +    int ret;
>>>>>> +    S390PCIFacility *f = S390_PCI_FACILITY(
>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>> +
>>>>>> +    ret = pci_qdev_find_device(zpci->pci_id, &pdev);
>>>>>> +    if (ret < 0) {
>>>>>> +        error_setg(errp, "vfio pci device %s not found", zpci->pci_id);
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    QTAILQ_FOREACH(kid, &f->fbus->qbus.children, sibling) {
>>>>>> +        tmp = (S390PCIBusDevice *)kid->child;
>>>>>> +        if (tmp == zpci) {
>>>>>> +            continue;
>>>>>> +        }
>>>>>> +
>>>>>> +        if (tmp->fid == zpci->fid || tmp->uid == zpci->uid ||
>>>>>> +            !strcmp(tmp->pci_id, zpci->pci_id)) {
>>>>>> +            error_setg(errp, "zpci needs unique fid, uid and pci_id");
>>>>>> +            return;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pdev)->qbus.parent);
>>>>>> +    s->conn[PCI_SLOT(pdev->devfn)].zpci = zpci;
>>>>>> +
>>>>>> +    zpci->pdev = pdev;
>>>>>> +    zpci->fh = zpci->fid | FH_VIRT;
>>>>>> +    s390_pcihost_setup_msix(zpci);
>>>>>> +}
>>>>>> +
>>>>>> +static void s390_pci_device_unrealize(DeviceState *dev, Error **errp)
>>>>>> +{
>>>>>> +    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
>>>>>> +
>>>>>> +    zpci->fh = 0;
>>>>>> +    zpci->fid = 0;
>>>>>> +    zpci->pdev = NULL;
>>>>>> +}
>>>>>> +
>>>>>> +static Property s390_pci_device_properties[] = {
>>>>>> +    DEFINE_PROP_UINT32("fid", S390PCIBusDevice, fid, 0),
>>>>>> +    DEFINE_PROP_UINT32("uid", S390PCIBusDevice, uid, 0),
>>>>>> +    DEFINE_PROP_STRING("pci_id", S390PCIBusDevice, pci_id),
>>>>>> +    DEFINE_PROP_END_OF_LIST(),
>>>>>> +};
>>>>>> +
>>>>>> +static void s390_pci_device_class_init(ObjectClass *klass, void *data)
>>>>>> +{
>>>>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>>>>> +
>>>>>> +    dc->desc = "s390 pci device";
>>>>>> +    dc->bus_type = TYPE_S390_PCI_FAC_BUS;
>>>>>> +    dc->realize = s390_pci_device_realize;
>>>>>> +    dc->unrealize = s390_pci_device_unrealize;
>>>>>> +    dc->props = s390_pci_device_properties;
>>>>>> +}
>>>>>> +
>>>>>> +static const TypeInfo s390_pci_device_type_info = {
>>>>>> +    .name = TYPE_S390_PCI_DEVICE,
>>>>>> +    .parent = TYPE_DEVICE,
>>>>>> +    .instance_size = sizeof(S390PCIBusDevice),
>>>>>> +    .class_init = s390_pci_device_class_init,
>>>>>> +};
>>>>>> +
>>>>>>   static void s390_pci_register_types(void)
>>>>>>   {
>>>>>>       type_register_static(&s390_pcihost_info);
>>>>>> +    type_register_static(&s390_pci_facility_info);
>>>>>> +    type_register_static(&s390_pci_fac_bus_info);
>>>>>> +    type_register_static(&s390_pci_device_type_info);
>>>>>>   }
>>>>>>   type_init(s390_pci_register_types)
>>>>>> diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
>>>>>> index 464a92e..5bf3913 100644
>>>>>> --- a/hw/s390x/s390-pci-bus.h
>>>>>> +++ b/hw/s390x/s390-pci-bus.h
>>>>>> @@ -149,6 +149,21 @@ enum ZpciIoatDtype {
>>>>>>   #define ZPCI_TABLE_VALID_MASK           0x20
>>>>>>   #define ZPCI_TABLE_PROT_MASK            0x200
>>>>>> +#define TYPE_S390_PCI_FACILITY "s390-pci-facility"
>>>>>> +#define TYPE_S390_PCI_FAC_BUS "s390-pci-fac-bus"
>>>>>> +#define TYPE_S390_PCI_DEVICE "zpci"
>>>>>> +
>>>>>> +#define S390_PCI_FACILITY(obj) \
>>>>>> +    OBJECT_CHECK(S390PCIFacility, (obj), TYPE_S390_PCI_FACILITY)
>>>>>> +#define S390_PCI_FAC_BUS(obj) \
>>>>>> +    OBJECT_CHECK(S390PCIFacBus, (obj), TYPE_S390_PCI_FAC_BUS)
>>>>>> +#define S390_PCI_FACILITY_CLASS(klass) \
>>>>>> +    OBJECT_CLASS_CHECK(S390PCIFacilityClass, (klass), TYPE_S390_PCI_FACILITY)
>>>>>> +#define S390_PCI_DEVICE(obj) \
>>>>>> +    OBJECT_CHECK(S390PCIBusDevice, (obj), TYPE_S390_PCI_DEVICE)
>>>>>> +#define S390_PCI_FACILITY_GET_CLASS(obj) \
>>>>>> +    OBJECT_GET_CLASS(S390PCIFacilityClass, (obj), TYPE_S390_PCI_FACILITY)
>>>>>> +
>>>>>>   typedef struct SeiContainer {
>>>>>>       QTAILQ_ENTRY(SeiContainer) link;
>>>>>>       uint32_t fid;
>>>>>> @@ -214,12 +229,16 @@ typedef struct S390MsixInfo {
>>>>>>   } S390MsixInfo;
>>>>>>   typedef struct S390PCIBusDevice {
>>>>>> +    DeviceState qdev;
>>>>>>       PCIDevice *pdev;
>>>>>>       bool configured;
>>>>>> +    bool is_unplugged;
>>>>>>       bool error_state;
>>>>>>       bool lgstg_blocked;
>>>>>>       uint32_t fh;
>>>>>>       uint32_t fid;
>>>>>> +    uint32_t uid;
>>>>>> +    char *pci_id;
>>>>>>       uint64_t g_iota;
>>>>>>       uint64_t pba;
>>>>>>       uint64_t pal;
>>>>>> @@ -229,21 +248,42 @@ typedef struct S390PCIBusDevice {
>>>>>>       uint8_t sum;
>>>>>>       S390MsixInfo msix;
>>>>>>       AdapterRoutes routes;
>>>>>> -    AddressSpace as;
>>>>>> -    MemoryRegion mr;
>>>>>> +    QLIST_ENTRY(S390PCIDevice) entry;
>>>>>>   } S390PCIBusDevice;
>>>>>> +typedef struct S390PCIDeviceConn {
>>>>>> +    S390PCIBusDevice *zpci;
>>>>>> +    AddressSpace iommu_as;
>>>>>> +    MemoryRegion iommu_mr;
>>>>>> +} S390PCIDeviceConn;
>>>>>> +
>>>>>>   typedef struct S390pciState {
>>>>>>       PCIHostState parent_obj;
>>>>>> -    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
>>>>>> +    S390PCIDeviceConn conn[PCI_SLOT_MAX];
>>>>>>       AddressSpace msix_notify_as;
>>>>>>       MemoryRegion msix_notify_mr;
>>>>>> -    QTAILQ_HEAD(, SeiContainer) pending_sei;
>>>>>>   } S390pciState;
>>>>>> +typedef struct S390PCIFacBus {
>>>>>> +    BusState qbus;
>>>>>> +} S390PCIFacBus;
>>>>>> +
>>>>>> +typedef struct S390PCIFacility {
>>>>>> +    SysBusDevice parent_obj;
>>>>>> +    S390PCIFacBus *fbus;
>>>>>> +    QTAILQ_HEAD(, SeiContainer) pending_sei;
>>>>>> +} S390PCIFacility;
>>>>>> +
>>>>>> +typedef struct S390PCIFacilityClass {
>>>>>> +    DeviceClass parent_class;
>>>>>> +    int (*init)(S390PCIFacility *f);
>>>>>> +} S390PCIFacilityClass;
>>>>>> +
>>>>>>   int chsc_sei_nt2_get_event(void *res);
>>>>>>   int chsc_sei_nt2_have_event(void);
>>>>>>   void s390_pci_sclp_configure(int configure, SCCB *sccb);
>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci);
>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci);
>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
>>>>>> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
>>>>>> index f9151a9..2977e9c 100644
>>>>>> --- a/hw/s390x/s390-pci-inst.c
>>>>>> +++ b/hw/s390x/s390-pci-inst.c
>>>>>> @@ -208,12 +208,12 @@ int clp_service_call(S390CPU *cpu, uint8_t r2)
>>>>>>           switch (reqsetpci->oc) {
>>>>>>           case CLP_SET_ENABLE_PCI_FN:
>>>>>> -            pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>> +            s390_pci_device_enable(pbdev);
>>>>>>               stl_p(&ressetpci->fh, pbdev->fh);
>>>>>>               stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
>>>>>>               break;
>>>>>>           case CLP_SET_DISABLE_PCI_FN:
>>>>>> -            pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>> +            s390_pci_device_disable(pbdev);
>>>>>>               pbdev->error_state = false;
>>>>>>               pbdev->lgstg_blocked = false;
>>>>>>               stl_p(&ressetpci->fh, pbdev->fh);
>>>>>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>>>>>> index a3b14b5..56940e8 100644
>>>>>> --- a/hw/s390x/s390-virtio-ccw.c
>>>>>> +++ b/hw/s390x/s390-virtio-ccw.c
>>>>>> @@ -125,8 +125,8 @@ static void ccw_init(MachineState *machine)
>>>>>>                         machine->initrd_filename, "s390-ccw.img", true);
>>>>>>       s390_flic_init();
>>>>>> -    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
>>>>>> -    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
>>>>>> +    dev = qdev_create(NULL, TYPE_S390_PCI_FACILITY);
>>>>>> +    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_FACILITY,
>>>>>>                                 OBJECT(dev), NULL);
>>>>>>       qdev_init_nofail(dev);
>>>>>> @@ -173,6 +173,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
>>>>>>       mc->max_cpus = 255;
>>>>>>       mc->hot_add_cpu = ccw_hot_add_cpu;
>>>>>>       mc->is_default = 1;
>>>>>> +    mc->has_dynamic_sysbus = true;
>>>>>>       nc->nmi_monitor_handler = s390_nmi;
>>>>>>   }
>>>>>> -- 
>>>>>> 1.9.3
>>>>>>
>>>>>>
Michael S. Tsirkin July 1, 2015, 9:22 a.m. UTC | #7
On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
> 
> 
> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
> >On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
> >>
> >>On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> >>>On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> >>>>On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >>>>>On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>>>>>This patch introduce a new facility(and bus)
> >>>>>>to hold devices representing information actually
> >>>>>>provided by s390 firmware and I/O configuration.
> >>>>>>usage example:
> >>>>>>-device s390-pcihost
> >>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>>>>>
> >>>>>>The first line will create a s390 pci host bridge
> >>>>>>and init the root bus. The second line will create
> >>>>>>a standard vfio pci device, and attach it to the
> >>>>>>root bus. These are similiar to the standard process
> >>>>>>to define a pci device on other platform.
> >>>>>>
> >>>>>>The third line will create a s390 pci device to
> >>>>>>store s390 specific information, and references
> >>>>>>the corresponding vfio pci device via device id.
> >>>>>>We create a s390 pci facility bus to hold all the
> >>>>>>zpci devices.
> >>>>>>
> >>>>>>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >>>>>It's mostly up to s390 maintainers, but I'd like to note
> >>>>>one thing below
> >>>>>
> >>>>>>---
> >>>>>>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>>>>>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>>>>>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>>>>>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>>>>>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>>>>>
> >>>>>>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>>>>>index 560b66a..d5e7b2e 100644
> >>>>>>--- a/hw/s390x/s390-pci-bus.c
> >>>>>>+++ b/hw/s390x/s390-pci-bus.c
> >>>>>>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>      PciCcdfErr *eccdf;
> >>>>>>      int rc = 1;
> >>>>>>      SeiContainer *sei_cont;
> >>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>      if (!s) {
> >>>>>>          return rc;
> >>>>>>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>  int chsc_sei_nt2_have_event(void)
> >>>>>>  {
> >>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>      if (!s) {
> >>>>>>          return 0;
> >>>>>>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>>>>>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>>>>>  }
> >>>>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>>>>>+{
> >>>>>>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>>>+}
> >>>>>>+
> >>>>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>>>>>+{
> >>>>>>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>>>+    if (zpci->is_unplugged)
> >>>>>>+        object_unparent(OBJECT(zpci));
> >>>>>>+}
> >>>>>>+
> >>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>>>>>  {
> >>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>-    int i;
> >>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>+    BusChild *kid;
> >>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>      if (!s) {
> >>>>>>          return NULL;
> >>>>>>      }
> >>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>+        if (pbdev->fid == fid) {
> >>>>>>              return pbdev;
> >>>>>>          }
> >>>>>>      }
> >>>>>>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>>>>>      return;
> >>>>>>  }
> >>>>>>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>>>>>-{
> >>>>>>-    return PCI_SLOT(pdev->devfn);
> >>>>>>-}
> >>>>>>-
> >>>>>>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>>>>>-{
> >>>>>>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>>>>>-}
> >>>>>>-
> >>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>>>  {
> >>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>-    int i;
> >>>>>>-    int j = 0;
> >>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>+    BusChild *kid;
> >>>>>>+    int i = 0;
> >>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>      if (!s) {
> >>>>>>          return NULL;
> >>>>>>      }
> >>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>-
> >>>>>>-        if (pbdev->fh == 0) {
> >>>>>>-            continue;
> >>>>>>-        }
> >>>>>>-
> >>>>>>-        if (j == idx) {
> >>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>+        if (i == idx) {
> >>>>>>              return pbdev;
> >>>>>>          }
> >>>>>>-        j++;
> >>>>>>+        i++;
> >>>>>>      }
> >>>>>>      return NULL;
> >>>>>This relies on the order of children on the qbus, that's wrong I think.
> >>>>>Generally I'm not sure why do you convert all slot lookups to child
> >>>>>lookups: more code to achieve the same effect?
> >>>>Thank you Michael.
> >>>>I do the change due to two reasons:
> >>>>1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
> >>>>slots at most. So when it comes to multiple s390 pci root buses, the old code
> >>>>does not work.
> >>>>2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> >>>>s390 specific information, so we can attach all the zpci devices to a
> >>>>s390 pci facility bus. Since these zpci device has no relation with the "slot",
> >>>>so the order of them does not matter.
> >>>But you make this order guest-visible which seems wrong.
> >>>
> >>The guest uses a s390 specific "list pci" instruction to get all the zpci
> >>devices, and will
> >>create a root s390 pci bus for each device.  So the order has no relation
> >>with the pci
> >>topology on guest.
> >>
> >>If we assign  too many zpci devices to one guest, the "list pci" instruction
> >>will use a
> >>resume token to get all the zpci devices. For example, first time we return
> >>32 zpci
> >>devices to guest. Next time we'll return another 32 zpci devices. The resume
> >>token
> >>is used to store the beginning of zpci devices that will be returned to
> >>guest at next time.
> >>
> >>So, if we change the order of the zpci device on s390 facility bus, it may
> >>change the
> >>"batch" in which this device be returned to guest. But this will not change
> >>the  pci
> >>topology on guest.
> >Yes but that's still guest visible, and will break
> >for example if guest is migrated between qemu instances
> >where list order is different precisely when
> >it's enumerating the bus.
> >
> Yes, and the list order is not the only s390 specific information that
> exposed to
> guest. Besides that,  we need to migrate all other zpci information. For
> now,
> we have no plan to support zpci migration yet.

BTW how will hotplug work? If it happens while the guest
enumerates the bus then naturally all index values
become invalid.

Just don't expose internal qdev data structures to the guest.
It's not by chance that we don't have a lookup-by-index
capability; it's an attempt to enforce sane usage.
You are misusing the API with your hack.

PCI has standard ways to enumerate the bus, maybe you
should emulate it.  Or find some other way that works.
The idea to poke at s->fbus->qbus and count things there
is a bad one.
Hong Bo Li July 1, 2015, 10:04 a.m. UTC | #8
On 7/1/2015 17:22, Michael S. Tsirkin wrote:
> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>> to hold devices representing information actually
>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>> usage example:
>>>>>>>> -device s390-pcihost
>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>
>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>> and init the root bus. The second line will create
>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>> root bus. These are similiar to the standard process
>>>>>>>> to define a pci device on other platform.
>>>>>>>>
>>>>>>>> The third line will create a s390 pci device to
>>>>>>>> store s390 specific information, and references
>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>> zpci devices.
>>>>>>>>
>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>> one thing below
>>>>>>>
>>>>>>>> ---
>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>       int rc = 1;
>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>       if (!s) {
>>>>>>>>           return rc;
>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>   {
>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>       if (!s) {
>>>>>>>>           return 0;
>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>   }
>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>> +{
>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>> +{
>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>   {
>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>> -    int i;
>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>> +    BusChild *kid;
>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>       if (!s) {
>>>>>>>>           return NULL;
>>>>>>>>       }
>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>               return pbdev;
>>>>>>>>           }
>>>>>>>>       }
>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>       return;
>>>>>>>>   }
>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>> -{
>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>> -}
>>>>>>>> -
>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>> -{
>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>> -}
>>>>>>>> -
>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>   {
>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>> -    int i;
>>>>>>>> -    int j = 0;
>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>> +    BusChild *kid;
>>>>>>>> +    int i = 0;
>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>       if (!s) {
>>>>>>>>           return NULL;
>>>>>>>>       }
>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>> -
>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>> -            continue;
>>>>>>>> -        }
>>>>>>>> -
>>>>>>>> -        if (j == idx) {
>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>> +        if (i == idx) {
>>>>>>>>               return pbdev;
>>>>>>>>           }
>>>>>>>> -        j++;
>>>>>>>> +        i++;
>>>>>>>>       }
>>>>>>>>       return NULL;
>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>> lookups: more code to achieve the same effect?
>>>>>> Thank you Michael.
>>>>>> I do the change due to two reasons:
>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>> does not work.
>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>> so the order of them does not matter.
>>>>> But you make this order guest-visible which seems wrong.
>>>>>
>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>> devices, and will
>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>> with the pci
>>>> topology on guest.
>>>>
>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>> will use a
>>>> resume token to get all the zpci devices. For example, first time we return
>>>> 32 zpci
>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>> token
>>>> is used to store the beginning of zpci devices that will be returned to
>>>> guest at next time.
>>>>
>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>> change the
>>>> "batch" in which this device be returned to guest. But this will not change
>>>> the  pci
>>>> topology on guest.
>>> Yes but that's still guest visible, and will break
>>> for example if guest is migrated between qemu instances
>>> where list order is different precisely when
>>> it's enumerating the bus.
>>>
>> Yes, and the list order is not the only s390 specific information that
>> exposed to
>> guest. Besides that,  we need to migrate all other zpci information. For
>> now,
>> we have no plan to support zpci migration yet.
> BTW how will hotplug work? If it happens while guest
> enumerates the bus the naturally all index values
> become invalid.

The list zpci instruction only happens when the guest is doing
pci_base_init() for s390. At that moment, hotplug does not work yet.
And even assuming we do have that case, we would still have the index
issue when scanning a standard pci bus. Please see my comments below.

>
> Just don't expose internal qdev data structures to guest.
> It's not by chance that we don't have a look up by index
> capability, it's an attempt to enfoce sane usage.
> You are misusing the API with your hack.

The resume token of list zpci is indeed an index of iteration:(

>
> PCI has standard ways to enumerate the bus, maybe you
> should emulate it.  Or find some other way that works.
> The idea to poke at s->fbus->qbus and count things there
> is a bad one.
>
I can define multiple zpci buses, and attach each zpci device to a slot
of a root bus.
Then I need to add an API to the common pci code to scan all the
pci host bridges. But even this way, it still has the index issue: I need
to scan from the first bus to count the index. So do you have any
suggestions?
Michael S. Tsirkin July 1, 2015, 10:36 a.m. UTC | #9
On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
> 
> 
> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
> >On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
> >>
> >>On 7/1/2015 16:05, Michael S. Tsirkin wrote:
> >>>On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
> >>>>On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> >>>>>On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> >>>>>>On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >>>>>>>On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>>>>>>>This patch introduce a new facility(and bus)
> >>>>>>>>to hold devices representing information actually
> >>>>>>>>provided by s390 firmware and I/O configuration.
> >>>>>>>>usage example:
> >>>>>>>>-device s390-pcihost
> >>>>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>>>>>>>
> >>>>>>>>The first line will create a s390 pci host bridge
> >>>>>>>>and init the root bus. The second line will create
> >>>>>>>>a standard vfio pci device, and attach it to the
> >>>>>>>>root bus. These are similiar to the standard process
> >>>>>>>>to define a pci device on other platform.
> >>>>>>>>
> >>>>>>>>The third line will create a s390 pci device to
> >>>>>>>>store s390 specific information, and references
> >>>>>>>>the corresponding vfio pci device via device id.
> >>>>>>>>We create a s390 pci facility bus to hold all the
> >>>>>>>>zpci devices.
> >>>>>>>>
> >>>>>>>>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >>>>>>>It's mostly up to s390 maintainers, but I'd like to note
> >>>>>>>one thing below
> >>>>>>>
> >>>>>>>>---
> >>>>>>>>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>>>>>>>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>>>>>>>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>>>>>>>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>>>>>>>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>>>>>>>
> >>>>>>>>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>>>>>>>index 560b66a..d5e7b2e 100644
> >>>>>>>>--- a/hw/s390x/s390-pci-bus.c
> >>>>>>>>+++ b/hw/s390x/s390-pci-bus.c
> >>>>>>>>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>      PciCcdfErr *eccdf;
> >>>>>>>>      int rc = 1;
> >>>>>>>>      SeiContainer *sei_cont;
> >>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>      if (!s) {
> >>>>>>>>          return rc;
> >>>>>>>>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>  int chsc_sei_nt2_have_event(void)
> >>>>>>>>  {
> >>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>      if (!s) {
> >>>>>>>>          return 0;
> >>>>>>>>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>>>>>>>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>>>>>>>  }
> >>>>>>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>>>>>>>+{
> >>>>>>>>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>>>>>+}
> >>>>>>>>+
> >>>>>>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>>>>>>>+{
> >>>>>>>>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>>>>>+    if (zpci->is_unplugged)
> >>>>>>>>+        object_unparent(OBJECT(zpci));
> >>>>>>>>+}
> >>>>>>>>+
> >>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>>>>>>>  {
> >>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>-    int i;
> >>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>+    BusChild *kid;
> >>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>      if (!s) {
> >>>>>>>>          return NULL;
> >>>>>>>>      }
> >>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>+        if (pbdev->fid == fid) {
> >>>>>>>>              return pbdev;
> >>>>>>>>          }
> >>>>>>>>      }
> >>>>>>>>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>>>>>>>      return;
> >>>>>>>>  }
> >>>>>>>>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>>>>>>>-{
> >>>>>>>>-    return PCI_SLOT(pdev->devfn);
> >>>>>>>>-}
> >>>>>>>>-
> >>>>>>>>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>>>>>>>-{
> >>>>>>>>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>>>>>>>-}
> >>>>>>>>-
> >>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>>>>>  {
> >>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>-    int i;
> >>>>>>>>-    int j = 0;
> >>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>+    BusChild *kid;
> >>>>>>>>+    int i = 0;
> >>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>      if (!s) {
> >>>>>>>>          return NULL;
> >>>>>>>>      }
> >>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>-
> >>>>>>>>-        if (pbdev->fh == 0) {
> >>>>>>>>-            continue;
> >>>>>>>>-        }
> >>>>>>>>-
> >>>>>>>>-        if (j == idx) {
> >>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>+        if (i == idx) {
> >>>>>>>>              return pbdev;
> >>>>>>>>          }
> >>>>>>>>-        j++;
> >>>>>>>>+        i++;
> >>>>>>>>      }
> >>>>>>>>      return NULL;
> >>>>>>>This relies on the order of children on the qbus, that's wrong I think.
> >>>>>>>Generally I'm not sure why do you convert all slot lookups to child
> >>>>>>>lookups: more code to achieve the same effect?
> >>>>>>Thank you Michael.
> >>>>>>I do the change due to two reasons:
> >>>>>>1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
> >>>>>>slots at most. So when it comes to multiple s390 pci root buses, the old code
> >>>>>>does not work.
> >>>>>>2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> >>>>>>s390 specific information, so we can attach all the zpci devices to a
> >>>>>>s390 pci facility bus. Since these zpci device has no relation with the "slot",
> >>>>>>so the order of them does not matter.
> >>>>>But you make this order guest-visible which seems wrong.
> >>>>>
> >>>>The guest uses a s390 specific "list pci" instruction to get all the zpci
> >>>>devices, and will
> >>>>create a root s390 pci bus for each device.  So the order has no relation
> >>>>with the pci
> >>>>topology on guest.
> >>>>
> >>>>If we assign  too many zpci devices to one guest, the "list pci" instruction
> >>>>will use a
> >>>>resume token to get all the zpci devices. For example, first time we return
> >>>>32 zpci
> >>>>devices to guest. Next time we'll return another 32 zpci devices. The resume
> >>>>token
> >>>>is used to store the beginning of zpci devices that will be returned to
> >>>>guest at next time.
> >>>>
> >>>>So, if we change the order of the zpci device on s390 facility bus, it may
> >>>>change the
> >>>>"batch" in which this device be returned to guest. But this will not change
> >>>>the  pci
> >>>>topology on guest.
> >>>Yes but that's still guest visible, and will break
> >>>for example if guest is migrated between qemu instances
> >>>where list order is different precisely when
> >>>it's enumerating the bus.
> >>>
> >>Yes, and the list order is not the only s390 specific information that
> >>exposed to
> >>guest. Besides that,  we need to migrate all other zpci information. For
> >>now,
> >>we have no plan to support zpci migration yet.
> >BTW how will hotplug work? If it happens while guest
> >enumerates the bus the naturally all index values
> >become invalid.
> 
> The list zpci only happen when the guest doing pci_base_init() for s390.
> At that moment,  hotplug does not work yet.

You can't prevent this: user can request hotplug at this time.

> And assume we have
> that case, we still have the index issue even when scan standard pci
> bus. Please see my following words.
> 
> >
> >Just don't expose internal qdev data structures to guest.
> >It's not by chance that we don't have a look up by index
> >capability, it's an attempt to enfoce sane usage.
> >You are misusing the API with your hack.
> 
> The resume token of list zpci is indeed an index of iteration:(
> 
> >
> >PCI has standard ways to enumerate the bus, maybe you
> >should emulate it.  Or find some other way that works.
> >The idea to poke at s->fbus->qbus and count things there
> >is a bad one.
> >
> I can define multiple zpci buses, and attach zpci device to a slot of a root
> bus.
> Then I need to add a api to the common pci code to do the scan of all the
> pci host bridges. And in this way, it still has the index issue. I need to
> scan
> from the first bus to count the index. So any suggestion from you?
> 

OK, I looked at arch/s390/pci/pci.c.
First of all, it seems to run the regular PCI thing on bridges.

        zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
                                      zdev, &resources);

so to me, it looks like there's no need to expose
non-root buses through special means.

What to do for root buses is a different question but again,
you definitely do not want to rely on the order of things
on that linked list.
The simplest thing is to ask user to give them unique
numbers, or find some stable way to sort them that
does not rely on order of initialization (e.g. device IDs?).

But again, this only works ok for root buses.
Hong Bo Li July 1, 2015, 11:11 a.m. UTC | #10
On 7/1/2015 18:36, Michael S. Tsirkin wrote:
> On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>>>> to hold devices representing information actually
>>>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>>>> usage example:
>>>>>>>>>> -device s390-pcihost
>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>>>
>>>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>>>> and init the root bus. The second line will create
>>>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>>>> root bus. These are similiar to the standard process
>>>>>>>>>> to define a pci device on other platform.
>>>>>>>>>>
>>>>>>>>>> The third line will create a s390 pci device to
>>>>>>>>>> store s390 specific information, and references
>>>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>>>> zpci devices.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>>>> one thing below
>>>>>>>>>
>>>>>>>>>> ---
>>>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>>>       int rc = 1;
>>>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>       if (!s) {
>>>>>>>>>>           return rc;
>>>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>>>   {
>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>       if (!s) {
>>>>>>>>>>           return 0;
>>>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>>>   }
>>>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>>>> +{
>>>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>>>> +{
>>>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>>>   {
>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>> -    int i;
>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>       if (!s) {
>>>>>>>>>>           return NULL;
>>>>>>>>>>       }
>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>>>               return pbdev;
>>>>>>>>>>           }
>>>>>>>>>>       }
>>>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>>>       return;
>>>>>>>>>>   }
>>>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>>>> -{
>>>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>>>> -}
>>>>>>>>>> -
>>>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>>>> -{
>>>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>>>> -}
>>>>>>>>>> -
>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>>>   {
>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>> -    int i;
>>>>>>>>>> -    int j = 0;
>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>> +    int i = 0;
>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>       if (!s) {
>>>>>>>>>>           return NULL;
>>>>>>>>>>       }
>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>> -
>>>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>>>> -            continue;
>>>>>>>>>> -        }
>>>>>>>>>> -
>>>>>>>>>> -        if (j == idx) {
>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>> +        if (i == idx) {
>>>>>>>>>>               return pbdev;
>>>>>>>>>>           }
>>>>>>>>>> -        j++;
>>>>>>>>>> +        i++;
>>>>>>>>>>       }
>>>>>>>>>>       return NULL;
>>>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>>>> lookups: more code to achieve the same effect?
>>>>>>>> Thank you Michael.
>>>>>>>> I do the change due to two reasons:
>>>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>>>> does not work.
>>>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>>>> so the order of them does not matter.
>>>>>>> But you make this order guest-visible which seems wrong.
>>>>>>>
>>>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>>>> devices, and will
>>>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>>>> with the pci
>>>>>> topology on guest.
>>>>>>
>>>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>>>> will use a
>>>>>> resume token to get all the zpci devices. For example, first time we return
>>>>>> 32 zpci
>>>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>>>> token
>>>>>> is used to store the beginning of zpci devices that will be returned to
>>>>>> guest at next time.
>>>>>>
>>>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>>>> change the
>>>>>> "batch" in which this device be returned to guest. But this will not change
>>>>>> the  pci
>>>>>> topology on guest.
>>>>> Yes but that's still guest visible, and will break
>>>>> for example if guest is migrated between qemu instances
>>>>> where list order is different precisely when
>>>>> it's enumerating the bus.
>>>>>
>>>> Yes, and the list order is not the only s390 specific information that
>>>> exposed to
>>>> guest. Besides that,  we need to migrate all other zpci information. For
>>>> now,
>>>> we have no plan to support zpci migration yet.
>>> BTW how will hotplug work? If it happens while guest
>>> enumerates the bus the naturally all index values
>>> become invalid.
>> The list zpci only happen when the guest doing pci_base_init() for s390.
>> At that moment,  hotplug does not work yet.
> You can't prevent this: user can request hotplug at this time.
>
>> And assume we have
>> that case, we still have the index issue even when scan standard pci
>> bus. Please see my following words.
>>
>>> Just don't expose internal qdev data structures to guest.
>>> It's not by chance that we don't have a look up by index
>>> capability, it's an attempt to enfoce sane usage.
>>> You are misusing the API with your hack.
>> The resume token of list zpci is indeed an index of iteration:(
>>
>>> PCI has standard ways to enumerate the bus, maybe you
>>> should emulate it.  Or find some other way that works.
>>> The idea to poke at s->fbus->qbus and count things there
>>> is a bad one.
>>>
>> I can define multiple zpci buses, and attach zpci device to a slot of a root
>> bus.
>> Then I need to add a api to the common pci code to do the scan of all the
>> pci host bridges. And in this way, it still has the index issue. I need to
>> scan
>> from the first bus to count the index. So any suggestion from you?
>>
> OK, I looked at arch/s390/pci/pci.c.
> First of all, it seems to run the regular PCI thing on bridges.
>
>          zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>                                        zdev, &resources);

At this moment, the guest has already got all the zpci devices through the
clp list zpci instruction. For each device, pci_scan_root_bus() will create
a root bus. So for s390, we get the pci devices first, then create a new
root bus for each of them.


>
> so to me, it looks like there's no need to expose
> non-root buses through special means.
>
> What to do for root buses is a different question but again,
> you definitely do not want to rely on the order of things
> on that linked list.
> The simplest thing is to ask user to give them unique
> numbers, or find some stable way to sort them that
> does not rely on order of initialization (e.g. device IDs?).
>
> But again, this only works ok for root buses.
>
Basically, it does not expose the buses to the guest; it exposes an index
to the guest.
Here is the process to get all the zpci devices for a guest.
For example: we have 10 zpci devices, and the batch size for the list zpci
instruction is 4.
First, qemu will return devices 0-3; the index of list zpci is 0.
Second, qemu will return devices 4-7; the index of list zpci is 4.
Third, qemu will return devices 8-9; the index of list zpci is 8.
We have a device id, but list zpci does not use that as a flag to get the
next batch; it uses an index instead.
This process is defined by the s390 architecture; we can't change it.
So no matter how we organize zpci devices in qemu, by slot or by linked
list, we cannot get rid of the index issue.

How about I add a flag to identify whether the linked list
is valid or not? When a hotplug/unplug event occurs, I will
reset the index, and make the guest refetch the zpci devices
from the beginning.
Michael S. Tsirkin July 1, 2015, 11:23 a.m. UTC | #11
On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
> 
> 
> On 7/1/2015 18:36, Michael S. Tsirkin wrote:
> >On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
> >>
> >>On 7/1/2015 17:22, Michael S. Tsirkin wrote:
> >>>On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
> >>>>On 7/1/2015 16:05, Michael S. Tsirkin wrote:
> >>>>>On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
> >>>>>>On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> >>>>>>>On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> >>>>>>>>On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >>>>>>>>>On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>This patch introduce a new facility(and bus)
> >>>>>>>>>>to hold devices representing information actually
> >>>>>>>>>>provided by s390 firmware and I/O configuration.
> >>>>>>>>>>usage example:
> >>>>>>>>>>-device s390-pcihost
> >>>>>>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>>>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>>>>>>>>>
> >>>>>>>>>>The first line will create a s390 pci host bridge
> >>>>>>>>>>and init the root bus. The second line will create
> >>>>>>>>>>a standard vfio pci device, and attach it to the
> >>>>>>>>>>root bus. These are similiar to the standard process
> >>>>>>>>>>to define a pci device on other platform.
> >>>>>>>>>>
> >>>>>>>>>>The third line will create a s390 pci device to
> >>>>>>>>>>store s390 specific information, and references
> >>>>>>>>>>the corresponding vfio pci device via device id.
> >>>>>>>>>>We create a s390 pci facility bus to hold all the
> >>>>>>>>>>zpci devices.
> >>>>>>>>>>
> >>>>>>>>>>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >>>>>>>>>It's mostly up to s390 maintainers, but I'd like to note
> >>>>>>>>>one thing below
> >>>>>>>>>
> >>>>>>>>>>---
> >>>>>>>>>>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>>>>>>>>>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>>>>>>>>>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>>>>>>>>>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>>>>>>>>>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>>>>>>>>>
> >>>>>>>>>>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>index 560b66a..d5e7b2e 100644
> >>>>>>>>>>--- a/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>+++ b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>      PciCcdfErr *eccdf;
> >>>>>>>>>>      int rc = 1;
> >>>>>>>>>>      SeiContainer *sei_cont;
> >>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>      if (!s) {
> >>>>>>>>>>          return rc;
> >>>>>>>>>>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>  int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>  {
> >>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>      if (!s) {
> >>>>>>>>>>          return 0;
> >>>>>>>>>>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>>>>>>>>>  }
> >>>>>>>>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>>>>>>>>>+{
> >>>>>>>>>>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>>>>>>>+}
> >>>>>>>>>>+
> >>>>>>>>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>>>>>>>>>+{
> >>>>>>>>>>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>>>>>>>+    if (zpci->is_unplugged)
> >>>>>>>>>>+        object_unparent(OBJECT(zpci));
> >>>>>>>>>>+}
> >>>>>>>>>>+
> >>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>>>>>>>>>  {
> >>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>-    int i;
> >>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>      if (!s) {
> >>>>>>>>>>          return NULL;
> >>>>>>>>>>      }
> >>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>+        if (pbdev->fid == fid) {
> >>>>>>>>>>              return pbdev;
> >>>>>>>>>>          }
> >>>>>>>>>>      }
> >>>>>>>>>>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>>>>>>>>>      return;
> >>>>>>>>>>  }
> >>>>>>>>>>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>>>>>>>>>-{
> >>>>>>>>>>-    return PCI_SLOT(pdev->devfn);
> >>>>>>>>>>-}
> >>>>>>>>>>-
> >>>>>>>>>>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>>>>>>>>>-{
> >>>>>>>>>>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>>>>>>>>>-}
> >>>>>>>>>>-
> >>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>>>>>>>  {
> >>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>-    int i;
> >>>>>>>>>>-    int j = 0;
> >>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>+    int i = 0;
> >>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>      if (!s) {
> >>>>>>>>>>          return NULL;
> >>>>>>>>>>      }
> >>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>-
> >>>>>>>>>>-        if (pbdev->fh == 0) {
> >>>>>>>>>>-            continue;
> >>>>>>>>>>-        }
> >>>>>>>>>>-
> >>>>>>>>>>-        if (j == idx) {
> >>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>+        if (i == idx) {
> >>>>>>>>>>              return pbdev;
> >>>>>>>>>>          }
> >>>>>>>>>>-        j++;
> >>>>>>>>>>+        i++;
> >>>>>>>>>>      }
> >>>>>>>>>>      return NULL;
> >>>>>>>>>This relies on the order of children on the qbus, that's wrong I think.
> >>>>>>>>>Generally I'm not sure why do you convert all slot lookups to child
> >>>>>>>>>lookups: more code to achieve the same effect?
> >>>>>>>>Thank you Michael.
> >>>>>>>>I do the change due to two reasons:
> >>>>>>>>1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
> >>>>>>>>slots at most. So when it comes to multiple s390 pci root buses, the old code
> >>>>>>>>does not work.
> >>>>>>>>2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> >>>>>>>>s390 specific information, so we can attach all the zpci devices to a
> >>>>>>>>s390 pci facility bus. Since these zpci device has no relation with the "slot",
> >>>>>>>>so the order of them does not matter.
> >>>>>>>But you make this order guest-visible which seems wrong.
> >>>>>>>
> >>>>>>The guest uses a s390 specific "list pci" instruction to get all the zpci
> >>>>>>devices, and will
> >>>>>>create a root s390 pci bus for each device.  So the order has no relation
> >>>>>>with the pci
> >>>>>>topology on guest.
> >>>>>>
> >>>>>>If we assign  too many zpci devices to one guest, the "list pci" instruction
> >>>>>>will use a
> >>>>>>resume token to get all the zpci devices. For example, first time we return
> >>>>>>32 zpci
> >>>>>>devices to guest. Next time we'll return another 32 zpci devices. The resume
> >>>>>>token
> >>>>>>is used to store the beginning of zpci devices that will be returned to
> >>>>>>guest at next time.
> >>>>>>
> >>>>>>So, if we change the order of the zpci device on s390 facility bus, it may
> >>>>>>change the
> >>>>>>"batch" in which this device be returned to guest. But this will not change
> >>>>>>the  pci
> >>>>>>topology on guest.
> >>>>>Yes but that's still guest visible, and will break
> >>>>>for example if guest is migrated between qemu instances
> >>>>>where list order is different precisely when
> >>>>>it's enumerating the bus.
> >>>>>
> >>>>Yes, and the list order is not the only s390 specific information that
> >>>>exposed to
> >>>>guest. Besides that,  we need to migrate all other zpci information. For
> >>>>now,
> >>>>we have no plan to support zpci migration yet.
> >>>BTW how will hotplug work? If it happens while guest
> >>>enumerates the bus the naturally all index values
> >>>become invalid.
> >>The list zpci only happen when the guest doing pci_base_init() for s390.
> >>At that moment,  hotplug does not work yet.
> >You can't prevent this: user can request hotplug at this time.
> >
> >>And assume we have
> >>that case, we still have the index issue even when scan standard pci
> >>bus. Please see my following words.
> >>
> >>>Just don't expose internal qdev data structures to guest.
> >>>It's not by chance that we don't have a look up by index
> >>>capability, it's an attempt to enfoce sane usage.
> >>>You are misusing the API with your hack.
> >>The resume token of list zpci is indeed an index of iteration:(
> >>
> >>>PCI has standard ways to enumerate the bus, maybe you
> >>>should emulate it.  Or find some other way that works.
> >>>The idea to poke at s->fbus->qbus and count things there
> >>>is a bad one.
> >>>
> >>I can define multiple zpci buses, and attach zpci device to a slot of a root
> >>bus.
> >>Then I need to add a api to the common pci code to do the scan of all the
> >>pci host bridges. And in this way, it still has the index issue. I need to
> >>scan
> >>from the first bus to count the index. So any suggestion from you?
> >>
> >OK, I looked at arch/s390/pci/pci.c.
> >First of all, it seems to run the regular PCI thing on bridges.
> >
> >         zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
> >                                       zdev, &resources);
> 
> At this moment, the guest has got all the zpci devices through clp list zpci
> instruction. For each device, in the pci_scan_root_bus(), it will create
> a root bus. So for s390, we get pci devices first, then create a new root bus
> for it.

I don't see this in guest code.

I looked at pci_scan_root_bus and it's completely generic.
It sets up the bus:
        b = pci_create_root_bus(parent, bus, ops, sysdata, resources);

then it scans it:
        max = pci_scan_child_bus(b);


that one does
        /* Go find them, Rover! */
        for (devfn = 0; devfn < 0x100; devfn += 8)
                pci_scan_slot(bus, devfn);

next
        dev = pci_scan_single_device(bus, devfn);

and so on. Eventually you get
        if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
                return NULL;

and that one does the clp thing using zpci_cfg_load.




> 
> >
> >so to me, it looks like there's no need to expose
> >non-root buses through special means.
> >
> >What to do for root buses is a different question but again,
> >you definitely do not want to rely on the order of things
> >on that linked list.
> >The simplest thing is to ask user to give them unique
> >numbers, or find some stable way to sort them that
> >does not rely on order of initialization (e.g. device IDs?).
> >
> >But again, this only works ok for root buses.
> >
> Basically, it does not exposed the buses to guest, it exposed an index
> to guest.
> Here is the process to get all the zpci device for a guest.
> For example: we have 10 zpci devices, and the batch size for list zpci
> instruction is 4.
> First, qemu will return devices 0-3, index of list zpci is 0
> Second, qemu will return device 4-7, index of list zpci is 4
> Third, qemu will return device 8-9, index of list zpci is 8
> We have device id, but list zpci does not use that as a flag to get
> next batch, it use an index instead.
> This process is defined by s390 arch, we can't change it.
> So no matter how we organize zpci devices in qemu, slot or link list.
> We could not get rid of the index issue.
> 
> How about I add a flag to identify whether the link list
> is valid or not. When a hotplug/unplug event occurred, I will
> reset the index, and make the guest refetch the zpci devices
> from the beginning.
> 
> 
> 
> 

You should just use something stable for IDs.
And avoid doing it for anything that isn't a root or maybe a bridge
since it'll just cause everyone maintenance problems down the road.
Hong Bo Li July 1, 2015, 11:46 a.m. UTC | #12
On 7/1/2015 19:23, Michael S. Tsirkin wrote:
> On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 18:36, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
>>>>> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>>>>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>>>>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>>>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>>>>>> to hold devices representing information actually
>>>>>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>>>>>> usage example:
>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>>>>>
>>>>>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>>>>>> and init the root bus. The second line will create
>>>>>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>>>>>> root bus. These are similiar to the standard process
>>>>>>>>>>>> to define a pci device on other platform.
>>>>>>>>>>>>
>>>>>>>>>>>> The third line will create a s390 pci device to
>>>>>>>>>>>> store s390 specific information, and references
>>>>>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>>>>>> zpci devices.
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>>>>>> one thing below
>>>>>>>>>>>
>>>>>>>>>>>> ---
>>>>>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>>>>>       int rc = 1;
>>>>>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>           return rc;
>>>>>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>   {
>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>           return 0;
>>>>>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>>>>>   }
>>>>>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>>>>>   {
>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>> -    int i;
>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>       }
>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>           }
>>>>>>>>>>>>       }
>>>>>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>>>>>       return;
>>>>>>>>>>>>   }
>>>>>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>>>>>> -{
>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>>>>>> -}
>>>>>>>>>>>> -
>>>>>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>>>>>> -{
>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>>>>>> -}
>>>>>>>>>>>> -
>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>>>>>   {
>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>> -    int i;
>>>>>>>>>>>> -    int j = 0;
>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>> +    int i = 0;
>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>       }
>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>> -
>>>>>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>>>>>> -            continue;
>>>>>>>>>>>> -        }
>>>>>>>>>>>> -
>>>>>>>>>>>> -        if (j == idx) {
>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>> +        if (i == idx) {
>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>           }
>>>>>>>>>>>> -        j++;
>>>>>>>>>>>> +        i++;
>>>>>>>>>>>>       }
>>>>>>>>>>>>       return NULL;
>>>>>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>>>>>> lookups: more code to achieve the same effect?
>>>>>>>>>> Thank you Michael.
>>>>>>>>>> I do the change due to two reasons:
>>>>>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>>>>>> does not work.
>>>>>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>>>>>> so the order of them does not matter.
>>>>>>>>> But you make this order guest-visible which seems wrong.
>>>>>>>>>
>>>>>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>>>>>> devices, and will
>>>>>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>>>>>> with the pci
>>>>>>>> topology on guest.
>>>>>>>>
>>>>>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>>>>>> will use a
>>>>>>>> resume token to get all the zpci devices. For example, first time we return
>>>>>>>> 32 zpci
>>>>>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>>>>>> token
>>>>>>>> is used to store the beginning of zpci devices that will be returned to
>>>>>>>> guest at next time.
>>>>>>>>
>>>>>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>>>>>> change the
>>>>>>>> "batch" in which this device be returned to guest. But this will not change
>>>>>>>> the  pci
>>>>>>>> topology on guest.
>>>>>>> Yes but that's still guest visible, and will break
>>>>>>> for example if guest is migrated between qemu instances
>>>>>>> where list order is different precisely when
>>>>>>> it's enumerating the bus.
>>>>>>>
>>>>>> Yes, and the list order is not the only s390 specific information that
>>>>>> exposed to
>>>>>> guest. Besides that,  we need to migrate all other zpci information. For
>>>>>> now,
>>>>>> we have no plan to support zpci migration yet.
>>>>> BTW how will hotplug work? If it happens while guest
>>>>> enumerates the bus the naturally all index values
>>>>> become invalid.
>>>> The list zpci only happen when the guest doing pci_base_init() for s390.
>>>> At that moment,  hotplug does not work yet.
>>> You can't prevent this: user can request hotplug at this time.
>>>
>>>> And assume we have
>>>> that case, we still have the index issue even when scan standard pci
>>>> bus. Please see my following words.
>>>>
>>>>> Just don't expose internal qdev data structures to guest.
>>>>> It's not by chance that we don't have a look up by index
>>>>> capability, it's an attempt to enfoce sane usage.
>>>>> You are misusing the API with your hack.
>>>> The resume token of list zpci is indeed an index of iteration:(
>>>>
>>>>> PCI has standard ways to enumerate the bus, maybe you
>>>>> should emulate it.  Or find some other way that works.
>>>>> The idea to poke at s->fbus->qbus and count things there
>>>>> is a bad one.
>>>>>
>>>> I can define multiple zpci buses, and attach zpci device to a slot of a root
>>>> bus.
>>>> Then I need to add a api to the common pci code to do the scan of all the
>>>> pci host bridges. And in this way, it still has the index issue. I need to
>>>> scan
>>>> from the first bus to count the index. So any suggestion from you?
>>> OK, I looked at arch/s390/pci/pci.c.
>>> First of all, it seems to run the regular PCI thing on bridges.
>>>
>>>          zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>                                        zdev, &resources);
>> At this moment, the guest has got all the zpci devices through clp list zpci
>> instruction. For each device, in the pci_scan_root_bus(), it will create
>> a root bus. So for s390, we get pci devices first, then create a new root bus
>> for it.
> I don't see this in guest code.
>
> I looked at pci_scan_root_bus and it's completely generic.
> It sets up the bus:
>          b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
>
> then it scans it:
>          max = pci_scan_child_bus(b);
>
>
> that one does
>          /* Go find them, Rover! */
>          for (devfn = 0; devfn < 0x100; devfn += 8)
>                  pci_scan_slot(bus, devfn);
>
> next
>          dev = pci_scan_single_device(bus, devfn);
>
> and so on. Eventually you get
>          if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
>                  return NULL;
>
> and that one does the clp thing using zpci_cfg_load.
>
pci_base_init()-> clp_scan_pci_devices():
     rc = clp_list_pci(rrb, __clp_add);
In this function, there is a while loop that gets all the zpci devices by
means of the resume token (index). And for each device,
     __clp_add()-> clp_add_pci_device();
In clp_add_pci_device(), we use the zpci information to create a struct
zpci_dev zdev.
Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
     zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
                       zdev, &resources);
So, you see, each zdev has its own root bus. And there is no child bus
under that root bus.
>
>
>>> so to me, it looks like there's no need to expose
>>> non-root buses through special means.
>>>
>>> What to do for root buses is a different question but again,
>>> you definitely do not want to rely on the order of things
>>> on that linked list.
>>> The simplest thing is to ask user to give them unique
>>> numbers, or find some stable way to sort them that
>>> does not rely on order of initialization (e.g. device IDs?).
>>>
>>> But again, this only works ok for root buses.
>>>
>> Basically, it does not exposed the buses to guest, it exposed an index
>> to guest.
>> Here is the process to get all the zpci device for a guest.
>> For example: we have 10 zpci devices, and the batch size for list zpci
>> instruction is 4.
>> First, qemu will return devices 0-3, index of list zpci is 0
>> Second, qemu will return device 4-7, index of list zpci is 4
>> Third, qemu will return device 8-9, index of list zpci is 8
>> We have device id, but list zpci does not use that as a flag to get
>> next batch, it use an index instead.
>> This process is defined by s390 arch, we can't change it.
>> So no matter how we organize zpci devices in qemu, slot or link list.
>> We could not get rid of the index issue.
>>
>> How about I add a flag to identify whether the link list
>> is valid or not. When a hotplug/unplug event occurred, I will
>> reset the index, and make the guest refetch the zpci devices
>> from the beginning.
>>
>>
>>
>>
> You should just use something stable for IDs.
> And avoid doing it for anything that isn't a root or maybe a bridge
> since it'll just cause everyone maintainance problems down the road.
>
The list zpci instruction is defined by the architecture; it is not a software
thing, so I cannot change it to use an ID instead...
Michael S. Tsirkin July 1, 2015, 11:57 a.m. UTC | #13
On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
> 
> 
> On 7/1/2015 19:23, Michael S. Tsirkin wrote:
> >On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
> >>
> >>On 7/1/2015 18:36, Michael S. Tsirkin wrote:
> >>>On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
> >>>>On 7/1/2015 17:22, Michael S. Tsirkin wrote:
> >>>>>On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
> >>>>>>On 7/1/2015 16:05, Michael S. Tsirkin wrote:
> >>>>>>>On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
> >>>>>>>>On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> >>>>>>>>>On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >>>>>>>>>>>On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>>>This patch introduce a new facility(and bus)
> >>>>>>>>>>>>to hold devices representing information actually
> >>>>>>>>>>>>provided by s390 firmware and I/O configuration.
> >>>>>>>>>>>>usage example:
> >>>>>>>>>>>>-device s390-pcihost
> >>>>>>>>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>>>>>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>>>>>>>>>>>
> >>>>>>>>>>>>The first line will create a s390 pci host bridge
> >>>>>>>>>>>>and init the root bus. The second line will create
> >>>>>>>>>>>>a standard vfio pci device, and attach it to the
> >>>>>>>>>>>>root bus. These are similiar to the standard process
> >>>>>>>>>>>>to define a pci device on other platform.
> >>>>>>>>>>>>
> >>>>>>>>>>>>The third line will create a s390 pci device to
> >>>>>>>>>>>>store s390 specific information, and references
> >>>>>>>>>>>>the corresponding vfio pci device via device id.
> >>>>>>>>>>>>We create a s390 pci facility bus to hold all the
> >>>>>>>>>>>>zpci devices.
> >>>>>>>>>>>>
> >>>>>>>>>>>>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >>>>>>>>>>>It's mostly up to s390 maintainers, but I'd like to note
> >>>>>>>>>>>one thing below
> >>>>>>>>>>>
> >>>>>>>>>>>>---
> >>>>>>>>>>>>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>>>>>>>>>>>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>>>>>>>>>>>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>>>>>>>>>>>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>>>>>>>>>>>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>>>>>>>>>>>
> >>>>>>>>>>>>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>index 560b66a..d5e7b2e 100644
> >>>>>>>>>>>>--- a/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>+++ b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>>>      PciCcdfErr *eccdf;
> >>>>>>>>>>>>      int rc = 1;
> >>>>>>>>>>>>      SeiContainer *sei_cont;
> >>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>          return rc;
> >>>>>>>>>>>>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>>>  int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>>>  {
> >>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>          return 0;
> >>>>>>>>>>>>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>>>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>>>>>>>>>>>  }
> >>>>>>>>>>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>>>>>>>>>>>+{
> >>>>>>>>>>>>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>>>>>>>>>+}
> >>>>>>>>>>>>+
> >>>>>>>>>>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>>>>>>>>>>>+{
> >>>>>>>>>>>>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>>>>>>>>>+    if (zpci->is_unplugged)
> >>>>>>>>>>>>+        object_unparent(OBJECT(zpci));
> >>>>>>>>>>>>+}
> >>>>>>>>>>>>+
> >>>>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>>>>>>>>>>>  {
> >>>>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>>>-    int i;
> >>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>          return NULL;
> >>>>>>>>>>>>      }
> >>>>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>>>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>>>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>>>+        if (pbdev->fid == fid) {
> >>>>>>>>>>>>              return pbdev;
> >>>>>>>>>>>>          }
> >>>>>>>>>>>>      }
> >>>>>>>>>>>>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>>>>>>>>>>>      return;
> >>>>>>>>>>>>  }
> >>>>>>>>>>>>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>>>>>>>>>>>-{
> >>>>>>>>>>>>-    return PCI_SLOT(pdev->devfn);
> >>>>>>>>>>>>-}
> >>>>>>>>>>>>-
> >>>>>>>>>>>>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>>>>>>>>>>>-{
> >>>>>>>>>>>>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>>>>>>>>>>>-}
> >>>>>>>>>>>>-
> >>>>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>>>>>>>>>  {
> >>>>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>>>-    int i;
> >>>>>>>>>>>>-    int j = 0;
> >>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>>>+    int i = 0;
> >>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>          return NULL;
> >>>>>>>>>>>>      }
> >>>>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>>>-
> >>>>>>>>>>>>-        if (pbdev->fh == 0) {
> >>>>>>>>>>>>-            continue;
> >>>>>>>>>>>>-        }
> >>>>>>>>>>>>-
> >>>>>>>>>>>>-        if (j == idx) {
> >>>>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>>>+        if (i == idx) {
> >>>>>>>>>>>>              return pbdev;
> >>>>>>>>>>>>          }
> >>>>>>>>>>>>-        j++;
> >>>>>>>>>>>>+        i++;
> >>>>>>>>>>>>      }
> >>>>>>>>>>>>      return NULL;
> >>>>>>>>>>>This relies on the order of children on the qbus, that's wrong I think.
> >>>>>>>>>>>Generally I'm not sure why do you convert all slot lookups to child
> >>>>>>>>>>>lookups: more code to achieve the same effect?
> >>>>>>>>>>Thank you Michael.
> >>>>>>>>>>I do the change due to two reasons:
> >>>>>>>>>>1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
> >>>>>>>>>>slots at most. So when it comes to multiple s390 pci root buses, the old code
> >>>>>>>>>>does not work.
> >>>>>>>>>>2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> >>>>>>>>>>s390 specific information, so we can attach all the zpci devices to a
> >>>>>>>>>>s390 pci facility bus. Since these zpci device has no relation with the "slot",
> >>>>>>>>>>so the order of them does not matter.
> >>>>>>>>>But you make this order guest-visible which seems wrong.
> >>>>>>>>>
> >>>>>>>>The guest uses a s390 specific "list pci" instruction to get all the zpci
> >>>>>>>>devices, and will
> >>>>>>>>create a root s390 pci bus for each device.  So the order has no relation
> >>>>>>>>with the pci
> >>>>>>>>topology on guest.
> >>>>>>>>
> >>>>>>>>If we assign  too many zpci devices to one guest, the "list pci" instruction
> >>>>>>>>will use a
> >>>>>>>>resume token to get all the zpci devices. For example, first time we return
> >>>>>>>>32 zpci
> >>>>>>>>devices to guest. Next time we'll return another 32 zpci devices. The resume
> >>>>>>>>token
> >>>>>>>>is used to store the beginning of zpci devices that will be returned to
> >>>>>>>>guest at next time.
> >>>>>>>>
> >>>>>>>>So, if we change the order of the zpci device on s390 facility bus, it may
> >>>>>>>>change the
> >>>>>>>>"batch" in which this device be returned to guest. But this will not change
> >>>>>>>>the  pci
> >>>>>>>>topology on guest.
> >>>>>>>Yes but that's still guest visible, and will break
> >>>>>>>for example if guest is migrated between qemu instances
> >>>>>>>where list order is different precisely when
> >>>>>>>it's enumerating the bus.
> >>>>>>>
> >>>>>>Yes, and the list order is not the only s390 specific information that
> >>>>>>exposed to
> >>>>>>guest. Besides that,  we need to migrate all other zpci information. For
> >>>>>>now,
> >>>>>>we have no plan to support zpci migration yet.
> >>>>>BTW how will hotplug work? If it happens while guest
> >>>>>enumerates the bus the naturally all index values
> >>>>>become invalid.
> >>>>The list zpci only happen when the guest doing pci_base_init() for s390.
> >>>>At that moment,  hotplug does not work yet.
> >>>You can't prevent this: user can request hotplug at this time.
> >>>
> >>>>And assume we have
> >>>>that case, we still have the index issue even when scan standard pci
> >>>>bus. Please see my following words.
> >>>>
> >>>>>Just don't expose internal qdev data structures to guest.
> >>>>>It's not by chance that we don't have a look up by index
> >>>>>capability, it's an attempt to enforce sane usage.
> >>>>>You are misusing the API with your hack.
> >>>>The resume token of list zpci is indeed an index of iteration:(
> >>>>
> >>>>>PCI has standard ways to enumerate the bus, maybe you
> >>>>>should emulate it.  Or find some other way that works.
> >>>>>The idea to poke at s->fbus->qbus and count things there
> >>>>>is a bad one.
> >>>>>
> >>>>I can define multiple zpci buses, and attach zpci device to a slot of a root
> >>>>bus.
> >>>>Then I need to add a api to the common pci code to do the scan of all the
> >>>>pci host bridges. And in this way, it still has the index issue. I need to
> >>>>scan
> >>>>from the first bus to count the index. So any suggestion from you?
> >>>OK, I looked at arch/s390/pci/pci.c.
> >>>First of all, it seems to run the regular PCI thing on bridges.
> >>>
> >>>         zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
> >>>                                       zdev, &resources);
> >>At this moment, the guest has got all the zpci devices through clp list zpci
> >>instruction. For each device, in the pci_scan_root_bus(), it will create
> >>a root bus. So for s390, we get pci devices first, then create a new root bus
> >>for it.
> >I don't see this in guest code.
> >
> >I looked at pci_scan_root_bus and it's completely generic.
> >It sets up the bus:
> >         b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
> >
> >then it scans it:
> >         max = pci_scan_child_bus(b);
> >
> >
> >that one does
> >         /* Go find them, Rover! */
> >         for (devfn = 0; devfn < 0x100; devfn += 8)
> >                 pci_scan_slot(bus, devfn);
> >
> >next
> >         dev = pci_scan_single_device(bus, devfn);
> >
> >and so on. Eventually you get
> >         if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
> >                 return NULL;
> >
> >and that one does the clp thing using zpci_cfg_load.
> >
> pci_base_init()-> clp_scan_pci_devices():
>     rc = clp_list_pci(rrb, __clp_add);
> In this function, there is a while loop to get all the zpci devices by means
> of
> resume token(index). And for each device,
>     __clp_add()-> clp_add_pci_device();
> In clp_add_pci_device(), we use the zpci information to create a struct
> zpci_dev zdev.
> Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
>     zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>                       zdev, &resources);
> So, you see, each zdev has its own root bus. And there is no child bus under
> that root bus.

Right - zdev *is* the root. But there are pci devices hanging off it.

So why not model it like this?

vfio should attach to zdev, zdev is the pci host.

Also, you can stick a pci to pci bridge under the root, and
everything will just work.





> >
> >
> >>>so to me, it looks like there's no need to expose
> >>>non-root buses through special means.
> >>>
> >>>What to do for root buses is a different question but again,
> >>>you definitely do not want to rely on the order of things
> >>>on that linked list.
> >>>The simplest thing is to ask user to give them unique
> >>>numbers, or find some stable way to sort them that
> >>>does not rely on order of initialization (e.g. device IDs?).
> >>>
> >>>But again, this only works ok for root buses.
> >>>
> >>Basically, it does not exposed the buses to guest, it exposed an index
> >>to guest.
> >>Here is the process to get all the zpci device for a guest.
> >>For example: we have 10 zpci devices, and the batch size for list zpci
> >>instruction is 4.
> >>First, qemu will return devices 0-3, index of list zpci is 0
> >>Second, qemu will return device 4-7, index of list zpci is 4
> >>Third, qemu will return device 8-9, index of list zpci is 8
> >>We have device id, but list zpci does not use that as a flag to get
> >>next batch, it use an index instead.
> >>This process is defined by s390 arch, we can't change it.
> >>So no matter how we organize zpci devices in qemu, slot or link list.
> >>We could not get rid of the index issue.
> >>
> >>How about I add a flag to identify whether the link list
> >>is valid or not. When a hotplug/unplug event occurred, I will
> >>reset the index, and make the guest refetch the zpci devices
> >>from the beginning.
> >>
> >>
> >>
> >>
> >You should just use something stable for IDs.
> >And avoid doing it for anything that isn't a root or maybe a bridge
> >since it'll just cause everyone maintenance problems down the road.
> >
> The list zpci instruction is defined by arch, not a software thing, I could
> not
> change it to use a ID instead...
Hong Bo Li July 1, 2015, 12:30 p.m. UTC | #14
On 7/1/2015 19:57, Michael S. Tsirkin wrote:
> On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 19:23, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 18:36, Michael S. Tsirkin wrote:
>>>>> On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
>>>>>> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
>>>>>>> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>>>>>>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>>>>>>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>>>>>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>>>>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>>>>>>>> to hold devices representing information actually
>>>>>>>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>>>>>>>> usage example:
>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>>>>>>>> and init the root bus. The second line will create
>>>>>>>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>>>>>>>> root bus. These are similiar to the standard process
>>>>>>>>>>>>>> to define a pci device on other platform.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> The third line will create a s390 pci device to
>>>>>>>>>>>>>> store s390 specific information, and references
>>>>>>>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>>>>>>>> zpci devices.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>>>>>>>> one thing below
>>>>>>>>>>>>>
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>>>>>>>       int rc = 1;
>>>>>>>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return rc;
>>>>>>>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return 0;
>>>>>>>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>>>>>>>       return;
>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>> -    int j = 0;
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>> +    int i = 0;
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>>>>>>>> -            continue;
>>>>>>>>>>>>>> -        }
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>> -        if (j == idx) {
>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>> +        if (i == idx) {
>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>> -        j++;
>>>>>>>>>>>>>> +        i++;
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>       return NULL;
>>>>>>>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>>>>>>>> lookups: more code to achieve the same effect?
>>>>>>>>>>>> Thank you Michael.
>>>>>>>>>>>> I do the change due to two reasons:
>>>>>>>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>>>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>>>>>>>> does not work.
>>>>>>>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>>>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>>>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>>>>>>>> so the order of them does not matter.
>>>>>>>>>>> But you make this order guest-visible which seems wrong.
>>>>>>>>>>>
>>>>>>>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>>>>>>>> devices, and will
>>>>>>>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>>>>>>>> with the pci
>>>>>>>>>> topology on guest.
>>>>>>>>>>
>>>>>>>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>>>>>>>> will use a
>>>>>>>>>> resume token to get all the zpci devices. For example, first time we return
>>>>>>>>>> 32 zpci
>>>>>>>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>>>>>>>> token
>>>>>>>>>> is used to store the beginning of zpci devices that will be returned to
>>>>>>>>>> guest at next time.
>>>>>>>>>>
>>>>>>>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>>>>>>>> change the
>>>>>>>>>> "batch" in which this device be returned to guest. But this will not change
>>>>>>>>>> the  pci
>>>>>>>>>> topology on guest.
>>>>>>>>> Yes but that's still guest visible, and will break
>>>>>>>>> for example if guest is migrated between qemu instances
>>>>>>>>> where list order is different precisely when
>>>>>>>>> it's enumerating the bus.
>>>>>>>>>
>>>>>>>> Yes, and the list order is not the only s390 specific information that
>>>>>>>> exposed to
>>>>>>>> guest. Besides that,  we need to migrate all other zpci information. For
>>>>>>>> now,
>>>>>>>> we have no plan to support zpci migration yet.
>>>>>>> BTW how will hotplug work? If it happens while guest
>>>>>>> enumerates the bus the naturally all index values
>>>>>>> become invalid.
>>>>>> The list zpci only happen when the guest doing pci_base_init() for s390.
>>>>>> At that moment,  hotplug does not work yet.
>>>>> You can't prevent this: user can request hotplug at this time.
>>>>>
>>>>>> And assume we have
>>>>>> that case, we still have the index issue even when scan standard pci
>>>>>> bus. Please see my following words.
>>>>>>
>>>>>>> Just don't expose internal qdev data structures to guest.
>>>>>>> It's not by chance that we don't have a look up by index
>>>>>>> capability, it's an attempt to enforce sane usage.
>>>>>>> You are misusing the API with your hack.
>>>>>> The resume token of list zpci is indeed an index of iteration:(
>>>>>>
>>>>>>> PCI has standard ways to enumerate the bus, maybe you
>>>>>>> should emulate it.  Or find some other way that works.
>>>>>>> The idea to poke at s->fbus->qbus and count things there
>>>>>>> is a bad one.
>>>>>>>
>>>>>> I can define multiple zpci buses, and attach zpci device to a slot of a root
>>>>>> bus.
>>>>>> Then I need to add a api to the common pci code to do the scan of all the
>>>>>> pci host bridges. And in this way, it still has the index issue. I need to
>>>>>> scan
>>>>>> from the first bus to count the index. So any suggestion from you?
>>>>> OK, I looked at arch/s390/pci/pci.c.
>>>>> First of all, it seems to run the regular PCI thing on bridges.
>>>>>
>>>>>          zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>>                                        zdev, &resources);
>>>> At this moment, the guest has got all the zpci devices through clp list zpci
>>>> instruction. For each device, in the pci_scan_root_bus(), it will create
>>>> a root bus. So for s390, we get pci devices first, then create a new root bus
>>>> for it.
>>> I don't see this in guest code.
>>>
>>> I looked at pci_scan_root_bus and it's completely generic.
>>> It sets up the bus:
>>>          b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
>>>
>>> then it scans it:
>>>          max = pci_scan_child_bus(b);
>>>
>>>
>>> that one does
>>>          /* Go find them, Rover! */
>>>          for (devfn = 0; devfn < 0x100; devfn += 8)
>>>                  pci_scan_slot(bus, devfn);
>>>
>>> next
>>>          dev = pci_scan_single_device(bus, devfn);
>>>
>>> and so on. Eventually you get
>>>          if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
>>>                  return NULL;
>>>
>>> and that one does the clp thing using zpci_cfg_load.
>>>
>> pci_base_init()-> clp_scan_pci_devices():
>>      rc = clp_list_pci(rrb, __clp_add);
>> In this function, there is a while loop to get all the zpci devices by means
>> of
>> resume token(index). And for each device,
>>      __clp_add()-> clp_add_pci_device();
>> In clp_add_pci_device(), we use the zpci information to create a struct
>> zpci_dev zdev.
>> Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
>>      zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>                        zdev, &resources);
>> So, you see, each zdev has its own root bus. And there is no child bus under
>> that root bus.
> Right - zdev *is* the root. But there are pci devices hanging off it.
>
> So why not model it like this?
>
> vfio should attach to zdev, zdev is the pci host.
>
> Also, you can stick a pci to pci bridge under the root, and
> everything will just work.
>
You mean create a host bridge for each zpci device, like the kernel does?
  It looks like Frank's option 2:
-device s390-pcihost,fid=16,uid=2216 #create a zdev
-device vfio-pci,host=0000:00:00.0,bus=pci.0
-device s390-pcihost,fid=17,uid=2217
-device vfio-pci,host=0001:00:00.0,bus=pci.1

Then scan all these host bridges to get all the zpci devices?

>
>
>
>>>
>>>>> so to me, it looks like there's no need to expose
>>>>> non-root buses through special means.
>>>>>
>>>>> What to do for root buses is a different question but again,
>>>>> you definitely do not want to rely on the order of things
>>>>> on that linked list.
>>>>> The simplest thing is to ask user to give them unique
>>>>> numbers, or find some stable way to sort them that
>>>>> does not rely on order of initialization (e.g. device IDs?).
>>>>>
>>>>> But again, this only works ok for root buses.
>>>>>
>>>> Basically, it does not exposed the buses to guest, it exposed an index
>>>> to guest.
>>>> Here is the process to get all the zpci device for a guest.
>>>> For example: we have 10 zpci devices, and the batch size for list zpci
>>>> instruction is 4.
>>>> First, qemu will return devices 0-3, index of list zpci is 0
>>>> Second, qemu will return device 4-7, index of list zpci is 4
>>>> Third, qemu will return device 8-9, index of list zpci is 8
>>>> We have device id, but list zpci does not use that as a flag to get
>>>> next batch, it use an index instead.
>>>> This process is defined by s390 arch, we can't change it.
>>>> So no matter how we organize zpci devices in qemu, slot or link list.
>>>> We could not get rid of the index issue.
>>>>
>>>> How about I add a flag to identify whether the link list
>>>> is valid or not. When a hotplug/unplug event occurred, I will
>>>> reset the index, and make the guest refetch the zpci devices
>>>> from the beginning.
>>>>
>>>>
>>>>
>>> You should just use something stable for IDs.
>>> And avoid doing it for anything that isn't a root or maybe a bridge
>>> since it'll just cause everyone maintenance problems down the road.
>>>
>> The list zpci instruction is defined by arch, not a software thing, I could
>> not
>> change it to use a ID instead...
Hong Bo Li July 1, 2015, 12:42 p.m. UTC | #15
On 7/1/2015 19:57, Michael S. Tsirkin wrote:
> On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 19:23, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 18:36, Michael S. Tsirkin wrote:
>>>>> On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
>>>>>> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
>>>>>>> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>>>>>>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>>>>>>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>>>>>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>>>>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>>>>>>>> to hold devices representing information actually
>>>>>>>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>>>>>>>> usage example:
>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>>>>>>>> and init the root bus. The second line will create
>>>>>>>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>>>>>>>> root bus. These are similiar to the standard process
>>>>>>>>>>>>>> to define a pci device on other platform.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> The third line will create a s390 pci device to
>>>>>>>>>>>>>> store s390 specific information, and references
>>>>>>>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>>>>>>>> zpci devices.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>>>>>>>> one thing below
>>>>>>>>>>>>>
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>>>>>>>       int rc = 1;
>>>>>>>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return rc;
>>>>>>>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return 0;
>>>>>>>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>>>>>>>       return;
>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>> -    int j = 0;
>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>> +    int i = 0;
>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>>>>>>>> -            continue;
>>>>>>>>>>>>>> -        }
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>> -        if (j == idx) {
>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>> +        if (i == idx) {
>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>> -        j++;
>>>>>>>>>>>>>> +        i++;
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>       return NULL;
>>>>>>>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>>>>>>>> lookups: more code to achieve the same effect?
>>>>>>>>>>>> Thank you Michael.
>>>>>>>>>>>> I do the change due to two reasons:
>>>>>>>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>>>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>>>>>>>> does not work.
>>>>>>>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>>>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>>>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>>>>>>>> so the order of them does not matter.
>>>>>>>>>>> But you make this order guest-visible which seems wrong.
>>>>>>>>>>>
>>>>>>>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>>>>>>>> devices, and will
>>>>>>>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>>>>>>>> with the pci
>>>>>>>>>> topology on guest.
>>>>>>>>>>
>>>>>>>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>>>>>>>> will use a
>>>>>>>>>> resume token to get all the zpci devices. For example, first time we return
>>>>>>>>>> 32 zpci
>>>>>>>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>>>>>>>> token
>>>>>>>>>> is used to store the beginning of zpci devices that will be returned to
>>>>>>>>>> guest at next time.
>>>>>>>>>>
>>>>>>>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>>>>>>>> change the
>>>>>>>>>> "batch" in which this device be returned to guest. But this will not change
>>>>>>>>>> the  pci
>>>>>>>>>> topology on guest.
>>>>>>>>> Yes but that's still guest visible, and will break
>>>>>>>>> for example if guest is migrated between qemu instances
>>>>>>>>> where list order is different precisely when
>>>>>>>>> it's enumerating the bus.
>>>>>>>>>
>>>>>>>> Yes, and the list order is not the only s390 specific information that
>>>>>>>> exposed to
>>>>>>>> guest. Besides that,  we need to migrate all other zpci information. For
>>>>>>>> now,
>>>>>>>> we have no plan to support zpci migration yet.
>>>>>>> BTW how will hotplug work? If it happens while guest
>>>>>>> enumerates the bus the naturally all index values
>>>>>>> become invalid.
>>>>>> The list zpci only happen when the guest doing pci_base_init() for s390.
>>>>>> At that moment,  hotplug does not work yet.
>>>>> You can't prevent this: user can request hotplug at this time.
>>>>>
>>>>>> And assume we have
>>>>>> that case, we still have the index issue even when scan standard pci
>>>>>> bus. Please see my following words.
>>>>>>
>>>>>>> Just don't expose internal qdev data structures to guest.
>>>>>>> It's not by chance that we don't have a look up by index
>>>>>>> capability, it's an attempt to enforce sane usage.
>>>>>>> You are misusing the API with your hack.
>>>>>> The resume token of list zpci is indeed an index of iteration:(
>>>>>>
>>>>>>> PCI has standard ways to enumerate the bus, maybe you
>>>>>>> should emulate it.  Or find some other way that works.
>>>>>>> The idea to poke at s->fbus->qbus and count things there
>>>>>>> is a bad one.
>>>>>>>
>>>>>> I can define multiple zpci buses, and attach zpci device to a slot of a root
>>>>>> bus.
>>>>>> Then I need to add a api to the common pci code to do the scan of all the
>>>>>> pci host bridges. And in this way, it still has the index issue. I need to
>>>>>> scan
>>>>>> from the first bus to count the index. So any suggestion from you?
>>>>> OK, I looked at arch/s390/pci/pci.c.
>>>>> First of all, it seems to run the regular PCI thing on bridges.
>>>>>
>>>>>          zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>>                                        zdev, &resources);
>>>> At this moment, the guest has got all the zpci devices through clp list zpci
>>>> instruction. For each device, in the pci_scan_root_bus(), it will create
>>>> a root bus. So for s390, we get pci devices first, then create a new root bus
>>>> for it.
>>> I don't see this in guest code.
>>>
>>> I looked at pci_scan_root_bus and it's completely generic.
>>> It sets up the bus:
>>>          b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
>>>
>>> then it scans it:
>>>          max = pci_scan_child_bus(b);
>>>
>>>
>>> that one does
>>>          /* Go find them, Rover! */
>>>          for (devfn = 0; devfn < 0x100; devfn += 8)
>>>                  pci_scan_slot(bus, devfn);
>>>
>>> next
>>>          dev = pci_scan_single_device(bus, devfn);
>>>
>>> and so on. Eventually you get
>>>          if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
>>>                  return NULL;
>>>
>>> and that one does the clp thing using zpci_cfg_load.
>>>
>> pci_base_init()-> clp_scan_pci_devices():
>>      rc = clp_list_pci(rrb, __clp_add);
>> In this function, there is a while loop to get all the zpci devices by means
>> of
>> resume token(index). And for each device,
>>      __clp_add()-> clp_add_pci_device();
>> In clp_add_pci_device(), we use the zpci information to create a struct
>> zpci_dev zdev.
>> Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
>>      zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>                        zdev, &resources);
>> So, you see, each zdev has its own root bus. And there is no child bus under
>> that root bus.
> Right - zdev *is* the root. But there are pci devices hanging off it.
We have multiple zdevs in kernel, and each zdev only has one pci device 
attached to it.

>
> So why not model it like this?
>
> vfio should attach to zdev, zdev is the pci host.
>
> Also, you can stick a pci to pci bridge under the root, and
> everything will just work.
>
>
>
>
>
>>>
>>>>> so to me, it looks like there's no need to expose
>>>>> non-root buses through special means.
>>>>>
>>>>> What to do for root buses is a different question but again,
>>>>> you definitely do not want to rely on the order of things
>>>>> on that linked list.
>>>>> The simplest thing is to ask user to give them unique
>>>>> numbers, or find some stable way to sort them that
>>>>> does not rely on order of initialization (e.g. device IDs?).
>>>>>
>>>>> But again, this only works ok for root buses.
>>>>>
>>>> Basically, it does not exposed the buses to guest, it exposed an index
>>>> to guest.
>>>> Here is the process to get all the zpci device for a guest.
>>>> For example: we have 10 zpci devices, and the batch size for list zpci
>>>> instruction is 4.
>>>> First, qemu will return devices 0-3, index of list zpci is 0
>>>> Second, qemu will return device 4-7, index of list zpci is 4
>>>> Third, qemu will return device 8-9, index of list zpci is 8
>>>> We have device id, but list zpci does not use that as a flag to get
>>>> next batch, it use an index instead.
>>>> This process is defined by s390 arch, we can't change it.
>>>> So no matter how we organize zpci devices in qemu, slot or link list.
>>>> We could not get rid of the index issue.
>>>>
>>>> How about I add a flag to identify whether the link list
>>>> is valid or not. When a hotplug/unplug event occurred, I will
>>>> reset the index, and make the guest refetch the zpci devices
>>>> from the beginning.
>>>>
>>>>
>>>>
>>> You should just use something stable for IDs.
>>> And avoid doing it for anything that isn't a root or maybe a bridge
>>> since it'll just cause everyone maintenance problems down the road.
>>>
>> The list zpci instruction is defined by arch, not a software thing, I could
>> not
>> change it to use a ID instead...
Michael S. Tsirkin July 1, 2015, 1:37 p.m. UTC | #16
On Wed, Jul 01, 2015 at 08:42:52PM +0800, Hong Bo Li wrote:
> 
> 
> On 7/1/2015 19:57, Michael S. Tsirkin wrote:
> >On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
> >>
> >>On 7/1/2015 19:23, Michael S. Tsirkin wrote:
> >>>On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
> >>>>On 7/1/2015 18:36, Michael S. Tsirkin wrote:
> >>>>>On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
> >>>>>>On 7/1/2015 17:22, Michael S. Tsirkin wrote:
> >>>>>>>On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
> >>>>>>>>On 7/1/2015 16:05, Michael S. Tsirkin wrote:
> >>>>>>>>>On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> >>>>>>>>>>>On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>>>On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >>>>>>>>>>>>>On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>>>>>This patch introduce a new facility(and bus)
> >>>>>>>>>>>>>>to hold devices representing information actually
> >>>>>>>>>>>>>>provided by s390 firmware and I/O configuration.
> >>>>>>>>>>>>>>usage example:
> >>>>>>>>>>>>>>-device s390-pcihost
> >>>>>>>>>>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>>>>>>>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>The first line will create a s390 pci host bridge
> >>>>>>>>>>>>>>and init the root bus. The second line will create
> >>>>>>>>>>>>>>a standard vfio pci device, and attach it to the
> >>>>>>>>>>>>>>root bus. These are similar to the standard process
> >>>>>>>>>>>>>>to define a pci device on other platform.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>The third line will create a s390 pci device to
> >>>>>>>>>>>>>>store s390 specific information, and references
> >>>>>>>>>>>>>>the corresponding vfio pci device via device id.
> >>>>>>>>>>>>>>We create a s390 pci facility bus to hold all the
> >>>>>>>>>>>>>>zpci devices.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >>>>>>>>>>>>>It's mostly up to s390 maintainers, but I'd like to note
> >>>>>>>>>>>>>one thing below
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>>---
> >>>>>>>>>>>>>>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>>>>>>>>>>>>>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>>>>>>>>>>>>>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>>>>>>>>>>>>>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>>>>>>>>>>>>>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>>>index 560b66a..d5e7b2e 100644
> >>>>>>>>>>>>>>--- a/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>>>+++ b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>>>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>>>>>      PciCcdfErr *eccdf;
> >>>>>>>>>>>>>>      int rc = 1;
> >>>>>>>>>>>>>>      SeiContainer *sei_cont;
> >>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>          return rc;
> >>>>>>>>>>>>>>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>>>>>  int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>>>>>  {
> >>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>          return 0;
> >>>>>>>>>>>>>>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>>>>>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>>>>>>>>>>>>>  }
> >>>>>>>>>>>>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>>>>>>>>>>>>>+{
> >>>>>>>>>>>>>>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>>>>>>>>>>>+}
> >>>>>>>>>>>>>>+
> >>>>>>>>>>>>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>>>>>>>>>>>>>+{
> >>>>>>>>>>>>>>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>>>>>>>>>>>+    if (zpci->is_unplugged)
> >>>>>>>>>>>>>>+        object_unparent(OBJECT(zpci));
> >>>>>>>>>>>>>>+}
> >>>>>>>>>>>>>>+
> >>>>>>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>>>>>>>>>>>>>  {
> >>>>>>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>>>>>-    int i;
> >>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>          return NULL;
> >>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>>>>>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>>>>>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>>>>>+        if (pbdev->fid == fid) {
> >>>>>>>>>>>>>>              return pbdev;
> >>>>>>>>>>>>>>          }
> >>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>>>>>>>>>>>>>      return;
> >>>>>>>>>>>>>>  }
> >>>>>>>>>>>>>>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>>>>>>>>>>>>>-{
> >>>>>>>>>>>>>>-    return PCI_SLOT(pdev->devfn);
> >>>>>>>>>>>>>>-}
> >>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>>>>>>>>>>>>>-{
> >>>>>>>>>>>>>>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>>>>>>>>>>>>>-}
> >>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>>>>>>>>>>>  {
> >>>>>>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>>>>>-    int i;
> >>>>>>>>>>>>>>-    int j = 0;
> >>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>>>>>+    int i = 0;
> >>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>          return NULL;
> >>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>-        if (pbdev->fh == 0) {
> >>>>>>>>>>>>>>-            continue;
> >>>>>>>>>>>>>>-        }
> >>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>-        if (j == idx) {
> >>>>>>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>>>>>+        if (i == idx) {
> >>>>>>>>>>>>>>              return pbdev;
> >>>>>>>>>>>>>>          }
> >>>>>>>>>>>>>>-        j++;
> >>>>>>>>>>>>>>+        i++;
> >>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>      return NULL;
> >>>>>>>>>>>>>This relies on the order of children on the qbus, that's wrong I think.
> >>>>>>>>>>>>>Generally I'm not sure why do you convert all slot lookups to child
> >>>>>>>>>>>>>lookups: more code to achieve the same effect?
> >>>>>>>>>>>>Thank you Michael.
> >>>>>>>>>>>>I do the change due to two reasons:
> >>>>>>>>>>>>1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
> >>>>>>>>>>>>slots at most. So when it comes to multiple s390 pci root buses, the old code
> >>>>>>>>>>>>does not work.
> >>>>>>>>>>>>2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> >>>>>>>>>>>>s390 specific information, so we can attach all the zpci devices to a
> >>>>>>>>>>>>s390 pci facility bus. Since these zpci device has no relation with the "slot",
> >>>>>>>>>>>>so the order of them does not matter.
> >>>>>>>>>>>But you make this order guest-visible which seems wrong.
> >>>>>>>>>>>
> >>>>>>>>>>The guest uses a s390 specific "list pci" instruction to get all the zpci
> >>>>>>>>>>devices, and will
> >>>>>>>>>>create a root s390 pci bus for each device.  So the order has no relation
> >>>>>>>>>>with the pci
> >>>>>>>>>>topology on guest.
> >>>>>>>>>>
> >>>>>>>>>>If we assign  too many zpci devices to one guest, the "list pci" instruction
> >>>>>>>>>>will use a
> >>>>>>>>>>resume token to get all the zpci devices. For example, first time we return
> >>>>>>>>>>32 zpci
> >>>>>>>>>>devices to guest. Next time we'll return another 32 zpci devices. The resume
> >>>>>>>>>>token
> >>>>>>>>>>is used to store the beginning of zpci devices that will be returned to
> >>>>>>>>>>guest at next time.
> >>>>>>>>>>
> >>>>>>>>>>So, if we change the order of the zpci device on s390 facility bus, it may
> >>>>>>>>>>change the
> >>>>>>>>>>"batch" in which this device be returned to guest. But this will not change
> >>>>>>>>>>the  pci
> >>>>>>>>>>topology on guest.
> >>>>>>>>>Yes but that's still guest visible, and will break
> >>>>>>>>>for example if guest is migrated between qemu instances
> >>>>>>>>>where list order is different precisely when
> >>>>>>>>>it's enumerating the bus.
> >>>>>>>>>
> >>>>>>>>Yes, and the list order is not the only s390 specific information that
> >>>>>>>>exposed to
> >>>>>>>>guest. Besides that,  we need to migrate all other zpci information. For
> >>>>>>>>now,
> >>>>>>>>we have no plan to support zpci migration yet.
> >>>>>>>BTW how will hotplug work? If it happens while guest
> >>>>>>>enumerates the bus the naturally all index values
> >>>>>>>become invalid.
> >>>>>>The list zpci only happen when the guest doing pci_base_init() for s390.
> >>>>>>At that moment,  hotplug does not work yet.
> >>>>>You can't prevent this: user can request hotplug at this time.
> >>>>>
> >>>>>>And assume we have
> >>>>>>that case, we still have the index issue even when scan standard pci
> >>>>>>bus. Please see my following words.
> >>>>>>
> >>>>>>>Just don't expose internal qdev data structures to guest.
> >>>>>>>It's not by chance that we don't have a look up by index
> >>>>>>>capability, it's an attempt to enforce sane usage.
> >>>>>>>You are misusing the API with your hack.
> >>>>>>The resume token of list zpci is indeed an index of iteration:(
> >>>>>>
> >>>>>>>PCI has standard ways to enumerate the bus, maybe you
> >>>>>>>should emulate it.  Or find some other way that works.
> >>>>>>>The idea to poke at s->fbus->qbus and count things there
> >>>>>>>is a bad one.
> >>>>>>>
> >>>>>>I can define multiple zpci buses, and attach zpci device to a slot of a root
> >>>>>>bus.
> >>>>>>Then I need to add a api to the common pci code to do the scan of all the
> >>>>>>pci host bridges. And in this way, it still has the index issue. I need to
> >>>>>>scan
> >>>>>>from the first bus to count the index. So any suggestion from you?
> >>>>>OK, I looked at arch/s390/pci/pci.c.
> >>>>>First of all, it seems to run the regular PCI thing on bridges.
> >>>>>
> >>>>>         zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
> >>>>>                                       zdev, &resources);
> >>>>At this moment, the guest has got all the zpci devices through clp list zpci
> >>>>instruction. For each device, in the pci_scan_root_bus(), it will create
> >>>>a root bus. So for s390, we get pci devices first, then create a new root bus
> >>>>for it.
> >>>I don't see this in guest code.
> >>>
> >>>I looked at pci_scan_root_bus and it's completely generic.
> >>>It sets up the bus:
> >>>         b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
> >>>
> >>>then it scans it:
> >>>         max = pci_scan_child_bus(b);
> >>>
> >>>
> >>>that one does
> >>>         /* Go find them, Rover! */
> >>>         for (devfn = 0; devfn < 0x100; devfn += 8)
> >>>                 pci_scan_slot(bus, devfn);
> >>>
> >>>next
> >>>         dev = pci_scan_single_device(bus, devfn);
> >>>
> >>>and so on. Eventually you get
> >>>         if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
> >>>                 return NULL;
> >>>
> >>>and that one does the clp thing using zpci_cfg_load.
> >>>
> >>pci_base_init()-> clp_scan_pci_devices():
> >>     rc = clp_list_pci(rrb, __clp_add);
> >>In this function, there is a while loop to get all the zpci devices by means
> >>of
> >>resume token(index). And for each device,
> >>     __clp_add()-> clp_add_pci_device();
> >>In clp_add_pci_device(), we use the zpci information to create a struct
> >>zpci_dev zdev.
> >>Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
> >>     zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
> >>                       zdev, &resources);
> >>So, you see, each zdev has its own root bus. And there is no child bus under
> >>that root bus.
> >Right - zdev *is* the root. But there are pci devices hanging off it.
> We have multiple zdevs in kernel, and each zdev only has one pci device
> attached to it.

I see. It's nasty. Is it too late to fix in guest?
Supporting bridges should just be a question of passing
bus numbers to host.


I guess you need to support old guests too, so this
justifies some code in qemu. But you still need something
stable to sort by, that does not depend on the order
of initialization of devices. If all else fails, ask user
to give you numbers.

And I'm still confused by this:
>>>>>>>>>>>>>>>-device s390-pcihost
>>>>>>>>>>>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1

why isn't vfio connected to zpci? why is it the other way around?

> >
> >So why not model it like this?
> >
> >vfio should attach to zdev, zdev is the pci host.
> >
> >Also, you can stick a pci to pci bridge under the root, and
> >everything will just work.
> >
> >
> >
> >
> >
> >>>
> >>>>>so to me, it looks like there's no need to expose
> >>>>>non-root buses through special means.
> >>>>>
> >>>>>What to do for root buses is a different question but again,
> >>>>>you definitely do not want to rely on the order of things
> >>>>>on that linked list.
> >>>>>The simplest thing is to ask user to give them unique
> >>>>>numbers, or find some stable way to sort them that
> >>>>>does not rely on order of initialization (e.g. device IDs?).
> >>>>>
> >>>>>But again, this only works ok for root buses.
> >>>>>
> >>>>Basically, it does not exposed the buses to guest, it exposed an index
> >>>>to guest.
> >>>>Here is the process to get all the zpci device for a guest.
> >>>>For example: we have 10 zpci devices, and the batch size for list zpci
> >>>>instruction is 4.
> >>>>First, qemu will return devices 0-3, index of list zpci is 0
> >>>>Second, qemu will return device 4-7, index of list zpci is 4
> >>>>Third, qemu will return device 8-9, index of list zpci is 8
> >>>>We have device id, but list zpci does not use that as a flag to get
> >>>>next batch, it use an index instead.
> >>>>This process is defined by s390 arch, we can't change it.
> >>>>So no matter how we organize zpci devices in qemu, slot or link list.
> >>>>We could not get rid of the index issue.
> >>>>
> >>>>How about I add a flag to identify whether the link list
> >>>>is valid or not. When a hotplug/unplug event occurred, I will
> >>>>reset the index, and make the guest refetch the zpci devices
> >>>>from the beginning.
> >>>>
> >>>>
> >>>>
> >>>You should just use something stable for IDs.
> >>>And avoid doing it for anything that isn't a root or maybe a bridge
> >>>since it'll just cause everyone maintenance problems down the road.
> >>>
> >>The list zpci instruction is defined by arch, not a software thing, I could
> >>not
> >>change it to use a ID instead...
Hong Bo Li July 2, 2015, 2:57 a.m. UTC | #17
On 7/1/2015 21:37, Michael S. Tsirkin wrote:
> On Wed, Jul 01, 2015 at 08:42:52PM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 19:57, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 19:23, Michael S. Tsirkin wrote:
>>>>> On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
>>>>>> On 7/1/2015 18:36, Michael S. Tsirkin wrote:
>>>>>>> On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
>>>>>>>> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
>>>>>>>>> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>>>>>>>>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>>>>>>>>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>>>>>>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>>>>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>>>>>>>>>> to hold devices representing information actually
>>>>>>>>>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>>>>>>>>>> usage example:
>>>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>>>>>>>>>> and init the root bus. The second line will create
>>>>>>>>>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>>>>>>>>>> root bus. These are similar to the standard process
>>>>>>>>>>>>>>>> to define a pci device on other platform.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> The third line will create a s390 pci device to
>>>>>>>>>>>>>>>> store s390 specific information, and references
>>>>>>>>>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>>>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>>>>>>>>>> zpci devices.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>>>>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>>>>>>>>>> one thing below
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>>>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>>>>>>>>>       int rc = 1;
>>>>>>>>>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>           return rc;
>>>>>>>>>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>           return 0;
>>>>>>>>>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>>>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>>>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>>>>>>>>>       return;
>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>>>> -    int j = 0;
>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>>>> +    int i = 0;
>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>>>>>>>>>> -            continue;
>>>>>>>>>>>>>>>> -        }
>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>> -        if (j == idx) {
>>>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>>>> +        if (i == idx) {
>>>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>> -        j++;
>>>>>>>>>>>>>>>> +        i++;
>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>       return NULL;
>>>>>>>>>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>>>>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>>>>>>>>>> lookups: more code to achieve the same effect?
>>>>>>>>>>>>>> Thank you Michael.
>>>>>>>>>>>>>> I do the change due to two reasons:
>>>>>>>>>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>>>>>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>>>>>>>>>> does not work.
>>>>>>>>>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>>>>>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>>>>>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>>>>>>>>>> so the order of them does not matter.
>>>>>>>>>>>>> But you make this order guest-visible which seems wrong.
>>>>>>>>>>>>>
>>>>>>>>>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>>>>>>>>>> devices, and will
>>>>>>>>>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>>>>>>>>>> with the pci
>>>>>>>>>>>> topology on guest.
>>>>>>>>>>>>
>>>>>>>>>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>>>>>>>>>> will use a
>>>>>>>>>>>> resume token to get all the zpci devices. For example, first time we return
>>>>>>>>>>>> 32 zpci
>>>>>>>>>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>>>>>>>>>> token
>>>>>>>>>>>> is used to store the beginning of zpci devices that will be returned to
>>>>>>>>>>>> guest at next time.
>>>>>>>>>>>>
>>>>>>>>>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>>>>>>>>>> change the
>>>>>>>>>>>> "batch" in which this device be returned to guest. But this will not change
>>>>>>>>>>>> the  pci
>>>>>>>>>>>> topology on guest.
>>>>>>>>>>> Yes but that's still guest visible, and will break
>>>>>>>>>>> for example if guest is migrated between qemu instances
>>>>>>>>>>> where list order is different precisely when
>>>>>>>>>>> it's enumerating the bus.
>>>>>>>>>>>
>>>>>>>>>> Yes, and the list order is not the only s390 specific information that
>>>>>>>>>> exposed to
>>>>>>>>>> guest. Besides that,  we need to migrate all other zpci information. For
>>>>>>>>>> now,
>>>>>>>>>> we have no plan to support zpci migration yet.
>>>>>>>>> BTW how will hotplug work? If it happens while guest
>>>>>>>>> enumerates the bus the naturally all index values
>>>>>>>>> become invalid.
>>>>>>>> The list zpci only happen when the guest doing pci_base_init() for s390.
>>>>>>>> At that moment,  hotplug does not work yet.
>>>>>>> You can't prevent this: user can request hotplug at this time.
>>>>>>>
>>>>>>>> And assume we have
>>>>>>>> that case, we still have the index issue even when scan standard pci
>>>>>>>> bus. Please see my following words.
>>>>>>>>
>>>>>>>>> Just don't expose internal qdev data structures to guest.
>>>>>>>>> It's not by chance that we don't have a look up by index
>>>>>>>>> capability, it's an attempt to enfoce sane usage.
>>>>>>>>> You are misusing the API with your hack.
>>>>>>>> The resume token of list zpci is indeed an index of iteration:(
>>>>>>>>
>>>>>>>>> PCI has standard ways to enumerate the bus, maybe you
>>>>>>>>> should emulate it.  Or find some other way that works.
>>>>>>>>> The idea to poke at s->fbus->qbus and count things there
>>>>>>>>> is a bad one.
>>>>>>>>>
>>>>>>>> I can define multiple zpci buses, and attach zpci device to a slot of a root
>>>>>>>> bus.
>>>>>>>> Then I need to add a api to the common pci code to do the scan of all the
>>>>>>>> pci host bridges. And in this way, it still has the index issue. I need to
>>>>>>>> scan
>>>>>>>> from the first bus to count the index. So any suggestion from you?
>>>>>>> OK, I looked at arch/s390/pci/pci.c.
>>>>>>> First of all, it seems to run the regular PCI thing on bridges.
>>>>>>>
>>>>>>>          zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>>>>                                        zdev, &resources);
>>>>>> At this moment, the guest has got all the zpci devices through clp list zpci
>>>>>> instruction. For each device, in the pci_scan_root_bus(), it will create
>>>>>> a root bus. So for s390, we get pci devices first, then create a new root bus
>>>>>> for it.
>>>>> I don't see this in guest code.
>>>>>
>>>>> I looked at pci_scan_root_bus and it's completely generic.
>>>>> It sets up the bus:
>>>>>          b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
>>>>>
>>>>> then it scans it:
>>>>>          max = pci_scan_child_bus(b);
>>>>>
>>>>>
>>>>> that one does
>>>>>          /* Go find them, Rover! */
>>>>>          for (devfn = 0; devfn < 0x100; devfn += 8)
>>>>>                  pci_scan_slot(bus, devfn);
>>>>>
>>>>> next
>>>>>          dev = pci_scan_single_device(bus, devfn);
>>>>>
>>>>> and so on. Eventually you get
>>>>>          if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
>>>>>                  return NULL;
>>>>>
>>>>> and that one does the clp thing using zpci_cfg_load.
>>>>>
>>>> pci_base_init()-> clp_scan_pci_devices():
>>>>      rc = clp_list_pci(rrb, __clp_add);
>>>> In this function, there is a while loop to get all the zpci devices by means
>>>> of
>>>> resume token(index). And for each device,
>>>>      __clp_add()-> clp_add_pci_device();
>>>> In clp_add_pci_device(), we use the zpci information to create a struct
>>>> zpci_dev zdev.
>>>> Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
>>>>      zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>                        zdev, &resources);
>>>> So, you see, each zdev has its own root bus. And there is no child bus under
>>>> that root bus.
>>> Right - zdev *is* the root. But there are pci devices hanging off it.
>> We have multiple zdevs in kernel, and each zdev only has one pci device
>> attached to it.
> I see. It's nasty. Is it too late to fix in guest?
> Supporting bridges should just be a question of passing
> bus numbers to host.

At the Linux OS level, there is no PCI-to-PCI bridge on s390; the bus number
and slot number are all virtual and have no meaning, like these:
0000:00:00.0
0001:00:00.0
0002:00:00.0
......
Each zpci device is in a separate domain.
I have added Sebastian to the list; he is the owner of s390 pci. I think he
could give some reasons why s390 pci is implemented in this way.
>
> I guess you need to support old guests too, so this
> justifies some code in qemu. But you still need something
> stable to sort by, that does not depend on the order
> of initialization of devices. If all else fails, ask user
> to give you numbers.

Thank you, that's a good idea. I can sort the devices by fid or uid.

> And I'm still confused by this:
>>>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> why isn't vfio connected to zpci? why is it the other way around?

I implemented the hotplug in s390_pci_device_hot_plug() in the patch,
not in s390_pcihost_hot_plug(). It performs some s390-specific actions.
If we define zpci first, then I need to do the real hotplug when hotplugging
a vfio-pci device. I think both approaches are OK; do you prefer the latter one?

>>> So why not model it like this?
>>>
>>> vfio should attach to zdev, zdev is the pci host.
>>>
>>> Also, you can stick a pci to pci bridge under the root, and
>>> everything will just work.
>>>
>>>
>>>
>>>
>>>
>>>>>>> so to me, it looks like there's no need to expose
>>>>>>> non-root buses through special means.
>>>>>>>
>>>>>>> What to do for root buses is a different question but again,
>>>>>>> you definitely do not want to rely on the order of things
>>>>>>> on that linked list.
>>>>>>> The simplest thing is to ask user to give them unique
>>>>>>> numbers, or find some stable way to sort them that
>>>>>>> does not rely on order of initialization (e.g. device IDs?).
>>>>>>>
>>>>>>> But again, this only works ok for root buses.
>>>>>>>
>>>>>> Basically, it does not exposed the buses to guest, it exposed an index
>>>>>> to guest.
>>>>>> Here is the process to get all the zpci device for a guest.
>>>>>> For example: we have 10 zpci devices, and the batch size for list zpci
>>>>>> instruction is 4.
>>>>>> First, qemu will return devices 0-3, index of list zpci is 0
>>>>>> Second, qemu will return device 4-7, index of list zpci is 4
>>>>>> Third, qemu will return device 8-9, index of list zpci is 8
>>>>>> We have device id, but list zpci does not use that as a flag to get
>>>>>> next batch, it use an index instead.
>>>>>> This process is defined by s390 arch, we can't change it.
>>>>>> So no matter how we organize zpci devices in qemu, slot or link list.
>>>>>> We could not get rid of the index issue.
>>>>>>
>>>>>> How about I add a flag to identify whether the link list
>>>>>> is valid or not. When a hotplug/unplug event occurred, I will
>>>>>> reset the index, and make the guest refetch the zpci devices
>>>>>> from the beginning.
>>>>>>
>>>>>>
>>>>> You should just use something stable for IDs.
>>>>> And avoid doing it for anything that isn't a root or maybe a bridge
>>>>> since it'll just cause everyone maintainance problems down the road.
>>>>>
>>>> The list zpci instruction is defined by arch, not a software thing, I could
>>>> not
>>>> change it to use a ID instead...
Michael S. Tsirkin July 2, 2015, 5:13 a.m. UTC | #18
On Thu, Jul 02, 2015 at 10:57:34AM +0800, Hong Bo Li wrote:
> 
> 
> On 7/1/2015 21:37, Michael S. Tsirkin wrote:
> >On Wed, Jul 01, 2015 at 08:42:52PM +0800, Hong Bo Li wrote:
> >>
> >>On 7/1/2015 19:57, Michael S. Tsirkin wrote:
> >>>On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
> >>>>On 7/1/2015 19:23, Michael S. Tsirkin wrote:
> >>>>>On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
> >>>>>>On 7/1/2015 18:36, Michael S. Tsirkin wrote:
> >>>>>>>On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
> >>>>>>>>On 7/1/2015 17:22, Michael S. Tsirkin wrote:
> >>>>>>>>>On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>On 7/1/2015 16:05, Michael S. Tsirkin wrote:
> >>>>>>>>>>>On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>>>On 7/1/2015 14:22, Michael S. Tsirkin wrote:
> >>>>>>>>>>>>>On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>>>>>On 6/29/2015 18:01, Michael S. Tsirkin wrote:
> >>>>>>>>>>>>>>>On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
> >>>>>>>>>>>>>>>>This patch introduce a new facility(and bus)
> >>>>>>>>>>>>>>>>to hold devices representing information actually
> >>>>>>>>>>>>>>>>provided by s390 firmware and I/O configuration.
> >>>>>>>>>>>>>>>>usage example:
> >>>>>>>>>>>>>>>>-device s390-pcihost
> >>>>>>>>>>>>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>>>>>>>>>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>The first line will create a s390 pci host bridge
> >>>>>>>>>>>>>>>>and init the root bus. The second line will create
> >>>>>>>>>>>>>>>>a standard vfio pci device, and attach it to the
> >>>>>>>>>>>>>>>>root bus. These are similiar to the standard process
> >>>>>>>>>>>>>>>>to define a pci device on other platform.
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>The third line will create a s390 pci device to
> >>>>>>>>>>>>>>>>store s390 specific information, and references
> >>>>>>>>>>>>>>>>the corresponding vfio pci device via device id.
> >>>>>>>>>>>>>>>>We create a s390 pci facility bus to hold all the
> >>>>>>>>>>>>>>>>zpci devices.
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
> >>>>>>>>>>>>>>>It's mostly up to s390 maintainers, but I'd like to note
> >>>>>>>>>>>>>>>one thing below
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>---
> >>>>>>>>>>>>>>>>  hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
> >>>>>>>>>>>>>>>>  hw/s390x/s390-pci-bus.h    |  48 ++++++-
> >>>>>>>>>>>>>>>>  hw/s390x/s390-pci-inst.c   |   4 +-
> >>>>>>>>>>>>>>>>  hw/s390x/s390-virtio-ccw.c |   5 +-
> >>>>>>>>>>>>>>>>  4 files changed, 283 insertions(+), 88 deletions(-)
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>>>>>index 560b66a..d5e7b2e 100644
> >>>>>>>>>>>>>>>>--- a/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>>>>>+++ b/hw/s390x/s390-pci-bus.c
> >>>>>>>>>>>>>>>>@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>>>>>>>      PciCcdfErr *eccdf;
> >>>>>>>>>>>>>>>>      int rc = 1;
> >>>>>>>>>>>>>>>>      SeiContainer *sei_cont;
> >>>>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>>>          return rc;
> >>>>>>>>>>>>>>>>@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
> >>>>>>>>>>>>>>>>  int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>>>>>>>  {
> >>>>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>>>          return 0;
> >>>>>>>>>>>>>>>>@@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
> >>>>>>>>>>>>>>>>      return !QTAILQ_EMPTY(&s->pending_sei);
> >>>>>>>>>>>>>>>>  }
> >>>>>>>>>>>>>>>>+void s390_pci_device_enable(S390PCIBusDevice *zpci)
> >>>>>>>>>>>>>>>>+{
> >>>>>>>>>>>>>>>>+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
> >>>>>>>>>>>>>>>>+}
> >>>>>>>>>>>>>>>>+
> >>>>>>>>>>>>>>>>+void s390_pci_device_disable(S390PCIBusDevice *zpci)
> >>>>>>>>>>>>>>>>+{
> >>>>>>>>>>>>>>>>+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
> >>>>>>>>>>>>>>>>+    if (zpci->is_unplugged)
> >>>>>>>>>>>>>>>>+        object_unparent(OBJECT(zpci));
> >>>>>>>>>>>>>>>>+}
> >>>>>>>>>>>>>>>>+
> >>>>>>>>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
> >>>>>>>>>>>>>>>>  {
> >>>>>>>>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>>>>>>>-    int i;
> >>>>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>>>          return NULL;
> >>>>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>>>>>>>-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
> >>>>>>>>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>>>>>>>+        if (pbdev->fid == fid) {
> >>>>>>>>>>>>>>>>              return pbdev;
> >>>>>>>>>>>>>>>>          }
> >>>>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>>>@@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
> >>>>>>>>>>>>>>>>      return;
> >>>>>>>>>>>>>>>>  }
> >>>>>>>>>>>>>>>>-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
> >>>>>>>>>>>>>>>>-{
> >>>>>>>>>>>>>>>>-    return PCI_SLOT(pdev->devfn);
> >>>>>>>>>>>>>>>>-}
> >>>>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>>>-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
> >>>>>>>>>>>>>>>>-{
> >>>>>>>>>>>>>>>>-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
> >>>>>>>>>>>>>>>>-}
> >>>>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>>>  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
> >>>>>>>>>>>>>>>>  {
> >>>>>>>>>>>>>>>>      S390PCIBusDevice *pbdev;
> >>>>>>>>>>>>>>>>-    int i;
> >>>>>>>>>>>>>>>>-    int j = 0;
> >>>>>>>>>>>>>>>>-    S390pciState *s = S390_PCI_HOST_BRIDGE(
> >>>>>>>>>>>>>>>>-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
> >>>>>>>>>>>>>>>>+    BusChild *kid;
> >>>>>>>>>>>>>>>>+    int i = 0;
> >>>>>>>>>>>>>>>>+    S390PCIFacility *s = S390_PCI_FACILITY(
> >>>>>>>>>>>>>>>>+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
> >>>>>>>>>>>>>>>>      if (!s) {
> >>>>>>>>>>>>>>>>          return NULL;
> >>>>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>>>-    for (i = 0; i < PCI_SLOT_MAX; i++) {
> >>>>>>>>>>>>>>>>-        pbdev = &s->pbdev[i];
> >>>>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>>>-        if (pbdev->fh == 0) {
> >>>>>>>>>>>>>>>>-            continue;
> >>>>>>>>>>>>>>>>-        }
> >>>>>>>>>>>>>>>>-
> >>>>>>>>>>>>>>>>-        if (j == idx) {
> >>>>>>>>>>>>>>>>+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
> >>>>>>>>>>>>>>>>+        pbdev = (S390PCIBusDevice *)kid->child;
> >>>>>>>>>>>>>>>>+        if (i == idx) {
> >>>>>>>>>>>>>>>>              return pbdev;
> >>>>>>>>>>>>>>>>          }
> >>>>>>>>>>>>>>>>-        j++;
> >>>>>>>>>>>>>>>>+        i++;
> >>>>>>>>>>>>>>>>      }
> >>>>>>>>>>>>>>>>      return NULL;
> >>>>>>>>>>>>>>>This relies on the order of children on the qbus, that's wrong I think.
> >>>>>>>>>>>>>>>Generally I'm not sure why do you convert all slot lookups to child
> >>>>>>>>>>>>>>>lookups: more code to achieve the same effect?
> >>>>>>>>>>>>>>Thank you Michael.
> >>>>>>>>>>>>>>I do the change due to two reasons:
> >>>>>>>>>>>>>>1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
> >>>>>>>>>>>>>>slots at most. So when it comes to multiple s390 pci root buses, the old code
> >>>>>>>>>>>>>>does not work.
> >>>>>>>>>>>>>>2. Now the zpci device "S390PCIBusDevice" is only a structure to store
> >>>>>>>>>>>>>>s390 specific information, so we can attach all the zpci devices to a
> >>>>>>>>>>>>>>s390 pci facility bus. Since these zpci device has no relation with the "slot",
> >>>>>>>>>>>>>>so the order of them does not matter.
> >>>>>>>>>>>>>But you make this order guest-visible which seems wrong.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>The guest uses a s390 specific "list pci" instruction to get all the zpci
> >>>>>>>>>>>>devices, and will
> >>>>>>>>>>>>create a root s390 pci bus for each device.  So the order has no relation
> >>>>>>>>>>>>with the pci
> >>>>>>>>>>>>topology on guest.
> >>>>>>>>>>>>
> >>>>>>>>>>>>If we assign  too many zpci devices to one guest, the "list pci" instruction
> >>>>>>>>>>>>will use a
> >>>>>>>>>>>>resume token to get all the zpci devices. For example, first time we return
> >>>>>>>>>>>>32 zpci
> >>>>>>>>>>>>devices to guest. Next time we'll return another 32 zpci devices. The resume
> >>>>>>>>>>>>token
> >>>>>>>>>>>>is used to store the beginning of zpci devices that will be returned to
> >>>>>>>>>>>>guest at next time.
> >>>>>>>>>>>>
> >>>>>>>>>>>>So, if we change the order of the zpci device on s390 facility bus, it may
> >>>>>>>>>>>>change the
> >>>>>>>>>>>>"batch" in which this device be returned to guest. But this will not change
> >>>>>>>>>>>>the  pci
> >>>>>>>>>>>>topology on guest.
> >>>>>>>>>>>Yes but that's still guest visible, and will break
> >>>>>>>>>>>for example if guest is migrated between qemu instances
> >>>>>>>>>>>where list order is different precisely when
> >>>>>>>>>>>it's enumerating the bus.
> >>>>>>>>>>>
> >>>>>>>>>>Yes, and the list order is not the only s390 specific information that
> >>>>>>>>>>exposed to
> >>>>>>>>>>guest. Besides that,  we need to migrate all other zpci information. For
> >>>>>>>>>>now,
> >>>>>>>>>>we have no plan to support zpci migration yet.
> >>>>>>>>>BTW how will hotplug work? If it happens while guest
> >>>>>>>>>enumerates the bus the naturally all index values
> >>>>>>>>>become invalid.
> >>>>>>>>The list zpci only happen when the guest doing pci_base_init() for s390.
> >>>>>>>>At that moment,  hotplug does not work yet.
> >>>>>>>You can't prevent this: user can request hotplug at this time.
> >>>>>>>
> >>>>>>>>And assume we have
> >>>>>>>>that case, we still have the index issue even when scan standard pci
> >>>>>>>>bus. Please see my following words.
> >>>>>>>>
> >>>>>>>>>Just don't expose internal qdev data structures to guest.
> >>>>>>>>>It's not by chance that we don't have a look up by index
> >>>>>>>>>capability, it's an attempt to enfoce sane usage.
> >>>>>>>>>You are misusing the API with your hack.
> >>>>>>>>The resume token of list zpci is indeed an index of iteration:(
> >>>>>>>>
> >>>>>>>>>PCI has standard ways to enumerate the bus, maybe you
> >>>>>>>>>should emulate it.  Or find some other way that works.
> >>>>>>>>>The idea to poke at s->fbus->qbus and count things there
> >>>>>>>>>is a bad one.
> >>>>>>>>>
> >>>>>>>>I can define multiple zpci buses, and attach zpci device to a slot of a root
> >>>>>>>>bus.
> >>>>>>>>Then I need to add a api to the common pci code to do the scan of all the
> >>>>>>>>pci host bridges. And in this way, it still has the index issue. I need to
> >>>>>>>>scan
> >>>>>>>>from the first bus to count the index. So any suggestion from you?
> >>>>>>>OK, I looked at arch/s390/pci/pci.c.
> >>>>>>>First of all, it seems to run the regular PCI thing on bridges.
> >>>>>>>
> >>>>>>>         zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
> >>>>>>>                                       zdev, &resources);
> >>>>>>At this moment, the guest has got all the zpci devices through clp list zpci
> >>>>>>instruction. For each device, in the pci_scan_root_bus(), it will create
> >>>>>>a root bus. So for s390, we get pci devices first, then create a new root bus
> >>>>>>for it.
> >>>>>I don't see this in guest code.
> >>>>>
> >>>>>I looked at pci_scan_root_bus and it's completely generic.
> >>>>>It sets up the bus:
> >>>>>         b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
> >>>>>
> >>>>>then it scans it:
> >>>>>         max = pci_scan_child_bus(b);
> >>>>>
> >>>>>
> >>>>>that one does
> >>>>>         /* Go find them, Rover! */
> >>>>>         for (devfn = 0; devfn < 0x100; devfn += 8)
> >>>>>                 pci_scan_slot(bus, devfn);
> >>>>>
> >>>>>next
> >>>>>         dev = pci_scan_single_device(bus, devfn);
> >>>>>
> >>>>>and so on. Eventually you get
> >>>>>         if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
> >>>>>                 return NULL;
> >>>>>
> >>>>>and that one does the clp thing using zpci_cfg_load.
> >>>>>
> >>>>pci_base_init()-> clp_scan_pci_devices():
> >>>>     rc = clp_list_pci(rrb, __clp_add);
> >>>>In this function, there is a while loop to get all the zpci devices by means
> >>>>of
> >>>>resume token(index). And for each device,
> >>>>     __clp_add()-> clp_add_pci_device();
> >>>>In clp_add_pci_device(), we use the zpci information to create a struct
> >>>>zpci_dev zdev.
> >>>>Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
> >>>>     zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
> >>>>                       zdev, &resources);
> >>>>So, you see, each zdev has its own root bus. And there is no child bus under
> >>>>that root bus.
> >>>Right - zdev *is* the root. But there are pci devices hanging off it.
> >>We have multiple zdevs in kernel, and each zdev only has one pci device
> >>attached to it.
> >I see. It's nasty. Is it too late to fix in guest?
> >Supporting bridges should just be a question of passing
> >bus numbers to host.
> 
> On the Linux OS level, there is no pci to pci bridge on s390, the bus
> number,
> slot number are all virtual and has no meaning, like these:
> 0000:00:00.0
> 0001:00:00.0
> 0002:00:00.0
> ......
> Each zpci device is in a separate domain.
> I add Sebastian to the list, he is the owner of s390 pci. I think he
> could give some reasons why s390 pci implemented in this way.
> >
> >I guess you need to support old guests too, so this
> >justifies some code in qemu. But you still need something
> >stable to sort by, that does not depend on the order
> >of initialization of devices. If all else fails, ask user
> >to give you numbers.
> 
> Thank you,  it's a good idea, I can sort the devices by fid or uid.
> 
> >And I'm still confused by this:
> >>>>>>>>>>>>>>>>-device s390-pcihost
> >>>>>>>>>>>>>>>>-device vfio-pci,host=0000:00:00.0,id=vpci1
> >>>>>>>>>>>>>>>>-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
> >why isn't vfio connected to zpci? why is it the other way around?
> 
> I implement the hotplug in s390_pci_device_hot_plug()  in the patch,
> not the s390_pcihost_hot_plug(). It will do some s390 specific action.
> If we define zpci first, then I need to do the real hotplug when hotplugging
> a vfio-pci device.  I think both of them are ok, you prefer the later one?

I prefer sane modeling; it shouldn't be driven by implementation details.

But I would like to note that PCI device drivers require a driver handshake
before a device goes away.
IIUC s390 hotplug is immediate, which is a problem.
Maybe making this change will help ensure device removal is acked
by the guest before it happens?


> >>>So why not model it like this?
> >>>
> >>>vfio should attach to zdev, zdev is the pci host.
> >>>
> >>>Also, you can stick a pci to pci bridge under the root, and
> >>>everything will just work.
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>>>>>so to me, it looks like there's no need to expose
> >>>>>>>non-root buses through special means.
> >>>>>>>
> >>>>>>>What to do for root buses is a different question but again,
> >>>>>>>you definitely do not want to rely on the order of things
> >>>>>>>on that linked list.
> >>>>>>>The simplest thing is to ask user to give them unique
> >>>>>>>numbers, or find some stable way to sort them that
> >>>>>>>does not rely on order of initialization (e.g. device IDs?).
> >>>>>>>
> >>>>>>>But again, this only works ok for root buses.
> >>>>>>>
> >>>>>>Basically, it does not exposed the buses to guest, it exposed an index
> >>>>>>to guest.
> >>>>>>Here is the process to get all the zpci device for a guest.
> >>>>>>For example: we have 10 zpci devices, and the batch size for list zpci
> >>>>>>instruction is 4.
> >>>>>>First, qemu will return devices 0-3, index of list zpci is 0
> >>>>>>Second, qemu will return device 4-7, index of list zpci is 4
> >>>>>>Third, qemu will return device 8-9, index of list zpci is 8
> >>>>>>We have device id, but list zpci does not use that as a flag to get
> >>>>>>next batch, it use an index instead.
> >>>>>>This process is defined by s390 arch, we can't change it.
> >>>>>>So no matter how we organize zpci devices in qemu, slot or link list.
> >>>>>>We could not get rid of the index issue.
> >>>>>>
> >>>>>>How about I add a flag to identify whether the link list
> >>>>>>is valid or not. When a hotplug/unplug event occurred, I will
> >>>>>>reset the index, and make the guest refetch the zpci devices
> >>>>>>from the beginning.
> >>>>>>
> >>>>>>
> >>>>>You should just use something stable for IDs.
> >>>>>And avoid doing it for anything that isn't a root or maybe a bridge
> >>>>>since it'll just cause everyone maintainance problems down the road.
> >>>>>
> >>>>The list zpci instruction is defined by arch, not a software thing, I could
> >>>>not
> >>>>change it to use a ID instead...
Hong Bo Li July 2, 2015, 5:26 a.m. UTC | #19
On 7/2/2015 13:13, Michael S. Tsirkin wrote:
> On Thu, Jul 02, 2015 at 10:57:34AM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 21:37, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 08:42:52PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 19:57, Michael S. Tsirkin wrote:
>>>>> On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
>>>>>> On 7/1/2015 19:23, Michael S. Tsirkin wrote:
>>>>>>> On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
>>>>>>>> On 7/1/2015 18:36, Michael S. Tsirkin wrote:
>>>>>>>>> On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
>>>>>>>>>> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
>>>>>>>>>>> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>>>>>>>>>>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>>>>>>>>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>>>>>>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>>>>>>>>>>>> to hold devices representing information actually
>>>>>>>>>>>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>>>>>>>>>>>> usage example:
>>>>>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>>>>>>>>>>>> and init the root bus. The second line will create
>>>>>>>>>>>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>>>>>>>>>>>> root bus. These are similiar to the standard process
>>>>>>>>>>>>>>>>>> to define a pci device on other platform.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> The third line will create a s390 pci device to
>>>>>>>>>>>>>>>>>> store s390 specific information, and references
>>>>>>>>>>>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>>>>>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>>>>>>>>>>>> zpci devices.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>>>>>>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>>>>>>>>>>>> one thing below
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>>>>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>>>>>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>>>>>>>>>>>       int rc = 1;
>>>>>>>>>>>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return rc;
>>>>>>>>>>>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return 0;
>>>>>>>>>>>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>>>>>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>>>>>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>>>>>>>>>>>       return;
>>>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>>>>>> -    int j = 0;
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>>>>>> +    int i = 0;
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>>>>>>>>>>>> -            continue;
>>>>>>>>>>>>>>>>>> -        }
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>> -        if (j == idx) {
>>>>>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>>>>>> +        if (i == idx) {
>>>>>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>> -        j++;
>>>>>>>>>>>>>>>>>> +        i++;
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>>       return NULL;
>>>>>>>>>>>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>>>>>>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>>>>>>>>>>>> lookups: more code to achieve the same effect?
>>>>>>>>>>>>>>>> Thank you Michael.
>>>>>>>>>>>>>>>> I do the change due to two reasons:
>>>>>>>>>>>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>>>>>>>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>>>>>>>>>>>> does not work.
>>>>>>>>>>>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>>>>>>>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>>>>>>>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>>>>>>>>>>>> so the order of them does not matter.
>>>>>>>>>>>>>>> But you make this order guest-visible which seems wrong.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>>>>>>>>>>>> devices, and will
>>>>>>>>>>>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>>>>>>>>>>>> with the pci
>>>>>>>>>>>>>> topology on guest.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>>>>>>>>>>>> will use a
>>>>>>>>>>>>>> resume token to get all the zpci devices. For example, first time we return
>>>>>>>>>>>>>> 32 zpci
>>>>>>>>>>>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>>>>>>>>>>>> token
>>>>>>>>>>>>>> is used to store the beginning of zpci devices that will be returned to
>>>>>>>>>>>>>> guest at next time.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>>>>>>>>>>>> change the
>>>>>>>>>>>>>> "batch" in which this device be returned to guest. But this will not change
>>>>>>>>>>>>>> the  pci
>>>>>>>>>>>>>> topology on guest.
>>>>>>>>>>>>> Yes but that's still guest visible, and will break
>>>>>>>>>>>>> for example if guest is migrated between qemu instances
>>>>>>>>>>>>> where list order is different precisely when
>>>>>>>>>>>>> it's enumerating the bus.
>>>>>>>>>>>>>
>>>>>>>>>>>> Yes, and the list order is not the only s390 specific information that
>>>>>>>>>>>> exposed to
>>>>>>>>>>>> guest. Besides that,  we need to migrate all other zpci information. For
>>>>>>>>>>>> now,
>>>>>>>>>>>> we have no plan to support zpci migration yet.
>>>>>>>>>>> BTW how will hotplug work? If it happens while guest
>>>>>>>>>>> enumerates the bus the naturally all index values
>>>>>>>>>>> become invalid.
>>>>>>>>>> The list zpci only happen when the guest doing pci_base_init() for s390.
>>>>>>>>>> At that moment,  hotplug does not work yet.
>>>>>>>>> You can't prevent this: user can request hotplug at this time.
>>>>>>>>>
>>>>>>>>>> And assume we have
>>>>>>>>>> that case, we still have the index issue even when scan standard pci
>>>>>>>>>> bus. Please see my following words.
>>>>>>>>>>
>>>>>>>>>>> Just don't expose internal qdev data structures to guest.
>>>>>>>>>>> It's not by chance that we don't have a look up by index
>>>>>>>>>>> capability, it's an attempt to enforce sane usage.
>>>>>>>>>>> You are misusing the API with your hack.
>>>>>>>>>> The resume token of list zpci is indeed an index of iteration:(
>>>>>>>>>>
>>>>>>>>>>> PCI has standard ways to enumerate the bus, maybe you
>>>>>>>>>>> should emulate it.  Or find some other way that works.
>>>>>>>>>>> The idea to poke at s->fbus->qbus and count things there
>>>>>>>>>>> is a bad one.
>>>>>>>>>>>
>>>>>>>>>> I can define multiple zpci buses, and attach zpci device to a slot of a root
>>>>>>>>>> bus.
>>>>>>>>>> Then I need to add a api to the common pci code to do the scan of all the
>>>>>>>>>> pci host bridges. And in this way, it still has the index issue. I need to
>>>>>>>>>> scan
>>>>>>>>>> from the first bus to count the index. So any suggestion from you?
>>>>>>>>> OK, I looked at arch/s390/pci/pci.c.
>>>>>>>>> First of all, it seems to run the regular PCI thing on bridges.
>>>>>>>>>
>>>>>>>>>          zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>>>>>>                                        zdev, &resources);
>>>>>>>> At this moment, the guest has got all the zpci devices through clp list zpci
>>>>>>>> instruction. For each device, in the pci_scan_root_bus(), it will create
>>>>>>>> a root bus. So for s390, we get pci devices first, then create a new root bus
>>>>>>>> for it.
>>>>>>> I don't see this in guest code.
>>>>>>>
>>>>>>> I looked at pci_scan_root_bus and it's completely generic.
>>>>>>> It sets up the bus:
>>>>>>>          b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
>>>>>>>
>>>>>>> then it scans it:
>>>>>>>          max = pci_scan_child_bus(b);
>>>>>>>
>>>>>>>
>>>>>>> that one does
>>>>>>>          /* Go find them, Rover! */
>>>>>>>          for (devfn = 0; devfn < 0x100; devfn += 8)
>>>>>>>                  pci_scan_slot(bus, devfn);
>>>>>>>
>>>>>>> next
>>>>>>>          dev = pci_scan_single_device(bus, devfn);
>>>>>>>
>>>>>>> and so on. Eventually you get
>>>>>>>          if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
>>>>>>>                  return NULL;
>>>>>>>
>>>>>>> and that one does the clp thing using zpci_cfg_load.
>>>>>>>
>>>>>> pci_base_init()-> clp_scan_pci_devices():
>>>>>>      rc = clp_list_pci(rrb, __clp_add);
>>>>>> In this function, there is a while loop to get all the zpci devices by means
>>>>>> of
>>>>>> resume token(index). And for each device,
>>>>>>      __clp_add()-> clp_add_pci_device();
>>>>>> In clp_add_pci_device(), we use the zpci information to create a struct
>>>>>> zpci_dev zdev.
>>>>>> Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
>>>>>>      zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>>>                        zdev, &resources);
>>>>>> So, you see, each zdev has its own root bus. And there is no child bus under
>>>>>> that root bus.
>>>>> Right - zdev *is* the root. But there are pci devices hanging off it.
>>>> We have multiple zdevs in kernel, and each zdev only has one pci device
>>>> attached to it.
>>> I see. It's nasty. Is it too late to fix in guest?
>>> Supporting bridges should just be a question of passing
>>> bus numbers to host.
>> On the Linux OS level, there is no pci to pci bridge on s390, the bus
>> number,
>> slot number are all virtual and has no meaning, like these:
>> 0000:00:00.0
>> 0001:00:00.0
>> 0002:00:00.0
>> ......
>> Each zpci device is in a separate domain.
>> I add Sebastian to the list, he is the owner of s390 pci. I think he
>> could give some reasons why s390 pci implemented in this way.
>>> I guess you need to support old guests too, so this
>>> justifies some code in qemu. But you still need something
>>> stable to sort by, that does not depend on the order
>>> of initialization of devices. If all else fails, ask user
>>> to give you numbers.
>> Thank you,  it's a good idea, I can sort the devices by fid or uid.
>>
>>> And I'm still confused by this:
>>>>>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>> why isn't vfio connected to zpci? why is it the other way around?
>> I implement the hotplug in s390_pci_device_hot_plug()  in the patch,
>> not the s390_pcihost_hot_plug(). It will do some s390 specific action.
>> If we define zpci first, then I need to do the real hotplug when hotplugging
>> a vfio-pci device.  I think both of them are ok, you prefer the later one?
> I prefer sane modeling, it shouldn't be driven by implementation detail.
>
> But I would like to note that pci device drivers require driver handshake
> before device goes away.
> IIUC s390 hotplug is immediate, which is a problem.
> Maybe doing the change will help make sure device removal is acked
> by guest before it happens?
>
Right, I did not see this point, will do some research on it, thank you!

>>>>> So why not model it like this?
>>>>>
>>>>> vfio should attach to zdev, zdev is the pci host.
>>>>>
>>>>> Also, you can stick a pci to pci bridge under the root, and
>>>>> everything will just work.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>>>>> so to me, it looks like there's no need to expose
>>>>>>>>> non-root buses through special means.
>>>>>>>>>
>>>>>>>>> What to do for root buses is a different question but again,
>>>>>>>>> you definitely do not want to rely on the order of things
>>>>>>>>> on that linked list.
>>>>>>>>> The simplest thing is to ask user to give them unique
>>>>>>>>> numbers, or find some stable way to sort them that
>>>>>>>>> does not rely on order of initialization (e.g. device IDs?).
>>>>>>>>>
>>>>>>>>> But again, this only works ok for root buses.
>>>>>>>>>
>>>>>>>> Basically, it does not exposed the buses to guest, it exposed an index
>>>>>>>> to guest.
>>>>>>>> Here is the process to get all the zpci device for a guest.
>>>>>>>> For example: we have 10 zpci devices, and the batch size for list zpci
>>>>>>>> instruction is 4.
>>>>>>>> First, qemu will return devices 0-3, index of list zpci is 0
>>>>>>>> Second, qemu will return device 4-7, index of list zpci is 4
>>>>>>>> Third, qemu will return device 8-9, index of list zpci is 8
>>>>>>>> We have device id, but list zpci does not use that as a flag to get
>>>>>>>> next batch, it use an index instead.
>>>>>>>> This process is defined by s390 arch, we can't change it.
>>>>>>>> So no matter how we organize zpci devices in qemu, slot or link list.
>>>>>>>> We could not get rid of the index issue.
>>>>>>>>
>>>>>>>> How about I add a flag to identify whether the link list
>>>>>>>> is valid or not. When a hotplug/unplug event occurred, I will
>>>>>>>> reset the index, and make the guest refetch the zpci devices
>>>>>>>> from the beginning.
>>>>>>>>
>>>>>>> You should just use something stable for IDs.
>>>>>>> And avoid doing it for anything that isn't a root or maybe a bridge
>>>>>>> since it'll just cause everyone maintenance problems down the road.
>>>>>>>
>>>>>> The list zpci instruction is defined by arch, not a software thing, I could
>>>>>> not
>>>>>> change it to use a ID instead...
Hong Bo Li July 3, 2015, 11:09 a.m. UTC | #20
On 7/2/2015 13:13, Michael S. Tsirkin wrote:
> On Thu, Jul 02, 2015 at 10:57:34AM +0800, Hong Bo Li wrote:
>>
>> On 7/1/2015 21:37, Michael S. Tsirkin wrote:
>>> On Wed, Jul 01, 2015 at 08:42:52PM +0800, Hong Bo Li wrote:
>>>> On 7/1/2015 19:57, Michael S. Tsirkin wrote:
>>>>> On Wed, Jul 01, 2015 at 07:46:01PM +0800, Hong Bo Li wrote:
>>>>>> On 7/1/2015 19:23, Michael S. Tsirkin wrote:
>>>>>>> On Wed, Jul 01, 2015 at 07:11:38PM +0800, Hong Bo Li wrote:
>>>>>>>> On 7/1/2015 18:36, Michael S. Tsirkin wrote:
>>>>>>>>> On Wed, Jul 01, 2015 at 06:04:24PM +0800, Hong Bo Li wrote:
>>>>>>>>>> On 7/1/2015 17:22, Michael S. Tsirkin wrote:
>>>>>>>>>>> On Wed, Jul 01, 2015 at 05:13:11PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>> On 7/1/2015 16:05, Michael S. Tsirkin wrote:
>>>>>>>>>>>>> On Wed, Jul 01, 2015 at 03:56:25PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>> On 7/1/2015 14:22, Michael S. Tsirkin wrote:
>>>>>>>>>>>>>>> On Tue, Jun 30, 2015 at 02:16:59PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>>>> On 6/29/2015 18:01, Michael S. Tsirkin wrote:
>>>>>>>>>>>>>>>>> On Mon, Jun 29, 2015 at 05:24:53PM +0800, Hong Bo Li wrote:
>>>>>>>>>>>>>>>>>> This patch introduce a new facility(and bus)
>>>>>>>>>>>>>>>>>> to hold devices representing information actually
>>>>>>>>>>>>>>>>>> provided by s390 firmware and I/O configuration.
>>>>>>>>>>>>>>>>>> usage example:
>>>>>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> The first line will create a s390 pci host bridge
>>>>>>>>>>>>>>>>>> and init the root bus. The second line will create
>>>>>>>>>>>>>>>>>> a standard vfio pci device, and attach it to the
>>>>>>>>>>>>>>>>>> root bus. These are similiar to the standard process
>>>>>>>>>>>>>>>>>> to define a pci device on other platform.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> The third line will create a s390 pci device to
>>>>>>>>>>>>>>>>>> store s390 specific information, and references
>>>>>>>>>>>>>>>>>> the corresponding vfio pci device via device id.
>>>>>>>>>>>>>>>>>> We create a s390 pci facility bus to hold all the
>>>>>>>>>>>>>>>>>> zpci devices.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Signed-off-by: Hong Bo Li <lihbbj@linux.vnet.ibm.com>
>>>>>>>>>>>>>>>>> It's mostly up to s390 maintainers, but I'd like to note
>>>>>>>>>>>>>>>>> one thing below
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.c    | 314 +++++++++++++++++++++++++++++++++------------
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-bus.h    |  48 ++++++-
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-pci-inst.c   |   4 +-
>>>>>>>>>>>>>>>>>>   hw/s390x/s390-virtio-ccw.c |   5 +-
>>>>>>>>>>>>>>>>>>   4 files changed, 283 insertions(+), 88 deletions(-)
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>>>> index 560b66a..d5e7b2e 100644
>>>>>>>>>>>>>>>>>> --- a/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>>>> +++ b/hw/s390x/s390-pci-bus.c
>>>>>>>>>>>>>>>>>> @@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>>>>>       PciCcdfErr *eccdf;
>>>>>>>>>>>>>>>>>>       int rc = 1;
>>>>>>>>>>>>>>>>>>       SeiContainer *sei_cont;
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return rc;
>>>>>>>>>>>>>>>>>> @@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
>>>>>>>>>>>>>>>>>>   int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return 0;
>>>>>>>>>>>>>>>>>> @@ -82,20 +82,32 @@ int chsc_sei_nt2_have_event(void)
>>>>>>>>>>>>>>>>>>       return !QTAILQ_EMPTY(&s->pending_sei);
>>>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>>>> +void s390_pci_device_enable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>> +    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>> +void s390_pci_device_disable(S390PCIBusDevice *zpci)
>>>>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>>>>> +    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
>>>>>>>>>>>>>>>>>> +    if (zpci->is_unplugged)
>>>>>>>>>>>>>>>>>> +        object_unparent(OBJECT(zpci));
>>>>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
>>>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>>>>>> -        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
>>>>>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>>>>>> +        if (pbdev->fid == fid) {
>>>>>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>> @@ -126,39 +138,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
>>>>>>>>>>>>>>>>>>       return;
>>>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
>>>>>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn);
>>>>>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>> -static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
>>>>>>>>>>>>>>>>>> -{
>>>>>>>>>>>>>>>>>> -    return PCI_SLOT(pdev->devfn) | FH_VIRT;
>>>>>>>>>>>>>>>>>> -}
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>>   S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
>>>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>>>>       S390PCIBusDevice *pbdev;
>>>>>>>>>>>>>>>>>> -    int i;
>>>>>>>>>>>>>>>>>> -    int j = 0;
>>>>>>>>>>>>>>>>>> -    S390pciState *s = S390_PCI_HOST_BRIDGE(
>>>>>>>>>>>>>>>>>> -        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
>>>>>>>>>>>>>>>>>> +    BusChild *kid;
>>>>>>>>>>>>>>>>>> +    int i = 0;
>>>>>>>>>>>>>>>>>> +    S390PCIFacility *s = S390_PCI_FACILITY(
>>>>>>>>>>>>>>>>>> +        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
>>>>>>>>>>>>>>>>>>       if (!s) {
>>>>>>>>>>>>>>>>>>           return NULL;
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>> -    for (i = 0; i < PCI_SLOT_MAX; i++) {
>>>>>>>>>>>>>>>>>> -        pbdev = &s->pbdev[i];
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>> -        if (pbdev->fh == 0) {
>>>>>>>>>>>>>>>>>> -            continue;
>>>>>>>>>>>>>>>>>> -        }
>>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>>> -        if (j == idx) {
>>>>>>>>>>>>>>>>>> +    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
>>>>>>>>>>>>>>>>>> +        pbdev = (S390PCIBusDevice *)kid->child;
>>>>>>>>>>>>>>>>>> +        if (i == idx) {
>>>>>>>>>>>>>>>>>>               return pbdev;
>>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>> -        j++;
>>>>>>>>>>>>>>>>>> +        i++;
>>>>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>>>>       return NULL;
>>>>>>>>>>>>>>>>> This relies on the order of children on the qbus, that's wrong I think.
>>>>>>>>>>>>>>>>> Generally I'm not sure why do you convert all slot lookups to child
>>>>>>>>>>>>>>>>> lookups: more code to achieve the same effect?
>>>>>>>>>>>>>>>> Thank you Michael.
>>>>>>>>>>>>>>>> I do the change due to two reasons:
>>>>>>>>>>>>>>>> 1. The old implement only supports one s390 pci root bus, and 32(PCI_SLOT_MAX)
>>>>>>>>>>>>>>>> slots at most. So when it comes to multiple s390 pci root buses, the old code
>>>>>>>>>>>>>>>> does not work.
>>>>>>>>>>>>>>>> 2. Now the zpci device "S390PCIBusDevice" is only a structure to store
>>>>>>>>>>>>>>>> s390 specific information, so we can attach all the zpci devices to a
>>>>>>>>>>>>>>>> s390 pci facility bus. Since these zpci device has no relation with the "slot",
>>>>>>>>>>>>>>>> so the order of them does not matter.
>>>>>>>>>>>>>>> But you make this order guest-visible which seems wrong.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>> The guest uses a s390 specific "list pci" instruction to get all the zpci
>>>>>>>>>>>>>> devices, and will
>>>>>>>>>>>>>> create a root s390 pci bus for each device.  So the order has no relation
>>>>>>>>>>>>>> with the pci
>>>>>>>>>>>>>> topology on guest.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> If we assign  too many zpci devices to one guest, the "list pci" instruction
>>>>>>>>>>>>>> will use a
>>>>>>>>>>>>>> resume token to get all the zpci devices. For example, first time we return
>>>>>>>>>>>>>> 32 zpci
>>>>>>>>>>>>>> devices to guest. Next time we'll return another 32 zpci devices. The resume
>>>>>>>>>>>>>> token
>>>>>>>>>>>>>> is used to store the beginning of zpci devices that will be returned to
>>>>>>>>>>>>>> guest at next time.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> So, if we change the order of the zpci device on s390 facility bus, it may
>>>>>>>>>>>>>> change the
>>>>>>>>>>>>>> "batch" in which this device be returned to guest. But this will not change
>>>>>>>>>>>>>> the  pci
>>>>>>>>>>>>>> topology on guest.
>>>>>>>>>>>>> Yes but that's still guest visible, and will break
>>>>>>>>>>>>> for example if guest is migrated between qemu instances
>>>>>>>>>>>>> where list order is different precisely when
>>>>>>>>>>>>> it's enumerating the bus.
>>>>>>>>>>>>>
>>>>>>>>>>>> Yes, and the list order is not the only s390 specific information that
>>>>>>>>>>>> exposed to
>>>>>>>>>>>> guest. Besides that,  we need to migrate all other zpci information. For
>>>>>>>>>>>> now,
>>>>>>>>>>>> we have no plan to support zpci migration yet.
>>>>>>>>>>> BTW how will hotplug work? If it happens while guest
>>>>>>>>>>> enumerates the bus the naturally all index values
>>>>>>>>>>> become invalid.
>>>>>>>>>> The list zpci only happen when the guest doing pci_base_init() for s390.
>>>>>>>>>> At that moment,  hotplug does not work yet.
>>>>>>>>> You can't prevent this: user can request hotplug at this time.
>>>>>>>>>
>>>>>>>>>> And assume we have
>>>>>>>>>> that case, we still have the index issue even when scan standard pci
>>>>>>>>>> bus. Please see my following words.
>>>>>>>>>>
>>>>>>>>>>> Just don't expose internal qdev data structures to guest.
>>>>>>>>>>> It's not by chance that we don't have a look up by index
>>>>>>>>>>> capability, it's an attempt to enforce sane usage.
>>>>>>>>>>> You are misusing the API with your hack.
>>>>>>>>>> The resume token of list zpci is indeed an index of iteration:(
>>>>>>>>>>
>>>>>>>>>>> PCI has standard ways to enumerate the bus, maybe you
>>>>>>>>>>> should emulate it.  Or find some other way that works.
>>>>>>>>>>> The idea to poke at s->fbus->qbus and count things there
>>>>>>>>>>> is a bad one.
>>>>>>>>>>>
>>>>>>>>>> I can define multiple zpci buses, and attach zpci device to a slot of a root
>>>>>>>>>> bus.
>>>>>>>>>> Then I need to add a api to the common pci code to do the scan of all the
>>>>>>>>>> pci host bridges. And in this way, it still has the index issue. I need to
>>>>>>>>>> scan
>>>>>>>>>> from the first bus to count the index. So any suggestion from you?
>>>>>>>>> OK, I looked at arch/s390/pci/pci.c.
>>>>>>>>> First of all, it seems to run the regular PCI thing on bridges.
>>>>>>>>>
>>>>>>>>>          zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>>>>>>                                        zdev, &resources);
>>>>>>>> At this moment, the guest has got all the zpci devices through clp list zpci
>>>>>>>> instruction. For each device, in the pci_scan_root_bus(), it will create
>>>>>>>> a root bus. So for s390, we get pci devices first, then create a new root bus
>>>>>>>> for it.
>>>>>>> I don't see this in guest code.
>>>>>>>
>>>>>>> I looked at pci_scan_root_bus and it's completely generic.
>>>>>>> It sets up the bus:
>>>>>>>          b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
>>>>>>>
>>>>>>> then it scans it:
>>>>>>>          max = pci_scan_child_bus(b);
>>>>>>>
>>>>>>>
>>>>>>> that one does
>>>>>>>          /* Go find them, Rover! */
>>>>>>>          for (devfn = 0; devfn < 0x100; devfn += 8)
>>>>>>>                  pci_scan_slot(bus, devfn);
>>>>>>>
>>>>>>> next
>>>>>>>          dev = pci_scan_single_device(bus, devfn);
>>>>>>>
>>>>>>> and so on. Eventually you get
>>>>>>>          if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
>>>>>>>                  return NULL;
>>>>>>>
>>>>>>> and that one does the clp thing using zpci_cfg_load.
>>>>>>>
>>>>>> pci_base_init()-> clp_scan_pci_devices():
>>>>>>      rc = clp_list_pci(rrb, __clp_add);
>>>>>> In this function, there is a while loop to get all the zpci devices by means
>>>>>> of
>>>>>> resume token(index). And for each device,
>>>>>>      __clp_add()-> clp_add_pci_device();
>>>>>> In clp_add_pci_device(), we use the zpci information to create a struct
>>>>>> zpci_dev zdev.
>>>>>> Then zpci_create_device()->zpci_scan_bus()->pci_scan_root_bus()
>>>>>>      zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
>>>>>>                        zdev, &resources);
>>>>>> So, you see, each zdev has its own root bus. And there is no child bus under
>>>>>> that root bus.
>>>>> Right - zdev *is* the root. But there are pci devices hanging off it.
>>>> We have multiple zdevs in kernel, and each zdev only has one pci device
>>>> attached to it.
>>> I see. It's nasty. Is it too late to fix in guest?
>>> Supporting bridges should just be a question of passing
>>> bus numbers to host.
>> On the Linux OS level, there is no pci to pci bridge on s390, the bus
>> number,
>> slot number are all virtual and has no meaning, like these:
>> 0000:00:00.0
>> 0001:00:00.0
>> 0002:00:00.0
>> ......
>> Each zpci device is in a separate domain.
>> I add Sebastian to the list, he is the owner of s390 pci. I think he
>> could give some reasons why s390 pci implemented in this way.
>>> I guess you need to support old guests too, so this
>>> justifies some code in qemu. But you still need something
>>> stable to sort by, that does not depend on the order
>>> of initialization of devices. If all else fails, ask user
>>> to give you numbers.
>> Thank you,  it's a good idea, I can sort the devices by fid or uid.
>>
>>> And I'm still confused by this:
>>>>>>>>>>>>>>>>>> -device s390-pcihost
>>>>>>>>>>>>>>>>>> -device vfio-pci,host=0000:00:00.0,id=vpci1
>>>>>>>>>>>>>>>>>> -device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
>>> why isn't vfio connected to zpci? why is it the other way around?
>> I implement the hotplug in s390_pci_device_hot_plug()  in the patch,
>> not the s390_pcihost_hot_plug(). It will do some s390 specific action.
>> If we define zpci first, then I need to do the real hotplug when hotplugging
>> a vfio-pci device.  I think both of them are ok, you prefer the later one?
> I prefer sane modeling, it shouldn't be driven by implementation detail.
>
> But I would like to note that pci device drivers require driver handshake
> before device goes away.
> IIUC s390 hotplug is immediate, which is a problem.
> Maybe doing the change will help make sure device removal is acked
> by guest before it happens?
>

I did some prototyping today. If we define zpci first, the unplug process
gets complicated, so I prefer defining vfio pci first.
Also, vfio pci looks like the basic device: if we want this
vfio pci to work on s390, we have to define a zpci device that supplies some
additional information for it.


>>>>> So why not model it like this?
>>>>>
>>>>> vfio should attach to zdev, zdev is the pci host.
>>>>>
>>>>> Also, you can stick a pci to pci bridge under the root, and
>>>>> everything will just work.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>>>>> so to me, it looks like there's no need to expose
>>>>>>>>> non-root buses through special means.
>>>>>>>>>
>>>>>>>>> What to do for root buses is a different question but again,
>>>>>>>>> you definitely do not want to rely on the order of things
>>>>>>>>> on that linked list.
>>>>>>>>> The simplest thing is to ask user to give them unique
>>>>>>>>> numbers, or find some stable way to sort them that
>>>>>>>>> does not rely on order of initialization (e.g. device IDs?).
>>>>>>>>>
>>>>>>>>> But again, this only works ok for root buses.
>>>>>>>>>
>>>>>>>> Basically, it does not exposed the buses to guest, it exposed an index
>>>>>>>> to guest.
>>>>>>>> Here is the process to get all the zpci device for a guest.
>>>>>>>> For example: we have 10 zpci devices, and the batch size for list zpci
>>>>>>>> instruction is 4.
>>>>>>>> First, qemu will return devices 0-3, index of list zpci is 0
>>>>>>>> Second, qemu will return device 4-7, index of list zpci is 4
>>>>>>>> Third, qemu will return device 8-9, index of list zpci is 8
>>>>>>>> We have device id, but list zpci does not use that as a flag to get
>>>>>>>> next batch, it use an index instead.
>>>>>>>> This process is defined by s390 arch, we can't change it.
>>>>>>>> So no matter how we organize zpci devices in qemu, slot or link list.
>>>>>>>> We could not get rid of the index issue.
>>>>>>>>
>>>>>>>> How about I add a flag to identify whether the link list
>>>>>>>> is valid or not. When a hotplug/unplug event occurred, I will
>>>>>>>> reset the index, and make the guest refetch the zpci devices
>>>>>>>> from the beginning.
>>>>>>>>
>>>>>>> You should just use something stable for IDs.
>>>>>>> And avoid doing it for anything that isn't a root or maybe a bridge
>>>>>>> since it'll just cause everyone maintenance problems down the road.
>>>>>>>
>>>>>> The list zpci instruction is defined by arch, not a software thing, I could
>>>>>> not
>>>>>> change it to use a ID instead...
Michael S. Tsirkin July 4, 2015, 6:25 p.m. UTC | #21
On Fri, Jul 03, 2015 at 07:09:59PM +0800, Hong Bo Li wrote:
> >But I would like to note that pci device drivers require driver handshake
> >before device goes away.
> >IIUC s390 hotplug is immediate, which is a problem.
> >Maybe doing the change will help make sure device removal is acked
> >by guest before it happens?
> >
> 
> I did some prototype today. If define zpci first, the progress of unplug
> will get complicated.

The point is that you don't have to remove the zpci device at all.
Remove pci device from zpci.

I think the complication you refer to is the guest ack of
the removal, isn't it?
It's complicated, but it has a chance to actually work with
pci device drivers.

This, as opposed to just removing the device whenever host
tells us to.

> So I prefer defining vfio pci first.
> And it looks like the vfio pci is the basic device, if we want this
> vfio pci to work on s390, we have to define a zpci device to give some
> additional information to it.

if vfio connects to the bus internal to zpci, it can get
things from the bus in a natural way.

If zpci is connected to vfio, it becomes much messier.
Hong Bo Li July 6, 2015, 2:06 a.m. UTC | #22
On 7/5/2015 2:25, Michael S. Tsirkin wrote:
> On Fri, Jul 03, 2015 at 07:09:59PM +0800, Hong Bo Li wrote:
>>> But I would like to note that pci device drivers require driver handshake
>>> before device goes away.
>>> IIUC s390 hotplug is immediate, which is a problem.
>>> Maybe doing the change will help make sure device removal is acked
>>> by guest before it happens?
>>>
>> I did some prototype today. If define zpci first, the progress of unplug
>> will get complicated.
> The point is that you don't have to remove the zpci device at all.
> Remove pci device from zpci.
>
> I think the complication you refer to is the guest ack of
> the removal, isn't it?
> It's complicated, but it has a chance to actually work with
> pci device drivers.
>
> This, as opposed to just removing the device whenever host
> tells us to.

This patch supports the ack in this way:
After unplugging, the guest will do some cleanup work and disable the zpci device.
The "is_unplugged" flag in this patch is used to implement this ack. Only after the
device has been disabled can we remove the zpci device from the list and unparent it.

The complication I mean is:
1. If we define zpci first, the user can unplug an s390 pci device in two ways:
     a) Unplug the vfio pci device first, then unplug the zpci device.
        If the user only tells us to unplug the vfio pci device, then after the
        ack we still need to wait for the zpci unplug cmd from the user; until
        then, we have to keep a useless zpci device in the list.

     b) Unplug the zpci device directly. This will cause the vfio pci device to
        be unplugged automatically. Then s390 has a different unplug cmd
        compared to other platforms.

2. If we define vfio pci first, the user can unplug an s390 pci device in two ways:
     a) Unplug the zpci device first, then unplug the vfio pci device.
        We don't need to maintain the extra s390 zpci structure; after the ack,
        we can remove the zpci device from the list and unparent it.
     b) Unplug the vfio pci device directly. This will cause the zpci device to
        be unplugged automatically. Then s390 has the same unplug cmd as
        other platforms.

The ack is the same for both of these methods.


>> So I prefer defining vfio pci first.
>> And it looks like the vfio pci is the basic device, if we want this
>> vfio pci to work on s390, we have to define a zpci device to give some
>> additional information to it.
> if vfio connects to the bus internal to zpci, it can get
> things from the bus in a natural way.
>
> If zpci is connected to vfio, it becomes much messier.
>

In both cases, the vfio pci device connects to the s390 pci root bus and the
zpci device connects to the s390-pci-fac-bus; there is no difference.
Michael S. Tsirkin July 6, 2015, 10:56 a.m. UTC | #23
On Mon, Jul 06, 2015 at 10:06:50AM +0800, Hong Bo Li wrote:
> 
> 
> On 7/5/2015 2:25, Michael S. Tsirkin wrote:
> >On Fri, Jul 03, 2015 at 07:09:59PM +0800, Hong Bo Li wrote:
> >>>But I would like to note that pci device drivers require driver handshake
> >>>before device goes away.
> >>>IIUC s390 hotplug is immediate, which is a problem.
> >>>Maybe doing the change will help make sure device removal is acked
> >>>by guest before it happens?
> >>>
> >>I did some prototype today. If define zpci first, the progress of unplug
> >>will get complicated.
> >The point is that you don't have to remove the zpci device at all.
> >Remove pci device from zpci.
> >
> >I think the complication you refer to is the guest ack of
> >the removal, isn't it?
> >It's complicated, but it has a chance to actually work with
> >pci device drivers.
> >
> >This, as opposed to just removing the device whenever host
> >tells us to.
> 
> This patch supports the ack in this way:
> After unplugging, the guest will do some cleanup work and disable the zpci device.
> The "is_unplugged" flag in this patch is used to do this ack. Only after the device
> be disabled, we can remove the zpci device from list and do unparent.
> 
> The complication I mean is:
> 1. If we define zpci first, the user can unplug a s390 pci device in two ways:
>     a) unplug the vfio pci device first, unplug the zpci device second.
>         If the user only tell us to unplug the vfio pci, after the ack, we will
> 	still need to wait for the unplug zpci cmd from user,  before that,
> 	we have to maintain a useless zpci in list.
> 
>     b) Unplug the zpci device directly. This will cause the unplugging of vfio pci
> 	automatically. Then on s390, we have a different unplug cmd comparing to
> 	other platform.
> 
> 2. If we define vfio pci first,  the user can unplug a s390 pci device in two ways:
>     a) Unplug the zpci first, unplug the vfio pci device second.
>        We don't need to maintain the extra s390 zpci structure, after ack, we can
> 	remove the zpci from list and do unparent.
>     b) Unplug the vfio pci directly. This will cause the unplugging of zpci
>        automatically.  Then on s390, we have a same unplug cmd comparing to
>        other platform.

You can do the automatic unplug of zpci in 1 as well, can you not?


> The ack of these two methods are the same.
> 
> 
> >>So I prefer defining vfio pci first.
> >>And it looks like the vfio pci is the basic device, if we want this
> >>vfio pci to work on s390, we have to define a zpci device to give some
> >>additional information to it.
> >if vfio connects to the bus internal to zpci, it can get
> >things from the bus in a natural way.
> >
> >If zpci is connected to vfio, it becomes much messier.
> >
> 
> For these two ways, the vfio pci both connect to the s390 pci root bus.
> And zpci devices connect to the s390-pci-fac-bus, there is no difference.

What if you don't specify zpci? Does vfio still work?
If not, then we have to conclude that vfio is connected through
zpci, not alongside it.
Hong Bo Li July 6, 2015, 12:09 p.m. UTC | #24
On 7/6/2015 18:56, Michael S. Tsirkin wrote:
> On Mon, Jul 06, 2015 at 10:06:50AM +0800, Hong Bo Li wrote:
>>
>> On 7/5/2015 2:25, Michael S. Tsirkin wrote:
>>> On Fri, Jul 03, 2015 at 07:09:59PM +0800, Hong Bo Li wrote:
>>>>> But I would like to note that pci device drivers require driver handshake
>>>>> before device goes away.
>>>>> IIUC s390 hotplug is immediate, which is a problem.
>>>>> Maybe doing the change will help make sure device removal is acked
>>>>> by guest before it happens?
>>>>>
>>>> I did some prototype today. If define zpci first, the progress of unplug
>>>> will get complicated.
>>> The point is that you don't have to remove the zpci device at all.
>>> Remove pci device from zpci.
>>>
>>> I think the complication you refer to is the guest ack of
>>> the removal, isn't it?
>>> It's complicated, but it has a chance to actually work with
>>> pci device drivers.
>>>
>>> This, as opposed to just removing the device whenever host
>>> tells us to.
>> This patch supports the ack in this way:
>> After unplugging, the guest will do some cleanup work and disable the zpci device.
>> The "is_unplugged" flag in this patch is used to do this ack. Only after the device
>> be disabled, we can remove the zpci device from list and do unparent.
>>
>> The complication I mean is:
>> 1. If we define zpci first, the user can unplug a s390 pci device in two ways:
>>      a) unplug the vfio pci device first, unplug the zpci device second.
>>          If the user only tell us to unplug the vfio pci, after the ack, we will
>> 	still need to wait for the unplug zpci cmd from user,  before that,
>> 	we have to maintain a useless zpci in list.
>>
>>      b) Unplug the zpci device directly. This will cause the unplugging of vfio pci
>> 	automatically. Then on s390, we have a different unplug cmd comparing to
>> 	other platform.
>>
>> 2. If we define vfio pci first,  the user can unplug a s390 pci device in two ways:
>>      a) Unplug the zpci first, unplug the vfio pci device second.
>>         We don't need to maintain the extra s390 zpci structure, after ack, we can
>> 	remove the zpci from list and do unparent.
>>      b) Unplug the vfio pci directly. This will cause the unplugging of zpci
>>         automatically.  Then on s390, we have a same unplug cmd comparing to
>>         other platform.
> You can do the automatic unplug of zpci in 1 as well, can you not?

ok, got your point, will make a patch to verify that. Thank you!

>
>> The ack of these two methods are the same.
>>
>>
>>>> So I prefer defining vfio pci first.
>>>> And it looks like the vfio pci is the basic device, if we want this
>>>> vfio pci to work on s390, we have to define a zpci device to give some
>>>> additional information to it.
>>> if vfio connects to the bus internal to zpci, it can get
>>> things from the bus in a natural way.
>>>
>>> If zpci is connected to vfio, it becomes much messier.
>>>
>> For these two ways, the vfio pci both connect to the s390 pci root bus.
>> And zpci devices connect to the s390-pci-fac-bus, there is no difference.
> What if you don't specify zpci? Does vfio still work?
> If no then we have to conclude that vfio is connected through
> zpci not alongside it.
>
diff mbox

Patch

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 560b66a..d5e7b2e 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -32,8 +32,8 @@  int chsc_sei_nt2_get_event(void *res)
     PciCcdfErr *eccdf;
     int rc = 1;
     SeiContainer *sei_cont;
-    S390pciState *s = S390_PCI_HOST_BRIDGE(
-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    S390PCIFacility *s = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
     if (!s) {
         return rc;
@@ -72,8 +72,8 @@  int chsc_sei_nt2_get_event(void *res)
 
 int chsc_sei_nt2_have_event(void)
 {
-    S390pciState *s = S390_PCI_HOST_BRIDGE(
-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    S390PCIFacility *s = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
     if (!s) {
         return 0;
@@ -82,20 +82,32 @@  int chsc_sei_nt2_have_event(void)
     return !QTAILQ_EMPTY(&s->pending_sei);
 }
 
+void s390_pci_device_enable(S390PCIBusDevice *zpci)
+{
+    zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
+}
+
+void s390_pci_device_disable(S390PCIBusDevice *zpci)
+{
+    zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
+    if (zpci->is_unplugged)
+        object_unparent(OBJECT(zpci));
+}
+
 S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
 {
     S390PCIBusDevice *pbdev;
-    int i;
-    S390pciState *s = S390_PCI_HOST_BRIDGE(
-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    BusChild *kid;
+    S390PCIFacility *s = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
     if (!s) {
         return NULL;
     }
 
-    for (i = 0; i < PCI_SLOT_MAX; i++) {
-        pbdev = &s->pbdev[i];
-        if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
+        pbdev = (S390PCIBusDevice *)kid->child;
+        if (pbdev->fid == fid) {
             return pbdev;
         }
     }
@@ -126,39 +138,24 @@  void s390_pci_sclp_configure(int configure, SCCB *sccb)
     return;
 }
 
-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
-{
-    return PCI_SLOT(pdev->devfn);
-}
-
-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
-{
-    return PCI_SLOT(pdev->devfn) | FH_VIRT;
-}
-
 S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
 {
     S390PCIBusDevice *pbdev;
-    int i;
-    int j = 0;
-    S390pciState *s = S390_PCI_HOST_BRIDGE(
-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    BusChild *kid;
+    int i = 0;
+    S390PCIFacility *s = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
     if (!s) {
         return NULL;
     }
 
-    for (i = 0; i < PCI_SLOT_MAX; i++) {
-        pbdev = &s->pbdev[i];
-
-        if (pbdev->fh == 0) {
-            continue;
-        }
-
-        if (j == idx) {
+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
+        pbdev = (S390PCIBusDevice *)kid->child;
+        if (i == idx) {
             return pbdev;
         }
-        j++;
+        i++;
     }
 
     return NULL;
@@ -167,16 +164,16 @@  S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
 S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
 {
     S390PCIBusDevice *pbdev;
-    int i;
-    S390pciState *s = S390_PCI_HOST_BRIDGE(
-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    BusChild *kid;
+    S390PCIFacility *s = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
     if (!s || !fh) {
         return NULL;
     }
 
-    for (i = 0; i < PCI_SLOT_MAX; i++) {
-        pbdev = &s->pbdev[i];
+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
+        pbdev = (S390PCIBusDevice *)kid->child;
         if (pbdev->fh == fh) {
             return pbdev;
         }
@@ -185,12 +182,33 @@  S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
     return NULL;
 }
 
+static S390PCIBusDevice *s390_pci_find_dev_by_pdev(PCIDevice *pdev)
+{
+    S390PCIBusDevice *pbdev;
+    BusChild *kid;
+    S390PCIFacility *s = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
+
+    if (!s || !pdev) {
+        return NULL;
+    }
+
+    QTAILQ_FOREACH(kid, &s->fbus->qbus.children, sibling) {
+        pbdev = (S390PCIBusDevice *)kid->child;
+        if (pbdev->pdev == pdev) {
+            return pbdev;
+        }
+    }
+
+    return NULL;
+}
+
 static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
                                     uint32_t fid, uint64_t faddr, uint32_t e)
 {
     SeiContainer *sei_cont;
-    S390pciState *s = S390_PCI_HOST_BRIDGE(
-        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    S390PCIFacility *s = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
     if (!s) {
         return;
@@ -308,7 +326,10 @@  static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
 {
     uint64_t pte;
     uint32_t flags;
-    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
+    S390PCIDeviceConn *conn = container_of(iommu, S390PCIDeviceConn,
+                                           iommu_mr);
+    S390PCIBusDevice *pbdev = conn->zpci;
+
     S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
                                            ->qbus.parent);
     IOMMUTLBEntry ret = {
@@ -319,8 +340,14 @@  static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
         .perm = IOMMU_NONE,
     };
 
+    if (!pbdev) {
+        return ret;
+    }
+
     DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
 
+    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent);
+
     /* s390 does not have an APIC mapped to main storage so we use
      * a separate AddressSpace only for msix notifications
      */
@@ -382,7 +409,7 @@  static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
     S390pciState *s = opaque;
 
-    return &s->pbdev[PCI_SLOT(devfn)].as;
+    return &s->conn[PCI_SLOT(devfn)].iommu_as;
 }
 
 static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
@@ -455,9 +482,10 @@  static void s390_pcihost_init_as(S390pciState *s)
     int i;
 
     for (i = 0; i < PCI_SLOT_MAX; i++) {
-        memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s),
+        memory_region_init_iommu(&s->conn[i].iommu_mr, OBJECT(s),
                                  &s390_iommu_ops, "iommu-s390", UINT64_MAX);
-        address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci");
+        address_space_init(&s->conn[i].iommu_as, &s->conn[i].iommu_mr,
+                           "iommu-pci");
     }
 
     memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
@@ -484,7 +512,7 @@  static int s390_pcihost_init(SysBusDevice *dev)
     bus = BUS(b);
     qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
     phb->bus = b;
-    QTAILQ_INIT(&s->pending_sei);
+
     return 0;
 }
 
@@ -519,26 +547,6 @@  static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
 static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
                                   DeviceState *dev, Error **errp)
 {
-    PCIDevice *pci_dev = PCI_DEVICE(dev);
-    S390PCIBusDevice *pbdev;
-    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
-                                           ->qbus.parent);
-
-    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
-
-    pbdev->fid = s390_pci_get_pfid(pci_dev);
-    pbdev->pdev = pci_dev;
-    pbdev->configured = true;
-    pbdev->fh = s390_pci_get_pfh(pci_dev);
-
-    s390_pcihost_setup_msix(pbdev);
-
-    if (dev->hotplugged) {
-        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
-                                     pbdev->fh, pbdev->fid);
-        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
-                                     pbdev->fh, pbdev->fid);
-    }
     return;
 }
 
@@ -546,31 +554,30 @@  static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
                                     DeviceState *dev, Error **errp)
 {
     PCIDevice *pci_dev = PCI_DEVICE(dev);
-    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
-                                           ->qbus.parent);
-    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
-
-    if (pbdev->configured) {
-        pbdev->configured = false;
-        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
-                                     pbdev->fh, pbdev->fid);
+    S390PCIBusDevice *pbdev;
+    HotplugHandler *hotplug_ctrl;
+    S390PCIFacility *f = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
+    S390PCIFacilityClass *k = S390_PCI_FACILITY_GET_CLASS(f);
+    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_CLASS(k);
+
+    /* unplug corresponding zpci device */
+    pbdev = s390_pci_find_dev_by_pdev(pci_dev);
+    if (pbdev) {
+        hotplug_ctrl = pbdev->qdev.parent_bus->hotplug_handler;
+        if (hdc->unplug_request) {
+            hdc->unplug_request(hotplug_ctrl, &pbdev->qdev, errp);
+        }
     }
 
-    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
-                                 pbdev->fh, pbdev->fid);
-    pbdev->fh = 0;
-    pbdev->fid = 0;
-    pbdev->pdev = NULL;
     object_unparent(OBJECT(pci_dev));
 }
 
 static void s390_pcihost_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
-    DeviceClass *dc = DEVICE_CLASS(klass);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
-    dc->cannot_instantiate_with_device_add_yet = true;
     k->init = s390_pcihost_init;
     hc->plug = s390_pcihost_hot_plug;
     hc->unplug = s390_pcihost_hot_unplug;
@@ -588,9 +595,156 @@  static const TypeInfo s390_pcihost_info = {
     }
 };
 
+static void s390_pci_device_hot_plug(HotplugHandler *hotplug_dev,
+                                     DeviceState *dev, Error **errp)
+{
+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
+
+    zpci->configured = true;
+
+    if (dev->hotplugged) {
+        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
+                                     zpci->fh, zpci->fid);
+        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
+                                     zpci->fh, zpci->fid);
+    }
+}
+
+static void s390_pci_device_hot_unplug_request(HotplugHandler *hotplug_dev,
+                                       DeviceState *dev, Error **errp)
+{
+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
+
+    if (zpci->configured) {
+        zpci->configured = false;
+        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
+                                     zpci->fh, zpci->fid);
+    }
+
+    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
+                                 zpci->fh, zpci->fid);
+
+    zpci->is_unplugged = true;
+}
+
+static const TypeInfo s390_pci_fac_bus_info = {
+    .name = TYPE_S390_PCI_FAC_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = sizeof(S390PCIFacBus),
+};
+
+static int s390_pci_facility_init(S390PCIFacility *f)
+{
+    DeviceState *dev = DEVICE(f);
+
+    QTAILQ_INIT(&f->pending_sei);
+    msi_supported = true;
+    f->fbus = S390_PCI_FAC_BUS(qbus_create(TYPE_S390_PCI_FAC_BUS, dev, NULL));
+    qbus_set_hotplug_handler(BUS(&f->fbus->qbus), DEVICE(dev), NULL);
+
+    return 0;
+}
+
+static void s390_pci_facility_class_init(ObjectClass *klass, void *data)
+{
+    S390PCIFacilityClass *k = S390_PCI_FACILITY_CLASS(klass);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(k);
+
+    k->init = s390_pci_facility_init;
+    hc->plug = s390_pci_device_hot_plug;
+    hc->unplug_request = s390_pci_device_hot_unplug_request;
+}
+
+static const TypeInfo s390_pci_facility_info = {
+    .name          = TYPE_S390_PCI_FACILITY,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(S390PCIFacility),
+    .class_init    = s390_pci_facility_class_init,
+    .class_size    = sizeof(S390PCIFacilityClass),
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+static void s390_pci_device_realize(DeviceState *dev, Error **errp)
+{
+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
+    S390PCIBusDevice *tmp;
+    S390pciState *s;
+    BusChild *kid;
+    PCIDevice *pdev;
+    int ret;
+    S390PCIFacility *f = S390_PCI_FACILITY(
+        object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
+
+    ret = pci_qdev_find_device(zpci->pci_id, &pdev);
+    if (ret < 0) {
+        error_setg(errp, "vfio pci device %s not found", zpci->pci_id);
+        return;
+    }
+
+    QTAILQ_FOREACH(kid, &f->fbus->qbus.children, sibling) {
+        tmp = (S390PCIBusDevice *)kid->child;
+        if (tmp == zpci) {
+            continue;
+        }
+
+        if (tmp->fid == zpci->fid || tmp->uid == zpci->uid ||
+            !strcmp(tmp->pci_id, zpci->pci_id)) {
+            error_setg(errp, "zpci needs unique fid, uid and pci_id");
+            return;
+        }
+    }
+
+    s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pdev)->qbus.parent);
+    s->conn[PCI_SLOT(pdev->devfn)].zpci = zpci;
+
+    zpci->pdev = pdev;
+    zpci->fh = zpci->fid | FH_VIRT;
+    s390_pcihost_setup_msix(zpci);
+}
+
+static void s390_pci_device_unrealize(DeviceState *dev, Error **errp)
+{
+    S390PCIBusDevice *zpci = S390_PCI_DEVICE(dev);
+
+    zpci->fh = 0;
+    zpci->fid = 0;
+    zpci->pdev = NULL;
+}
+
+static Property s390_pci_device_properties[] = {
+    DEFINE_PROP_UINT32("fid", S390PCIBusDevice, fid, 0),
+    DEFINE_PROP_UINT32("uid", S390PCIBusDevice, uid, 0),
+    DEFINE_PROP_STRING("pci_id", S390PCIBusDevice, pci_id),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void s390_pci_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "s390 pci device";
+    dc->bus_type = TYPE_S390_PCI_FAC_BUS;
+    dc->realize = s390_pci_device_realize;
+    dc->unrealize = s390_pci_device_unrealize;
+    dc->props = s390_pci_device_properties;
+}
+
+static const TypeInfo s390_pci_device_type_info = {
+    .name = TYPE_S390_PCI_DEVICE,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(S390PCIBusDevice),
+    .class_init = s390_pci_device_class_init,
+};
+
 static void s390_pci_register_types(void)
 {
     type_register_static(&s390_pcihost_info);
+    type_register_static(&s390_pci_facility_info);
+    type_register_static(&s390_pci_fac_bus_info);
+    type_register_static(&s390_pci_device_type_info);
 }
 
 type_init(s390_pci_register_types)
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 464a92e..5bf3913 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -149,6 +149,21 @@  enum ZpciIoatDtype {
 #define ZPCI_TABLE_VALID_MASK           0x20
 #define ZPCI_TABLE_PROT_MASK            0x200
 
+#define TYPE_S390_PCI_FACILITY "s390-pci-facility"
+#define TYPE_S390_PCI_FAC_BUS "s390-pci-fac-bus"
+#define TYPE_S390_PCI_DEVICE "zpci"
+
+#define S390_PCI_FACILITY(obj) \
+    OBJECT_CHECK(S390PCIFacility, (obj), TYPE_S390_PCI_FACILITY)
+#define S390_PCI_FAC_BUS(obj) \
+    OBJECT_CHECK(S390PCIFacBus, (obj), TYPE_S390_PCI_FAC_BUS)
+#define S390_PCI_FACILITY_CLASS(klass) \
+    OBJECT_CLASS_CHECK(S390PCIFacilityClass, (klass), TYPE_S390_PCI_FACILITY)
+#define S390_PCI_DEVICE(obj) \
+    OBJECT_CHECK(S390PCIBusDevice, (obj), TYPE_S390_PCI_DEVICE)
+#define S390_PCI_FACILITY_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(S390PCIFacilityClass, (obj), TYPE_S390_PCI_FACILITY)
+
 typedef struct SeiContainer {
     QTAILQ_ENTRY(SeiContainer) link;
     uint32_t fid;
@@ -214,12 +229,16 @@  typedef struct S390MsixInfo {
 } S390MsixInfo;
 
 typedef struct S390PCIBusDevice {
+    DeviceState qdev;
     PCIDevice *pdev;
     bool configured;
+    bool is_unplugged;
     bool error_state;
     bool lgstg_blocked;
     uint32_t fh;
     uint32_t fid;
+    uint32_t uid;
+    char *pci_id;
     uint64_t g_iota;
     uint64_t pba;
     uint64_t pal;
@@ -229,21 +248,42 @@  typedef struct S390PCIBusDevice {
     uint8_t sum;
     S390MsixInfo msix;
     AdapterRoutes routes;
-    AddressSpace as;
-    MemoryRegion mr;
+    QLIST_ENTRY(S390PCIDevice) entry;
 } S390PCIBusDevice;
 
+typedef struct S390PCIDeviceConn {
+    S390PCIBusDevice *zpci;
+    AddressSpace iommu_as;
+    MemoryRegion iommu_mr;
+} S390PCIDeviceConn;
+
 typedef struct S390pciState {
     PCIHostState parent_obj;
-    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
+    S390PCIDeviceConn conn[PCI_SLOT_MAX];
     AddressSpace msix_notify_as;
     MemoryRegion msix_notify_mr;
-    QTAILQ_HEAD(, SeiContainer) pending_sei;
 } S390pciState;
 
+typedef struct S390PCIFacBus {
+    BusState qbus;
+} S390PCIFacBus;
+
+typedef struct S390PCIFacility {
+    SysBusDevice parent_obj;
+    S390PCIFacBus *fbus;
+    QTAILQ_HEAD(, SeiContainer) pending_sei;
+} S390PCIFacility;
+
+typedef struct S390PCIFacilityClass {
+    DeviceClass parent_class;
+    int (*init)(S390PCIFacility *f);
+} S390PCIFacilityClass;
+
 int chsc_sei_nt2_get_event(void *res);
 int chsc_sei_nt2_have_event(void);
 void s390_pci_sclp_configure(int configure, SCCB *sccb);
+void s390_pci_device_enable(S390PCIBusDevice *zpci);
+void s390_pci_device_disable(S390PCIBusDevice *zpci);
 S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
 S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
 S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index f9151a9..2977e9c 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -208,12 +208,12 @@  int clp_service_call(S390CPU *cpu, uint8_t r2)
 
         switch (reqsetpci->oc) {
         case CLP_SET_ENABLE_PCI_FN:
-            pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET;
+            s390_pci_device_enable(pbdev);
             stl_p(&ressetpci->fh, pbdev->fh);
             stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
             break;
         case CLP_SET_DISABLE_PCI_FN:
-            pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET);
+            s390_pci_device_disable(pbdev);
             pbdev->error_state = false;
             pbdev->lgstg_blocked = false;
             stl_p(&ressetpci->fh, pbdev->fh);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index a3b14b5..56940e8 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -125,8 +125,8 @@  static void ccw_init(MachineState *machine)
                       machine->initrd_filename, "s390-ccw.img", true);
     s390_flic_init();
 
-    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
-    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
+    dev = qdev_create(NULL, TYPE_S390_PCI_FACILITY);
+    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_FACILITY,
                               OBJECT(dev), NULL);
     qdev_init_nofail(dev);
 
@@ -173,6 +173,7 @@  static void ccw_machine_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = 255;
     mc->hot_add_cpu = ccw_hot_add_cpu;
     mc->is_default = 1;
+    mc->has_dynamic_sysbus = true;
     nc->nmi_monitor_handler = s390_nmi;
 }