Patchwork [v3,08/13] pcie root port: implement pcie root port.

login
register
mail settings
Submitter Isaku Yamahata
Date Sept. 15, 2010, 5:38 a.m.
Message ID <bc0b875bfb85b10d95a268cf6630963c23a779df.1284528424.git.yamahata@valinux.co.jp>
Download mbox | patch
Permalink /patch/64786/
State New
Headers show

Comments

Isaku Yamahata - Sept. 15, 2010, 5:38 a.m.
pcie root port.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
Changes v2 -> v3:
- compilation adjustment.
---
 Makefile.objs  |    2 +-
 hw/pcie_root.c |  240 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pcie_root.h |   32 ++++++++
 3 files changed, 273 insertions(+), 1 deletions(-)
 create mode 100644 hw/pcie_root.c
 create mode 100644 hw/pcie_root.h
Michael S. Tsirkin - Sept. 22, 2010, 11:25 a.m.
On Wed, Sep 15, 2010 at 02:38:21PM +0900, Isaku Yamahata wrote:
> pcie root port.
> 
> Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
> ---
> Changes v2 -> v3:
> - compilation adjustment.

so this is a specific intel root port, lets
name file and rotines appropriately.

> ---
>  Makefile.objs  |    2 +-
>  hw/pcie_root.c |  240 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/pcie_root.h |   32 ++++++++
>  3 files changed, 273 insertions(+), 1 deletions(-)
>  create mode 100644 hw/pcie_root.c
>  create mode 100644 hw/pcie_root.h
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 6c3b84a..7e81b57 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -186,7 +186,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o
>  # PCI watchdog devices
>  hw-obj-y += wdt_i6300esb.o
>  
> -hw-obj-y += pcie.o pcie_aer.o pcie_port.o
> +hw-obj-y += pcie.o pcie_aer.o pcie_port.o pcie_root.o
>  hw-obj-y += msix.o msi.o
>  
>  # PCI network cards
> diff --git a/hw/pcie_root.c b/hw/pcie_root.c
> new file mode 100644
> index 0000000..9255bed
> --- /dev/null
> +++ b/hw/pcie_root.c
> @@ -0,0 +1,240 @@
> +/*
> + * pcie_root.c
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "pci_ids.h"
> +#include "msi.h"
> +#include "pcie.h"
> +#include "pcie_root.h"
> +
> +/* For now, Intel X58 IOH corporate deice exporess* root port.
> +   need to get its own id? */
> +#define PCI_DEVICE_ID_IOH_EPORT         0x3420  /* D0:F0 express mode */
> +#define PCI_DEVICE_ID_IOH_REV           0x2
> +#define IOH_EP_SSVID_OFFSET             0x40
> +#define IOH_EP_SSVID_SVID               PCI_VENDOR_ID_INTEL
> +#define IOH_EP_SSVID_SSID               0
> +#define IOH_EP_MSI_OFFSET               0x60
> +#define IOH_EP_MSI_SUPPORTED_FLAGS      PCI_MSI_FLAGS_MASKBIT
> +#define IOH_EP_MSI_NR_VECTOR            2
> +#define IOH_EP_EXP_OFFSET               0x90
> +#define IOH_EP_AER_OFFSET               0x100
> +
> +#define PCIE_ROOT_VID                   PCI_VENDOR_ID_INTEL
> +#define PCIE_ROOT_DID                   PCI_DEVICE_ID_IOH_EPORT
> +#define PCIE_ROOT_REV                   PCI_DEVICE_ID_IOH_REV
> +#define PCIE_ROOT_SSVID_OFFSET          IOH_EP_SSVID_OFFSET
> +#define PCIE_ROOT_SVID                  IOH_EP_SSVID_SVID
> +#define PCIE_ROOT_SSID                  IOH_EP_SSVID_SSID
> +#define PCIE_ROOT_MSI_SUPPORTED_FLAGS   IOH_EP_MSI_SUPPORTED_FLAGS
> +#define PCIE_ROOT_MSI_NR_VECTOR         IOH_EP_MSI_NR_VECTOR
> +#define PCIE_ROOT_MSI_OFFSET            IOH_EP_MSI_OFFSET
> +#define PCIE_ROOT_EXP_OFFSET            IOH_EP_EXP_OFFSET
> +#define PCIE_ROOT_AER_OFFSET            IOH_EP_AER_OFFSET
> +
> +/*
> + * If two MSI vector are allocated, Advanced Error Interrupt Message Number
> + * is 1. otherwise 0.
> + * 17.12.5.10 RPERRSTS,  32:27 bit Advanced Error Interrupt Message Number.
> + */
> +static uint8_t pcie_root_aer_vector(const PCIDevice *d)
> +{
> +    switch (msi_nr_vectors_allocated(d)) {
> +    case 1:
> +        return 0;
> +    case 2:
> +        return 1;
> +    case 4:
> +    case 8:
> +    case 16:
> +    case 32:
> +    default:
> +        break;
> +    }
> +    abort();
> +    return 0;
> +}
> +
> +static void pcie_root_aer_vector_update(PCIDevice *d)
> +{
> +    pcie_aer_root_set_vector(d, pcie_root_aer_vector(d));
> +}
> +
> +static void pcie_root_write_config(PCIDevice *d,
> +                                   uint32_t address, uint32_t val, int len)
> +{
> +    uint16_t sltctl =
> +        pci_get_word(d->config + pci_pcie_cap(d) + PCI_EXP_SLTCTL);
> +    uint32_t uncorsta =
> +        pci_get_long(d->config + pcie_aer_cap(d) + PCI_ERR_UNCOR_STATUS);
> +    uint32_t root_cmd =
> +        pci_get_long(d->config + pcie_aer_cap(d) + PCI_ERR_ROOT_COMMAND);
> +
> +    pci_bridge_write_config(d, address, val, len);
> +    msi_write_config(d, address, val, len);
> +    pcie_root_aer_vector_update(d);
> +    pcie_cap_slot_write_config(d, address, val, len, sltctl);
> +    pcie_aer_write_config(d, address, val, len, uncorsta);
> +    pcie_aer_root_write_config(d, address, val, len, root_cmd);
> +}
> +
> +static void pcie_root_reset(DeviceState *qdev)
> +{
> +    PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev);
> +    msi_reset(d);
> +    pcie_root_aer_vector_update(d);
> +    pcie_cap_root_reset(d);
> +    pcie_cap_deverr_reset(d);
> +    pcie_cap_slot_reset(d);
> +    pcie_aer_root_reset(d);
> +    pci_bridge_reset(qdev);
> +}
> +
> +static int pcie_root_initfn(PCIDevice *d)
> +{
> +    PCIBridge* br = DO_UPCAST(PCIBridge, dev, d);
> +    PCIEPort *p = DO_UPCAST(PCIEPort, br, br);
> +    PCIESlot *s = DO_UPCAST(PCIESlot, port, p);
> +    int rc;
> +
> +    rc = pci_bridge_initfn(d);
> +    if (rc < 0) {
> +        return rc;
> +    }
> +
> +    d->config[PCI_REVISION_ID] = PCIE_ROOT_REV;
> +    pcie_port_init_reg(d);
> +
> +    pci_config_set_vendor_id(d->config, PCIE_ROOT_VID);
> +    pci_config_set_device_id(d->config, PCIE_ROOT_DID);
> +
> +    rc = pci_bridge_ssvid_init(d, PCIE_ROOT_SSVID_OFFSET,
> +                               PCIE_ROOT_SVID, PCIE_ROOT_SSID);
> +    if (rc < 0) {
> +        return rc;
> +    }
> +    rc = msi_init(d, PCIE_ROOT_MSI_OFFSET, PCIE_ROOT_MSI_NR_VECTOR,
> +                  PCIE_ROOT_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
> +                  PCIE_ROOT_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
> +    if (rc < 0) {
> +        return rc;
> +    }
> +    rc = pcie_cap_init(d, PCIE_ROOT_EXP_OFFSET, PCI_EXP_TYPE_ROOT_PORT,
> +                       p->port);
> +    if (rc < 0) {
> +        return rc;
> +    }
> +    pcie_cap_deverr_init(d);
> +    pcie_cap_slot_init(d, s->slot);
> +    pcie_chassis_create(s->chassis);
> +    rc = pcie_chassis_add_slot(s);
> +    if (rc < 0) {
> +        return rc;
> +    }
> +    pcie_cap_root_init(d);
> +    pcie_aer_init(d, PCIE_ROOT_AER_OFFSET);
> +    pcie_aer_root_init(d);
> +    pcie_root_aer_vector_update(d);
> +    return 0;
> +}
> +
> +static int pcie_root_exitfn(PCIDevice *d)
> +{
> +    pcie_aer_exit(d);
> +    msi_uninit(d);
> +    pcie_cap_exit(d);
> +    return pci_bridge_exitfn(d);
> +}
> +
> +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> +                         const char *bus_name, pci_map_irq_fn map_irq,
> +                         uint8_t port, uint8_t chassis, uint16_t slot)
> +{
> +    PCIDevice *d;
> +    PCIBridge *br;
> +    DeviceState *qdev;
> +
> +    d = pci_create_multifunction(bus, devfn, multifunction, PCIE_ROOT_PORT);
> +    if (!d) {
> +        return NULL;
> +    }
> +    br = DO_UPCAST(PCIBridge, dev, d);
> +
> +    qdev = &br->dev.qdev;
> +    pci_bridge_map_irq(br, bus_name, map_irq);
> +    qdev_prop_set_uint8(qdev, "port", port);
> +    qdev_prop_set_uint8(qdev, "chassis", chassis);
> +    qdev_prop_set_uint16(qdev, "slot", slot);
> +    qdev_init_nofail(qdev);
> +
> +    return DO_UPCAST(PCIESlot, port, DO_UPCAST(PCIEPort, br, br));
> +}
> +
> +static const VMStateDescription vmstate_pcie_root = {
> +    .name = "pcie-root-port",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_PCIE_DEVICE(port.br.dev, PCIESlot),
> +        VMSTATE_STRUCT(port.br.dev.exp.aer_log, PCIESlot, 0,
> +                       vmstate_pcie_aer_log, PCIE_AERLog),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static PCIDeviceInfo pcie_root_info = {
> +    .qdev.name = PCIE_ROOT_PORT,
> +    .qdev.desc = "Root Port of PCI Express Switch",
> +    .qdev.size = sizeof(PCIESlot),
> +    .qdev.reset = pcie_root_reset,
> +    .qdev.vmsd = &vmstate_pcie_root,
> +
> +    .is_express = 1,
> +    .is_bridge = 1,
> +    .config_write = pcie_root_write_config,
> +    .init = pcie_root_initfn,
> +    .exit = pcie_root_exitfn,
> +
> +    .qdev.props = (Property[]) {
> +        DEFINE_PROP_UINT8("port", PCIESlot, port.port, 0),
> +        DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0),
> +        DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0),
> +        DEFINE_PROP_UINT16("aer_log_max", PCIESlot,
> +                           port.br.dev.exp.aer_log.log_max,
> +                           PCIE_AER_LOG_MAX_DEFAULT),
> +        DEFINE_PROP_END_OF_LIST(),
> +    }
> +};
> +
> +static void pcie_root_register(void)
> +{
> +    pci_qdev_register(&pcie_root_info);
> +}
> +
> +device_init(pcie_root_register);
> +
> +/*
> + * Local variables:
> + *  c-indent-level: 4
> + *  c-basic-offset: 4
> + *  tab-width: 8
> + *  indent-tab-mode: nil
> + * End:
> + */
> diff --git a/hw/pcie_root.h b/hw/pcie_root.h
> new file mode 100644
> index 0000000..9c5d4d0
> --- /dev/null
> +++ b/hw/pcie_root.h
> @@ -0,0 +1,32 @@
> +/*
> + * pcie_root.h
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef QEMU_PCIE_ROOT_H
> +#define QEMU_PCIE_ROOT_H
> +
> +#include "pcie_port.h"
> +
> +#define PCIE_ROOT_PORT    "pcie-root-port"
> +
> +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> +                         const char *bus_name, pci_map_irq_fn map_irq,
> +                         uint8_t port, uint8_t chassis, uint16_t slot);
> +

I am a bit unhappy about all these _init functions.
Can devices be created with qdev? If they were
it would be possible to configure the system completely
from qemu command line.

> +#endif /* QEMU_PCIE_ROOT_H */
> -- 
> 1.7.1.1
Isaku Yamahata - Sept. 24, 2010, 5:38 a.m.
On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:

> > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > +
> 
> I am a bit unhappy about all these _init functions.
> Can devices be created with qdev? If they were
> it would be possible to configure the system completely
> from qemu command line.

That's very reasonable question.
Once machine configuration file is supported, those initialization
functions will go away.
I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
those functions will also go away.

Until that, those initialization glues will stay like pci_create family
or other many initialization glues unfortunately.
This is the result of qdev missing a feature, not the cause.
It would be a long-term issue to add machine configuration file support.
Michael S. Tsirkin - Sept. 26, 2010, 12:49 p.m.
On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> 
> > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > +
> > 
> > I am a bit unhappy about all these _init functions.
> > Can devices be created with qdev? If they were
> > it would be possible to configure the system completely
> > from qemu command line.
> 
> That's very reasonable question.
> Once machine configuration file is supported, those initialization
> functions will go away.
> I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> those functions will also go away.
> 
> Until that, those initialization glues will stay like pci_create family
> or other many initialization glues unfortunately.
> This is the result of qdev missing a feature, not the cause.
> It would be a long-term issue to add machine configuration file support.

Yes, but will it be better to do everything from qdev_init?

> -- 
> yamahata
Michael S. Tsirkin - Sept. 26, 2010, 12:50 p.m.
On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> 
> > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > +
> > 
> > I am a bit unhappy about all these _init functions.
> > Can devices be created with qdev? If they were
> > it would be possible to configure the system completely
> > from qemu command line.
> 
> That's very reasonable question.
> Once machine configuration file is supported, those initialization
> functions will go away.
> I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> those functions will also go away.
> 
> Until that, those initialization glues will stay like pci_create family
> or other many initialization glues unfortunately.
> This is the result of qdev missing a feature, not the cause.
> It would be a long-term issue to add machine configuration file support.

Just to clarify, if I wanted to have a flag to make virtio-net
a pci express device, how would I do this?

> -- 
> yamahata
Isaku Yamahata - Sept. 27, 2010, 6:22 a.m.
On Sun, Sep 26, 2010 at 02:50:42PM +0200, Michael S. Tsirkin wrote:
> On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> > On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> > 
> > > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > > +
> > > 
> > > I am a bit unhappy about all these _init functions.
> > > Can devices be created with qdev? If they were
> > > it would be possible to configure the system completely
> > > from qemu command line.
> > 
> > That's very reasonable question.
> > Once machine configuration file is supported, those initialization
> > functions will go away.
> > I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> > those functions will also go away.
> > 
> > Until that, those initialization glues will stay like pci_create family
> > or other many initialization glues unfortunately.
> > This is the result of qdev missing a feature, not the cause.
> > It would be a long-term issue to add machine configuration file support.
> 
> Just to clarify, if I wanted to have a flag to make virtio-net
> a pci express device, how would I do this?

the following preparation is needed.
- register PCIDeviceInfo with name like "virtio-net-pci-xen"
  with PCIDeviceInfo::is_express = true.
- in initialization function, initialize express capability.
- in write config function, call related function.

And then,
if (express)
   create "virtio-net-pci-xen"
else
   create "virtio-net-pci"
Isaku Yamahata - Sept. 27, 2010, 6:36 a.m.
On Sun, Sep 26, 2010 at 02:49:40PM +0200, Michael S. Tsirkin wrote:
> On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> > On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> > 
> > > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > > +
> > > 
> > > I am a bit unhappy about all these _init functions.
> > > Can devices be created with qdev? If they were
> > > it would be possible to configure the system completely
> > > from qemu command line.
> > 
> > That's very reasonable question.
> > Once machine configuration file is supported, those initialization
> > functions will go away.
> > I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> > those functions will also go away.
> > 
> > Until that, those initialization glues will stay like pci_create family
> > or other many initialization glues unfortunately.
> > This is the result of qdev missing a feature, not the cause.
> > It would be a long-term issue to add machine configuration file support.
> 
> Yes, but will it be better to do everything from qdev_init?

Yes, ideally.
Michael S. Tsirkin - Sept. 27, 2010, 10:40 a.m.
On Mon, Sep 27, 2010 at 03:22:43PM +0900, Isaku Yamahata wrote:
> On Sun, Sep 26, 2010 at 02:50:42PM +0200, Michael S. Tsirkin wrote:
> > On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> > > On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> > > 
> > > > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > > > +
> > > > 
> > > > I am a bit unhappy about all these _init functions.
> > > > Can devices be created with qdev? If they were
> > > > it would be possible to configure the system completely
> > > > from qemu command line.
> > > 
> > > That's very reasonable question.
> > > Once machine configuration file is supported, those initialization
> > > functions will go away.
> > > I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> > > those functions will also go away.
> > > 
> > > Until that, those initialization glues will stay like pci_create family
> > > or other many initialization glues unfortunately.
> > > This is the result of qdev missing a feature, not the cause.
> > > It would be a long-term issue to add machine configuration file support.
> > 
> > Just to clarify, if I wanted to have a flag to make virtio-net
> > a pci express device, how would I do this?
> 
> the following preparation is needed.
> - register PCIDeviceInfo with name like "virtio-net-pci-xen"
>   with PCIDeviceInfo::is_express = true.
> - in initialization function, initialize express capability.
> - in write config function, call related function.
> 
> And then,
> if (express)
>    create "virtio-net-pci-xen"
> else
>    create "virtio-net-pci"

Sounds pretty bad: we can't double the number of devices
with each capability we add.
Can we make it so setting is_express on command line
will convert the device to PCI express?


> -- 
> yamahata
Isaku Yamahata - Sept. 27, 2010, 11:01 p.m.
On Mon, Sep 27, 2010 at 12:40:12PM +0200, Michael S. Tsirkin wrote:
> On Mon, Sep 27, 2010 at 03:22:43PM +0900, Isaku Yamahata wrote:
> > On Sun, Sep 26, 2010 at 02:50:42PM +0200, Michael S. Tsirkin wrote:
> > > On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> > > > On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> > > > 
> > > > > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > > > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > > > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > > > > +
> > > > > 
> > > > > I am a bit unhappy about all these _init functions.
> > > > > Can devices be created with qdev? If they were
> > > > > it would be possible to configure the system completely
> > > > > from qemu command line.
> > > > 
> > > > That's very reasonable question.
> > > > Once machine configuration file is supported, those initialization
> > > > functions will go away.
> > > > I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> > > > those functions will also go away.
> > > > 
> > > > Until that, those initialization glues will stay like pci_create family
> > > > or other many initialization glues unfortunately.
> > > > This is the result of qdev missing a feature, not the cause.
> > > > It would be a long-term issue to add machine configuration file support.
> > > 
> > > Just to clarify, if I wanted to have a flag to make virtio-net
> > > a pci express device, how would I do this?
> > 
> > the following preparation is needed.
> > - register PCIDeviceInfo with name like "virtio-net-pci-xen"
> >   with PCIDeviceInfo::is_express = true.
> > - in initialization function, initialize express capability.
> > - in write config function, call related function.
> > 
> > And then,
> > if (express)
> >    create "virtio-net-pci-xen"
> > else
> >    create "virtio-net-pci"
> 
> Sounds pretty bad: we can't double the number of devices
> with each capability we add.
> Can we make it so setting is_express on command line
> will convert the device to PCI express?

I don't see your point. Capability isn't something that should be
genericly customizable.
If any, it would not be a generic option, but one specific to device.
There is no generic way to automatically convert pci device into
express device. It means designing a new express device.

Maybe what you want is
- set is_express = true
  This result in always allocating 4k-sized configuration space.
  (Possibly we need more fine-grained parameter in PCIDeviceInfo
   than is_express.)
- introduce device specific property that a user can turn on/off
- In initialization function/config write function and where necessary,
  check the property.
Michael S. Tsirkin - Sept. 28, 2010, 9:27 a.m.
On Tue, Sep 28, 2010 at 08:01:15AM +0900, Isaku Yamahata wrote:
> On Mon, Sep 27, 2010 at 12:40:12PM +0200, Michael S. Tsirkin wrote:
> > On Mon, Sep 27, 2010 at 03:22:43PM +0900, Isaku Yamahata wrote:
> > > On Sun, Sep 26, 2010 at 02:50:42PM +0200, Michael S. Tsirkin wrote:
> > > > On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> > > > > On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> > > > > 
> > > > > > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > > > > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > > > > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > > > > > +
> > > > > > 
> > > > > > I am a bit unhappy about all these _init functions.
> > > > > > Can devices be created with qdev? If they were
> > > > > > it would be possible to configure the system completely
> > > > > > from qemu command line.
> > > > > 
> > > > > That's very reasonable question.
> > > > > Once machine configuration file is supported, those initialization
> > > > > functions will go away.
> > > > > I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> > > > > those functions will also go away.
> > > > > 
> > > > > Until that, those initialization glues will stay like pci_create family
> > > > > or other many initialization glues unfortunately.
> > > > > This is the result of qdev missing a feature, not the cause.
> > > > > It would be a long-term issue to add machine configuration file support.
> > > > 
> > > > Just to clarify, if I wanted to have a flag to make virtio-net
> > > > a pci express device, how would I do this?
> > > 
> > > the following preparation is needed.
> > > - register PCIDeviceInfo with name like "virtio-net-pci-xen"
> > >   with PCIDeviceInfo::is_express = true.
> > > - in initialization function, initialize express capability.
> > > - in write config function, call related function.
> > > 
> > > And then,
> > > if (express)
> > >    create "virtio-net-pci-xen"
> > > else
> > >    create "virtio-net-pci"
> > 
> > Sounds pretty bad: we can't double the number of devices
> > with each capability we add.
> > Can we make it so setting is_express on command line
> > will convert the device to PCI express?
> 
> I don't see your point. Capability isn't something that should be
> genericly customizable.
> If any, it would not be a generic option, but one specific to device.
> There is no generic way to automatically convert pci device into
> express device. It means designing a new express device.
> Maybe what you want is
> - set is_express = true
>   This result in always allocating 4k-sized configuration space.
>   (Possibly we need more fine-grained parameter in PCIDeviceInfo
>    than is_express.)
> - introduce device specific property that a user can turn on/off
> - In initialization function/config write function and where necessary,
>   check the property.

I think what I want is (At some point) automatically convert all virtio
users to express devices, but have a fallback option for old machine
types.

> -- 
> yamahata
Michael S. Tsirkin - Sept. 28, 2010, 10:38 a.m.
On Tue, Sep 28, 2010 at 11:27:03AM +0200, Michael S. Tsirkin wrote:
> On Tue, Sep 28, 2010 at 08:01:15AM +0900, Isaku Yamahata wrote:
> > On Mon, Sep 27, 2010 at 12:40:12PM +0200, Michael S. Tsirkin wrote:
> > > On Mon, Sep 27, 2010 at 03:22:43PM +0900, Isaku Yamahata wrote:
> > > > On Sun, Sep 26, 2010 at 02:50:42PM +0200, Michael S. Tsirkin wrote:
> > > > > On Fri, Sep 24, 2010 at 02:38:09PM +0900, Isaku Yamahata wrote:
> > > > > > On Wed, Sep 22, 2010 at 01:25:59PM +0200, Michael S. Tsirkin wrote:
> > > > > > 
> > > > > > > > +PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
> > > > > > > > +                         const char *bus_name, pci_map_irq_fn map_irq,
> > > > > > > > +                         uint8_t port, uint8_t chassis, uint16_t slot);
> > > > > > > > +
> > > > > > > 
> > > > > > > I am a bit unhappy about all these _init functions.
> > > > > > > Can devices be created with qdev? If they were
> > > > > > > it would be possible to configure the system completely
> > > > > > > from qemu command line.
> > > > > > 
> > > > > > That's very reasonable question.
> > > > > > Once machine configuration file is supported, those initialization
> > > > > > functions will go away.
> > > > > > I.e. when the initialization code like pc_init1() in pc_piix.c disappears,
> > > > > > those functions will also go away.
> > > > > > 
> > > > > > Until that, those initialization glues will stay like pci_create family
> > > > > > or other many initialization glues unfortunately.
> > > > > > This is the result of qdev missing a feature, not the cause.
> > > > > > It would be a long-term issue to add machine configuration file support.
> > > > > 
> > > > > Just to clarify, if I wanted to have a flag to make virtio-net
> > > > > a pci express device, how would I do this?
> > > > 
> > > > the following preparation is needed.
> > > > - register PCIDeviceInfo with name like "virtio-net-pci-xen"
> > > >   with PCIDeviceInfo::is_express = true.
> > > > - in initialization function, initialize express capability.
> > > > - in write config function, call related function.
> > > > 
> > > > And then,
> > > > if (express)
> > > >    create "virtio-net-pci-xen"
> > > > else
> > > >    create "virtio-net-pci"
> > > 
> > > Sounds pretty bad: we can't double the number of devices
> > > with each capability we add.
> > > Can we make it so setting is_express on command line
> > > will convert the device to PCI express?
> > 
> > I don't see your point. Capability isn't something that should be
> > genericly customizable.
> > If any, it would not be a generic option, but one specific to device.
> > There is no generic way to automatically convert pci device into
> > express device. It means designing a new express device.
> > Maybe what you want is
> > - set is_express = true
> >   This result in always allocating 4k-sized configuration space.
> >   (Possibly we need more fine-grained parameter in PCIDeviceInfo
> >    than is_express.)
> > - introduce device specific property that a user can turn on/off
> > - In initialization function/config write function and where necessary,
> >   check the property.
> 
> I think what I want is (At some point) automatically convert all virtio
> users to express devices, but have a fallback option for old machine
> types.

In any case, this is not a blocker for the merge.

> > -- 
> > yamahata

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 6c3b84a..7e81b57 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -186,7 +186,7 @@  hw-obj-$(CONFIG_PIIX4) += piix4.o
 # PCI watchdog devices
 hw-obj-y += wdt_i6300esb.o
 
-hw-obj-y += pcie.o pcie_aer.o pcie_port.o
+hw-obj-y += pcie.o pcie_aer.o pcie_port.o pcie_root.o
 hw-obj-y += msix.o msi.o
 
 # PCI network cards
diff --git a/hw/pcie_root.c b/hw/pcie_root.c
new file mode 100644
index 0000000..9255bed
--- /dev/null
+++ b/hw/pcie_root.c
@@ -0,0 +1,240 @@ 
+/*
+ * pcie_root.c
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pci_ids.h"
+#include "msi.h"
+#include "pcie.h"
+#include "pcie_root.h"
+
+/* For now, Intel X58 IOH corporate deice exporess* root port.
+   need to get its own id? */
+#define PCI_DEVICE_ID_IOH_EPORT         0x3420  /* D0:F0 express mode */
+#define PCI_DEVICE_ID_IOH_REV           0x2
+#define IOH_EP_SSVID_OFFSET             0x40
+#define IOH_EP_SSVID_SVID               PCI_VENDOR_ID_INTEL
+#define IOH_EP_SSVID_SSID               0
+#define IOH_EP_MSI_OFFSET               0x60
+#define IOH_EP_MSI_SUPPORTED_FLAGS      PCI_MSI_FLAGS_MASKBIT
+#define IOH_EP_MSI_NR_VECTOR            2
+#define IOH_EP_EXP_OFFSET               0x90
+#define IOH_EP_AER_OFFSET               0x100
+
+#define PCIE_ROOT_VID                   PCI_VENDOR_ID_INTEL
+#define PCIE_ROOT_DID                   PCI_DEVICE_ID_IOH_EPORT
+#define PCIE_ROOT_REV                   PCI_DEVICE_ID_IOH_REV
+#define PCIE_ROOT_SSVID_OFFSET          IOH_EP_SSVID_OFFSET
+#define PCIE_ROOT_SVID                  IOH_EP_SSVID_SVID
+#define PCIE_ROOT_SSID                  IOH_EP_SSVID_SSID
+#define PCIE_ROOT_MSI_SUPPORTED_FLAGS   IOH_EP_MSI_SUPPORTED_FLAGS
+#define PCIE_ROOT_MSI_NR_VECTOR         IOH_EP_MSI_NR_VECTOR
+#define PCIE_ROOT_MSI_OFFSET            IOH_EP_MSI_OFFSET
+#define PCIE_ROOT_EXP_OFFSET            IOH_EP_EXP_OFFSET
+#define PCIE_ROOT_AER_OFFSET            IOH_EP_AER_OFFSET
+
+/*
+ * If two MSI vector are allocated, Advanced Error Interrupt Message Number
+ * is 1. otherwise 0.
+ * 17.12.5.10 RPERRSTS,  32:27 bit Advanced Error Interrupt Message Number.
+ */
+static uint8_t pcie_root_aer_vector(const PCIDevice *d)
+{
+    switch (msi_nr_vectors_allocated(d)) {
+    case 1:
+        return 0;
+    case 2:
+        return 1;
+    case 4:
+    case 8:
+    case 16:
+    case 32:
+    default:
+        break;
+    }
+    abort();
+    return 0;
+}
+
+static void pcie_root_aer_vector_update(PCIDevice *d)
+{
+    pcie_aer_root_set_vector(d, pcie_root_aer_vector(d));
+}
+
+static void pcie_root_write_config(PCIDevice *d,
+                                   uint32_t address, uint32_t val, int len)
+{
+    uint16_t sltctl =
+        pci_get_word(d->config + pci_pcie_cap(d) + PCI_EXP_SLTCTL);
+    uint32_t uncorsta =
+        pci_get_long(d->config + pcie_aer_cap(d) + PCI_ERR_UNCOR_STATUS);
+    uint32_t root_cmd =
+        pci_get_long(d->config + pcie_aer_cap(d) + PCI_ERR_ROOT_COMMAND);
+
+    pci_bridge_write_config(d, address, val, len);
+    msi_write_config(d, address, val, len);
+    pcie_root_aer_vector_update(d);
+    pcie_cap_slot_write_config(d, address, val, len, sltctl);
+    pcie_aer_write_config(d, address, val, len, uncorsta);
+    pcie_aer_root_write_config(d, address, val, len, root_cmd);
+}
+
+static void pcie_root_reset(DeviceState *qdev)
+{
+    PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev);
+    msi_reset(d);
+    pcie_root_aer_vector_update(d);
+    pcie_cap_root_reset(d);
+    pcie_cap_deverr_reset(d);
+    pcie_cap_slot_reset(d);
+    pcie_aer_root_reset(d);
+    pci_bridge_reset(qdev);
+}
+
+static int pcie_root_initfn(PCIDevice *d)
+{
+    PCIBridge* br = DO_UPCAST(PCIBridge, dev, d);
+    PCIEPort *p = DO_UPCAST(PCIEPort, br, br);
+    PCIESlot *s = DO_UPCAST(PCIESlot, port, p);
+    int rc;
+
+    rc = pci_bridge_initfn(d);
+    if (rc < 0) {
+        return rc;
+    }
+
+    d->config[PCI_REVISION_ID] = PCIE_ROOT_REV;
+    pcie_port_init_reg(d);
+
+    pci_config_set_vendor_id(d->config, PCIE_ROOT_VID);
+    pci_config_set_device_id(d->config, PCIE_ROOT_DID);
+
+    rc = pci_bridge_ssvid_init(d, PCIE_ROOT_SSVID_OFFSET,
+                               PCIE_ROOT_SVID, PCIE_ROOT_SSID);
+    if (rc < 0) {
+        return rc;
+    }
+    rc = msi_init(d, PCIE_ROOT_MSI_OFFSET, PCIE_ROOT_MSI_NR_VECTOR,
+                  PCIE_ROOT_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
+                  PCIE_ROOT_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT);
+    if (rc < 0) {
+        return rc;
+    }
+    rc = pcie_cap_init(d, PCIE_ROOT_EXP_OFFSET, PCI_EXP_TYPE_ROOT_PORT,
+                       p->port);
+    if (rc < 0) {
+        return rc;
+    }
+    pcie_cap_deverr_init(d);
+    pcie_cap_slot_init(d, s->slot);
+    pcie_chassis_create(s->chassis);
+    rc = pcie_chassis_add_slot(s);
+    if (rc < 0) {
+        return rc;
+    }
+    pcie_cap_root_init(d);
+    pcie_aer_init(d, PCIE_ROOT_AER_OFFSET);
+    pcie_aer_root_init(d);
+    pcie_root_aer_vector_update(d);
+    return 0;
+}
+
+static int pcie_root_exitfn(PCIDevice *d)
+{
+    pcie_aer_exit(d);
+    msi_uninit(d);
+    pcie_cap_exit(d);
+    return pci_bridge_exitfn(d);
+}
+
+PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
+                         const char *bus_name, pci_map_irq_fn map_irq,
+                         uint8_t port, uint8_t chassis, uint16_t slot)
+{
+    PCIDevice *d;
+    PCIBridge *br;
+    DeviceState *qdev;
+
+    d = pci_create_multifunction(bus, devfn, multifunction, PCIE_ROOT_PORT);
+    if (!d) {
+        return NULL;
+    }
+    br = DO_UPCAST(PCIBridge, dev, d);
+
+    qdev = &br->dev.qdev;
+    pci_bridge_map_irq(br, bus_name, map_irq);
+    qdev_prop_set_uint8(qdev, "port", port);
+    qdev_prop_set_uint8(qdev, "chassis", chassis);
+    qdev_prop_set_uint16(qdev, "slot", slot);
+    qdev_init_nofail(qdev);
+
+    return DO_UPCAST(PCIESlot, port, DO_UPCAST(PCIEPort, br, br));
+}
+
+static const VMStateDescription vmstate_pcie_root = {
+    .name = "pcie-root-port",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCIE_DEVICE(port.br.dev, PCIESlot),
+        VMSTATE_STRUCT(port.br.dev.exp.aer_log, PCIESlot, 0,
+                       vmstate_pcie_aer_log, PCIE_AERLog),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static PCIDeviceInfo pcie_root_info = {
+    .qdev.name = PCIE_ROOT_PORT,
+    .qdev.desc = "Root Port of PCI Express Switch",
+    .qdev.size = sizeof(PCIESlot),
+    .qdev.reset = pcie_root_reset,
+    .qdev.vmsd = &vmstate_pcie_root,
+
+    .is_express = 1,
+    .is_bridge = 1,
+    .config_write = pcie_root_write_config,
+    .init = pcie_root_initfn,
+    .exit = pcie_root_exitfn,
+
+    .qdev.props = (Property[]) {
+        DEFINE_PROP_UINT8("port", PCIESlot, port.port, 0),
+        DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0),
+        DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0),
+        DEFINE_PROP_UINT16("aer_log_max", PCIESlot,
+                           port.br.dev.exp.aer_log.log_max,
+                           PCIE_AER_LOG_MAX_DEFAULT),
+        DEFINE_PROP_END_OF_LIST(),
+    }
+};
+
+static void pcie_root_register(void)
+{
+    pci_qdev_register(&pcie_root_info);
+}
+
+device_init(pcie_root_register);
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ *  indent-tab-mode: nil
+ * End:
+ */
diff --git a/hw/pcie_root.h b/hw/pcie_root.h
new file mode 100644
index 0000000..9c5d4d0
--- /dev/null
+++ b/hw/pcie_root.h
@@ -0,0 +1,32 @@ 
+/*
+ * pcie_root.h
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_PCIE_ROOT_H
+#define QEMU_PCIE_ROOT_H
+
+#include "pcie_port.h"
+
+#define PCIE_ROOT_PORT    "pcie-root-port"
+
+PCIESlot *pcie_root_init(PCIBus *bus, int devfn, bool multifunction,
+                         const char *bus_name, pci_map_irq_fn map_irq,
+                         uint8_t port, uint8_t chassis, uint16_t slot);
+
+#endif /* QEMU_PCIE_ROOT_H */