Patchwork [v2,21/21] q35: add acpi-based pci hotplug.

login
register
mail settings
Submitter Jason Baron
Date Oct. 9, 2012, 3:30 a.m.
Message ID <d7a7e7fccdb3d81ae1ee7c021e1ebbcf36aaa6e3.1349749915.git.jbaron@redhat.com>
Download mbox | patch
Permalink /patch/190196/
State New
Headers show

Comments

Jason Baron - Oct. 9, 2012, 3:30 a.m.
From: Jason Baron <jbaron@redhat.com>

Add piix style acpi hotplug to q35.

Signed-off-by: Jason Baron <jbaron@redhat.com>
---
 hw/acpi_ich9.c |  172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/acpi_ich9.h |   10 +++
 2 files changed, 181 insertions(+), 1 deletions(-)
Paolo Bonzini - Oct. 9, 2012, 8:04 a.m.
Il 09/10/2012 05:30, Jason Baron ha scritto:
> From: Jason Baron <jbaron@redhat.com>
> 
> Add piix style acpi hotplug to q35.
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>

Not a thorough review, but it looks good.

Paolo

> ---
>  hw/acpi_ich9.c |  172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/acpi_ich9.h |   10 +++
>  2 files changed, 181 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/acpi_ich9.c b/hw/acpi_ich9.c
> index 1412ad3..412c9c1 100644
> --- a/hw/acpi_ich9.c
> +++ b/hw/acpi_ich9.c
> @@ -41,6 +41,13 @@ do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0)
>  #define ICH9_DEBUG(fmt, ...)    do { } while (0)
>  #endif
>  
> +#define PCI_UP_BASE 0xae00
> +#define PCI_DOWN_BASE 0xae04
> +#define PCI_EJ_BASE 0xae08
> +#define PCI_RMV_BASE 0xae0c
> +#define ICH9_PCI_HOTPLUG_STATUS 2
> +
> +
>  static void pm_ioport_write_fallback(void *opaque, uint32_t addr, int len,
>                                       uint32_t val);
>  static uint32_t pm_ioport_read_fallback(void *opaque, uint32_t addr, int len);
> @@ -55,7 +62,10 @@ static void pm_update_sci(ICH9LPCPMRegs *pm)
>                    (ACPI_BITMASK_RT_CLOCK_ENABLE |
>                     ACPI_BITMASK_POWER_BUTTON_ENABLE |
>                     ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
> -                   ACPI_BITMASK_TIMER_ENABLE)) != 0);
> +                   ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
> +         (((pm->acpi_regs.gpe.sts[0] & pm->acpi_regs.gpe.en[0])
> +          & ICH9_PCI_HOTPLUG_STATUS) != 0);
> +
>      qemu_set_irq(pm->irq, sci_level);
>  
>      /* schedule a timer interruption if needed */
> @@ -77,6 +87,7 @@ static void pm_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
>      switch (addr & ICH9_PMIO_MASK) {
>      case ICH9_PMIO_GPE0_STS ... (ICH9_PMIO_GPE0_STS + ICH9_PMIO_GPE0_LEN - 1):
>          acpi_gpe_ioport_writeb(&pm->acpi_regs, addr, val);
> +        pm_update_sci(pm);
>          break;
>      default:
>          break;
> @@ -283,6 +294,65 @@ const VMStateDescription vmstate_ich9_pm = {
>      }
>  };
>  
> +static void acpi_ich9_eject_slot(ICH9LPCPMRegs *opaque, unsigned slots)
> +{
> +    BusChild *kid, *next;
> +    ICH9LPCPMRegs *pm = opaque;
> +    ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
> +    PCIDevice *s = PCI_DEVICE(lpc);
> +    BusState *bus = qdev_get_parent_bus(&s->qdev);
> +    int slot = ffs(slots) - 1;
> +    bool slot_free = true;
> +
> +    /* Mark request as complete */
> +    pm->pci0_status.down &= ~(1U << slot);
> +
> +    QTAILQ_FOREACH_SAFE(kid, &bus->children, sibling, next) {
> +        DeviceState *qdev = kid->child;
> +        PCIDevice *dev = PCI_DEVICE(qdev);
> +        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> +        if (PCI_SLOT(dev->devfn) == slot) {
> +            if (pc->no_hotplug) {
> +                slot_free = false;
> +            } else {
> +                qdev_free(qdev);
> +            }
> +        }
> +    }
> +    if (slot_free) {
> +        pm->pci0_slot_device_present &= ~(1U << slot);
> +    }
> +}
> +
> +static void acpi_ich9_update_hotplug(ICH9LPCPMRegs *pm)
> +{
> +    ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
> +    PCIDevice *dev = PCI_DEVICE(lpc);
> +    BusState *bus = qdev_get_parent_bus(&dev->qdev);
> +    BusChild *kid, *next;
> +
> +    /* Execute any pending removes during reset */
> +    while (pm->pci0_status.down) {
> +        acpi_ich9_eject_slot(pm, pm->pci0_status.down);
> +    }
> +
> +    pm->pci0_hotplug_enable = ~0;
> +    pm->pci0_slot_device_present = 0;
> +
> +    QTAILQ_FOREACH_SAFE(kid, &bus->children, sibling, next) {
> +        DeviceState *qdev = kid->child;
> +        PCIDevice *pdev = PCI_DEVICE(qdev);
> +        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pdev);
> +        int slot = PCI_SLOT(pdev->devfn);
> +
> +        if (pc->no_hotplug) {
> +            pm->pci0_hotplug_enable &= ~(1U << slot);
> +        }
> +
> +        pm->pci0_slot_device_present |= (1U << slot);
> +    }
> +}
> +
>  static void pm_reset(void *opaque)
>  {
>      ICH9LPCPMRegs *pm = opaque;
> @@ -300,6 +370,7 @@ static void pm_reset(void *opaque)
>      }
>  
>      pm_update_sci(pm);
> +    acpi_ich9_update_hotplug(pm);
>  }
>  
>  static void pm_powerdown_req(Notifier *n, void *opaque)
> @@ -309,6 +380,104 @@ static void pm_powerdown_req(Notifier *n, void *opaque)
>      acpi_pm1_evt_power_down(&pm->acpi_regs);
>  }
>  
> +static uint32_t pci_up_read(void *opaque, uint32_t addr)
> +{
> +    ICH9LPCPMRegs *pm = opaque;
> +    uint32_t val;
> +
> +    /* Manufacture an "up" value to cause a device check on any hotplug
> +     * slot with a device.  Extra device checks are harmless. */
> +    val = pm->pci0_slot_device_present & pm->pci0_hotplug_enable;
> +
> +    ICH9_DEBUG("pci_up_read %x\n", val);
> +    return val;
> +}
> +
> +static uint32_t pci_down_read(void *opaque, uint32_t addr)
> +{
> +    ICH9LPCPMRegs *pm = opaque;
> +    uint32_t val = pm->pci0_status.down;
> +
> +    ICH9_DEBUG("pci_down_read %x\n", val);
> +    return val;
> +}
> +
> +static uint32_t pci_features_read(void *opaque, uint32_t addr)
> +{
> +    /* No feature defined yet */
> +    ICH9_DEBUG("pci_features_read %x\n", 0);
> +    return 0;
> +}
> +
> +static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    acpi_ich9_eject_slot(opaque, val);
> +
> +    ICH9_DEBUG("pciej write %x <== %d\n", addr, val);
> +}
> +
> +static uint32_t pcirmv_read(void *opaque, uint32_t addr)
> +{
> +    ICH9LPCPMRegs *pm = opaque;
> +
> +    return pm->pci0_hotplug_enable;
> +}
> +
> +static void enable_device(ICH9LPCPMRegs *pm, int slot)
> +{
> +    pm->acpi_regs.gpe.sts[0] |= ICH9_PCI_HOTPLUG_STATUS;
> +    pm->pci0_slot_device_present |= (1U << slot);
> +}
> +
> +static void disable_device(ICH9LPCPMRegs *pm, int slot)
> +{
> +    pm->acpi_regs.gpe.sts[0] |= ICH9_PCI_HOTPLUG_STATUS;
> +    pm->pci0_status.down |= (1U << slot);
> +}
> +
> +static int ich9_device_hotplug(DeviceState *qdev, PCIDevice *dev,
> +                PCIHotplugState state)
> +{
> +    int slot = PCI_SLOT(dev->devfn);
> +    ICH9LPCState *lpc = DO_UPCAST(ICH9LPCState, d,
> +                                PCI_DEVICE(qdev));
> +    ICH9LPCPMRegs *pm = &lpc->pm;
> +
> +    /* Don't send event when device is enabled during qemu machine creation:
> +     * it is present on boot, no hotplug event is necessary. We do send an
> +     * event when the device is disabled later. */
> +    if (state == PCI_COLDPLUG_ENABLED) {
> +        pm->pci0_slot_device_present |= (1U << slot);
> +        return 0;
> +    }
> +
> +    if (state == PCI_HOTPLUG_ENABLED) {
> +        enable_device(pm, slot);
> +    } else {
> +        disable_device(pm, slot);
> +    }
> +
> +    pm_update_sci(pm);
> +
> +    return 0;
> +}
> +
> +static void ich9_acpi_system_hot_add_init(ICH9LPCPMRegs *s)
> +{
> +    ICH9LPCState *lpc = container_of(s, ICH9LPCState, pm);
> +    PCIDevice *pdev = PCI_DEVICE(lpc);
> +
> +    register_ioport_read(PCI_UP_BASE, 4, 4, pci_up_read, s);
> +    register_ioport_read(PCI_DOWN_BASE, 4, 4, pci_down_read, s);
> +
> +    register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, s);
> +    register_ioport_read(PCI_EJ_BASE, 4, 4,  pci_features_read, s);
> +
> +    register_ioport_read(PCI_RMV_BASE, 4, 4,  pcirmv_read, s);
> +
> +    pci_bus_hotplug(pdev->bus, ich9_device_hotplug, &pdev->qdev);
> +}
> +
>  void ich9_pm_init(ICH9LPCPMRegs *pm, qemu_irq sci_irq, qemu_irq cmos_s3)
>  {
>      acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn);
> @@ -319,4 +488,5 @@ void ich9_pm_init(ICH9LPCPMRegs *pm, qemu_irq sci_irq, qemu_irq cmos_s3)
>      qemu_register_reset(pm_reset, pm);
>      pm->powerdown_notifier.notify = pm_powerdown_req;
>      qemu_register_powerdown_notifier(&pm->powerdown_notifier);
> +    ich9_acpi_system_hot_add_init(pm);
>  }
> diff --git a/hw/acpi_ich9.h b/hw/acpi_ich9.h
> index 180c406..b4e2aff 100644
> --- a/hw/acpi_ich9.h
> +++ b/hw/acpi_ich9.h
> @@ -23,6 +23,11 @@
>  
>  #include "acpi.h"
>  
> +struct pci_status {
> +    uint32_t up; /* deprecated, maintained for migration compatibility */
> +    uint32_t down;
> +};
> +
>  typedef struct ICH9LPCPMRegs {
>      /*
>       * In ich9 spec says that pm1_cnt register is 32bit width and
> @@ -37,6 +42,11 @@ typedef struct ICH9LPCPMRegs {
>  
>      uint32_t pm_io_base;
>      Notifier powerdown_notifier;
> +
> +    /* for pci hotplug */
> +    struct pci_status pci0_status;
> +    uint32_t pci0_hotplug_enable;
> +    uint32_t pci0_slot_device_present;
>  } ICH9LPCPMRegs;
>  
>  void ich9_pm_init(ICH9LPCPMRegs *pm,
>
Michael S. Tsirkin - Oct. 11, 2012, 10:57 a.m.
On Mon, Oct 08, 2012 at 11:30:39PM -0400, Jason Baron wrote:
> From: Jason Baron <jbaron@redhat.com>
> 
> Add piix style acpi hotplug to q35.
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>

Something I don't understand here: this only handles hotplug
of devices behind the root, no?
Don't we need support for hotplug/hot remove of devices behind
bridges?

> ---
>  hw/acpi_ich9.c |  172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/acpi_ich9.h |   10 +++
>  2 files changed, 181 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/acpi_ich9.c b/hw/acpi_ich9.c
> index 1412ad3..412c9c1 100644
> --- a/hw/acpi_ich9.c
> +++ b/hw/acpi_ich9.c
> @@ -41,6 +41,13 @@ do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0)
>  #define ICH9_DEBUG(fmt, ...)    do { } while (0)
>  #endif
>  
> +#define PCI_UP_BASE 0xae00
> +#define PCI_DOWN_BASE 0xae04
> +#define PCI_EJ_BASE 0xae08
> +#define PCI_RMV_BASE 0xae0c
> +#define ICH9_PCI_HOTPLUG_STATUS 2
> +
> +
>  static void pm_ioport_write_fallback(void *opaque, uint32_t addr, int len,
>                                       uint32_t val);
>  static uint32_t pm_ioport_read_fallback(void *opaque, uint32_t addr, int len);
> @@ -55,7 +62,10 @@ static void pm_update_sci(ICH9LPCPMRegs *pm)
>                    (ACPI_BITMASK_RT_CLOCK_ENABLE |
>                     ACPI_BITMASK_POWER_BUTTON_ENABLE |
>                     ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
> -                   ACPI_BITMASK_TIMER_ENABLE)) != 0);
> +                   ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
> +         (((pm->acpi_regs.gpe.sts[0] & pm->acpi_regs.gpe.en[0])
> +          & ICH9_PCI_HOTPLUG_STATUS) != 0);
> +
>      qemu_set_irq(pm->irq, sci_level);
>  
>      /* schedule a timer interruption if needed */
> @@ -77,6 +87,7 @@ static void pm_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
>      switch (addr & ICH9_PMIO_MASK) {
>      case ICH9_PMIO_GPE0_STS ... (ICH9_PMIO_GPE0_STS + ICH9_PMIO_GPE0_LEN - 1):
>          acpi_gpe_ioport_writeb(&pm->acpi_regs, addr, val);
> +        pm_update_sci(pm);
>          break;
>      default:
>          break;
> @@ -283,6 +294,65 @@ const VMStateDescription vmstate_ich9_pm = {
>      }
>  };
>  
> +static void acpi_ich9_eject_slot(ICH9LPCPMRegs *opaque, unsigned slots)
> +{
> +    BusChild *kid, *next;
> +    ICH9LPCPMRegs *pm = opaque;
> +    ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
> +    PCIDevice *s = PCI_DEVICE(lpc);
> +    BusState *bus = qdev_get_parent_bus(&s->qdev);
> +    int slot = ffs(slots) - 1;
> +    bool slot_free = true;
> +
> +    /* Mark request as complete */
> +    pm->pci0_status.down &= ~(1U << slot);
> +
> +    QTAILQ_FOREACH_SAFE(kid, &bus->children, sibling, next) {
> +        DeviceState *qdev = kid->child;
> +        PCIDevice *dev = PCI_DEVICE(qdev);
> +        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> +        if (PCI_SLOT(dev->devfn) == slot) {
> +            if (pc->no_hotplug) {
> +                slot_free = false;
> +            } else {
> +                qdev_free(qdev);
> +            }
> +        }
> +    }
> +    if (slot_free) {
> +        pm->pci0_slot_device_present &= ~(1U << slot);
> +    }
> +}
> +
> +static void acpi_ich9_update_hotplug(ICH9LPCPMRegs *pm)
> +{
> +    ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
> +    PCIDevice *dev = PCI_DEVICE(lpc);
> +    BusState *bus = qdev_get_parent_bus(&dev->qdev);
> +    BusChild *kid, *next;
> +
> +    /* Execute any pending removes during reset */
> +    while (pm->pci0_status.down) {
> +        acpi_ich9_eject_slot(pm, pm->pci0_status.down);
> +    }
> +
> +    pm->pci0_hotplug_enable = ~0;
> +    pm->pci0_slot_device_present = 0;
> +
> +    QTAILQ_FOREACH_SAFE(kid, &bus->children, sibling, next) {
> +        DeviceState *qdev = kid->child;
> +        PCIDevice *pdev = PCI_DEVICE(qdev);
> +        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pdev);
> +        int slot = PCI_SLOT(pdev->devfn);
> +
> +        if (pc->no_hotplug) {
> +            pm->pci0_hotplug_enable &= ~(1U << slot);
> +        }
> +
> +        pm->pci0_slot_device_present |= (1U << slot);
> +    }
> +}
> +
>  static void pm_reset(void *opaque)
>  {
>      ICH9LPCPMRegs *pm = opaque;
> @@ -300,6 +370,7 @@ static void pm_reset(void *opaque)
>      }
>  
>      pm_update_sci(pm);
> +    acpi_ich9_update_hotplug(pm);
>  }
>  
>  static void pm_powerdown_req(Notifier *n, void *opaque)
> @@ -309,6 +380,104 @@ static void pm_powerdown_req(Notifier *n, void *opaque)
>      acpi_pm1_evt_power_down(&pm->acpi_regs);
>  }
>  
> +static uint32_t pci_up_read(void *opaque, uint32_t addr)
> +{
> +    ICH9LPCPMRegs *pm = opaque;
> +    uint32_t val;
> +
> +    /* Manufacture an "up" value to cause a device check on any hotplug
> +     * slot with a device.  Extra device checks are harmless. */
> +    val = pm->pci0_slot_device_present & pm->pci0_hotplug_enable;
> +
> +    ICH9_DEBUG("pci_up_read %x\n", val);
> +    return val;
> +}
> +
> +static uint32_t pci_down_read(void *opaque, uint32_t addr)
> +{
> +    ICH9LPCPMRegs *pm = opaque;
> +    uint32_t val = pm->pci0_status.down;
> +
> +    ICH9_DEBUG("pci_down_read %x\n", val);
> +    return val;
> +}
> +
> +static uint32_t pci_features_read(void *opaque, uint32_t addr)
> +{
> +    /* No feature defined yet */
> +    ICH9_DEBUG("pci_features_read %x\n", 0);
> +    return 0;
> +}
> +
> +static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    acpi_ich9_eject_slot(opaque, val);
> +
> +    ICH9_DEBUG("pciej write %x <== %d\n", addr, val);
> +}
> +
> +static uint32_t pcirmv_read(void *opaque, uint32_t addr)
> +{
> +    ICH9LPCPMRegs *pm = opaque;
> +
> +    return pm->pci0_hotplug_enable;
> +}
> +
> +static void enable_device(ICH9LPCPMRegs *pm, int slot)
> +{
> +    pm->acpi_regs.gpe.sts[0] |= ICH9_PCI_HOTPLUG_STATUS;
> +    pm->pci0_slot_device_present |= (1U << slot);
> +}
> +
> +static void disable_device(ICH9LPCPMRegs *pm, int slot)
> +{
> +    pm->acpi_regs.gpe.sts[0] |= ICH9_PCI_HOTPLUG_STATUS;
> +    pm->pci0_status.down |= (1U << slot);
> +}
> +
> +static int ich9_device_hotplug(DeviceState *qdev, PCIDevice *dev,
> +                PCIHotplugState state)
> +{
> +    int slot = PCI_SLOT(dev->devfn);
> +    ICH9LPCState *lpc = DO_UPCAST(ICH9LPCState, d,
> +                                PCI_DEVICE(qdev));
> +    ICH9LPCPMRegs *pm = &lpc->pm;
> +
> +    /* Don't send event when device is enabled during qemu machine creation:
> +     * it is present on boot, no hotplug event is necessary. We do send an
> +     * event when the device is disabled later. */
> +    if (state == PCI_COLDPLUG_ENABLED) {
> +        pm->pci0_slot_device_present |= (1U << slot);
> +        return 0;
> +    }
> +
> +    if (state == PCI_HOTPLUG_ENABLED) {
> +        enable_device(pm, slot);
> +    } else {
> +        disable_device(pm, slot);
> +    }
> +
> +    pm_update_sci(pm);
> +
> +    return 0;
> +}
> +
> +static void ich9_acpi_system_hot_add_init(ICH9LPCPMRegs *s)
> +{
> +    ICH9LPCState *lpc = container_of(s, ICH9LPCState, pm);
> +    PCIDevice *pdev = PCI_DEVICE(lpc);
> +
> +    register_ioport_read(PCI_UP_BASE, 4, 4, pci_up_read, s);
> +    register_ioport_read(PCI_DOWN_BASE, 4, 4, pci_down_read, s);
> +
> +    register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, s);
> +    register_ioport_read(PCI_EJ_BASE, 4, 4,  pci_features_read, s);
> +
> +    register_ioport_read(PCI_RMV_BASE, 4, 4,  pcirmv_read, s);
> +
> +    pci_bus_hotplug(pdev->bus, ich9_device_hotplug, &pdev->qdev);
> +}
> +
>  void ich9_pm_init(ICH9LPCPMRegs *pm, qemu_irq sci_irq, qemu_irq cmos_s3)
>  {
>      acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn);
> @@ -319,4 +488,5 @@ void ich9_pm_init(ICH9LPCPMRegs *pm, qemu_irq sci_irq, qemu_irq cmos_s3)
>      qemu_register_reset(pm_reset, pm);
>      pm->powerdown_notifier.notify = pm_powerdown_req;
>      qemu_register_powerdown_notifier(&pm->powerdown_notifier);
> +    ich9_acpi_system_hot_add_init(pm);
>  }
> diff --git a/hw/acpi_ich9.h b/hw/acpi_ich9.h
> index 180c406..b4e2aff 100644
> --- a/hw/acpi_ich9.h
> +++ b/hw/acpi_ich9.h
> @@ -23,6 +23,11 @@
>  
>  #include "acpi.h"
>  
> +struct pci_status {
> +    uint32_t up; /* deprecated, maintained for migration compatibility */
> +    uint32_t down;
> +};
> +
>  typedef struct ICH9LPCPMRegs {
>      /*
>       * In ich9 spec says that pm1_cnt register is 32bit width and
> @@ -37,6 +42,11 @@ typedef struct ICH9LPCPMRegs {
>  
>      uint32_t pm_io_base;
>      Notifier powerdown_notifier;
> +
> +    /* for pci hotplug */
> +    struct pci_status pci0_status;
> +    uint32_t pci0_hotplug_enable;
> +    uint32_t pci0_slot_device_present;
>  } ICH9LPCPMRegs;
>  
>  void ich9_pm_init(ICH9LPCPMRegs *pm,
> -- 
> 1.7.1
Jason Baron - Oct. 11, 2012, 2:21 p.m.
On Thu, Oct 11, 2012 at 12:57:06PM +0200, Michael S. Tsirkin wrote:
> On Mon, Oct 08, 2012 at 11:30:39PM -0400, Jason Baron wrote:
> > From: Jason Baron <jbaron@redhat.com>
> > 
> > Add piix style acpi hotplug to q35.
> > 
> > Signed-off-by: Jason Baron <jbaron@redhat.com>
> 
> Something I don't understand here: this only handles hotplug
> of devices behind the root, no?
> Don't we need support for hotplug/hot remove of devices behind
> bridges?
> 

Yes, this only handles acpi hotplug of devices behind the root. I'm
trying to reach minimal set of q35 patches that we can build upon. I
think that this patch gives us at least the same functionality as piix
does. (Plus there is pcie hotplug).

As you know, I have a proof of concept patch series providing a second
level of hotplug behind bridges. Unfortunately, it was based on the
static acpi tables, before Paolo made the hotplug table generation
dynamic. So it needs quite a bit of re-work. But I know that it should
work :)

Thanks,

-Jason
Michael S. Tsirkin - Oct. 11, 2012, 2:46 p.m.
On Thu, Oct 11, 2012 at 10:21:22AM -0400, Jason Baron wrote:
> On Thu, Oct 11, 2012 at 12:57:06PM +0200, Michael S. Tsirkin wrote:
> > On Mon, Oct 08, 2012 at 11:30:39PM -0400, Jason Baron wrote:
> > > From: Jason Baron <jbaron@redhat.com>
> > > 
> > > Add piix style acpi hotplug to q35.
> > > 
> > > Signed-off-by: Jason Baron <jbaron@redhat.com>
> > 
> > Something I don't understand here: this only handles hotplug
> > of devices behind the root, no?
> > Don't we need support for hotplug/hot remove of devices behind
> > bridges?
> > 
> 
> Yes, this only handles acpi hotplug of devices behind the root. I'm
> trying to reach minimal set of q35 patches that we can build upon. I
> think that this patch gives us at least the same functionality as piix
> does. (Plus there is pcie hotplug).
> 
> As you know, I have a proof of concept patch series providing a second
> level of hotplug behind bridges. Unfortunately, it was based on the
> static acpi tables, before Paolo made the hotplug table generation
> dynamic. So it needs quite a bit of re-work. But I know that it should
> work :)
> 
> Thanks,
> 
> -Jason

Yes. Reason I ask is because q35 is adding bridges by default now.
Would it be possible to only add them if requested on command line by user
instead? I realize some guests expect devices at specific slots
but this does not apply to bridges I think?

It would also be nice to add comments explaining why
specific slots were selected e.g. /* BSD XYZ fails to boot unless ahci is at alow 2 */
etc.

Also - will adding this code now mean that when adding bridges
we'll need to add compatibility code in bios/qemu in the future?
Paolo Bonzini - Oct. 11, 2012, 2:54 p.m.
Il 11/10/2012 16:46, Michael S. Tsirkin ha scritto:
> Yes. Reason I ask is because q35 is adding bridges by default now.
> Would it be possible to only add them if requested on command line by user
> instead?

Can you just use shpc or pcie hotplug on those bridges?

> I realize some guests expect devices at specific slots
> but this does not apply to bridges I think?
> 
> It would also be nice to add comments explaining why
> specific slots were selected e.g. /* BSD XYZ fails to boot unless ahci is at alow 2 */
> etc.
> 
> Also - will adding this code now mean that when adding bridges
> we'll need to add compatibility code in bios/qemu in the future?

It will need SSDT changes.  One ugliness of Jason's patch was that it
introduced a massive chain of 32*32 Ifs in the SSDT.  Changing it to use
LoadTable shouldn't be hard.  Then you can patch out the LoadTable (e.g.
_INI -> INI_) based on what QEMU tells you is a bridge or not.

Paolo
Jason Baron - Oct. 11, 2012, 3:34 p.m.
On Thu, Oct 11, 2012 at 04:46:56PM +0200, Michael S. Tsirkin wrote:
> On Thu, Oct 11, 2012 at 10:21:22AM -0400, Jason Baron wrote:
> > On Thu, Oct 11, 2012 at 12:57:06PM +0200, Michael S. Tsirkin wrote:
> > > On Mon, Oct 08, 2012 at 11:30:39PM -0400, Jason Baron wrote:
> > > > From: Jason Baron <jbaron@redhat.com>
> > > > 
> > > > Add piix style acpi hotplug to q35.
> > > > 
> > > > Signed-off-by: Jason Baron <jbaron@redhat.com>
> > > 
> > > Something I don't understand here: this only handles hotplug
> > > of devices behind the root, no?
> > > Don't we need support for hotplug/hot remove of devices behind
> > > bridges?
> > > 
> > 
> > Yes, this only handles acpi hotplug of devices behind the root. I'm
> > trying to reach minimal set of q35 patches that we can build upon. I
> > think that this patch gives us at least the same functionality as piix
> > does. (Plus there is pcie hotplug).
> > 
> > As you know, I have a proof of concept patch series providing a second
> > level of hotplug behind bridges. Unfortunately, it was based on the
> > static acpi tables, before Paolo made the hotplug table generation
> > dynamic. So it needs quite a bit of re-work. But I know that it should
> > work :)
> > 
> > Thanks,
> > 
> > -Jason
> 
> Yes. Reason I ask is because q35 is adding bridges by default now.
> Would it be possible to only add them if requested on command line by user
> instead? I realize some guests expect devices at specific slots
> but this does not apply to bridges I think?
> 

I just tried out getting rid of the bridges by default.

So 'lspci' goes from:

00:00.0 Host bridge: Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
00:01.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:02.0 VGA compatible controller: Cirrus Logic GD 5446
00:03.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03)
00:17.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:18.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:1c.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:1c.1 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:1c.2 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:1c.3 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:1c.4 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:1c.5 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 92)
00:1f.0 ISA bridge: Intel Corporation 82801IB (ICH9) LPC Interface Controller (rev 02)
00:1f.2 SATA controller: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA AHCI Controller (rev 02)
00:1f.3 SMBus: Intel Corporation 82801I (ICH9 Family) SMBus Controller (rev 02)
03:00.0 PCI bridge: Texas Instruments XIO3130 PCI Express Switch (Upstream) (rev 02)
04:00.0 PCI bridge: Texas Instruments XIO3130 PCI Express Switch (Downstream) (rev 01)
0c:1c.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
0c:1d.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
0c:1e.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
0c:1f.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)

To:

00:00.0 Host bridge: Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
00:01.0 VGA compatible controller: Cirrus Logic GD 5446
00:02.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03)
00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 92)
00:1f.0 ISA bridge: Intel Corporation 82801IB (ICH9) LPC Interface Controller (rev 02)
00:1f.2 SATA controller: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA AHCI Controller (rev 02)
00:1f.3 SMBus: Intel Corporation 82801I (ICH9 Family) SMBus Controller (rev 02)


Windows and Linux guests seem fine with either layout. Slots 1-2 are
specific to my setup. So this is a pretty minimal set.

I think that providing the minimal set of devices is good, since it
allows the user to configure things as much as possible. So I am in
favor of this more minimal set. My only hesitation is that we pull out,
or that I have not included some important piece h/w at a specific slot
that a guest might need. Thus potentially breaking existing setups.
Perhaps, that might mean a new machine type in the future, if we've
messed up?

These devices and slots are pulled from the Intel docs on ICH9 and Q35
specs. See:

http://www.intel.com/content/www/us/en/io/io-controller-hub-9-datasheet.html

Perhaps, Yamahata can comment further on the specific set of bridges?

> It would also be nice to add comments explaining why
> specific slots were selected e.g. /* BSD XYZ fails to boot unless ahci is at alow 2 */
> etc.

Right, its basically just pulled from the Intel spec as mentioned above.

> 
> Also - will adding this code now mean that when adding bridges
> we'll need to add compatibility code in bios/qemu in the future?
> 

I don't think so, but maybe you can elaborate this concern more
specifically?

Thanks,

-Jason
Jason Baron - Oct. 11, 2012, 3:40 p.m.
On Thu, Oct 11, 2012 at 04:54:53PM +0200, Paolo Bonzini wrote:
> Il 11/10/2012 16:46, Michael S. Tsirkin ha scritto:
> > Yes. Reason I ask is because q35 is adding bridges by default now.
> > Would it be possible to only add them if requested on command line by user
> > instead?
> 
> Can you just use shpc or pcie hotplug on those bridges?
> 

Yes, I've tested pcie hotplug minimally, but it seems to work so far.

> > I realize some guests expect devices at specific slots
> > but this does not apply to bridges I think?
> > 
> > It would also be nice to add comments explaining why
> > specific slots were selected e.g. /* BSD XYZ fails to boot unless ahci is at alow 2 */
> > etc.
> > 
> > Also - will adding this code now mean that when adding bridges
> > we'll need to add compatibility code in bios/qemu in the future?
> 
> It will need SSDT changes.  One ugliness of Jason's patch was that it
> introduced a massive chain of 32*32 Ifs in the SSDT.  Changing it to use
> LoadTable shouldn't be hard.  Then you can patch out the LoadTable (e.g.
> _INI -> INI_) based on what QEMU tells you is a bridge or not.
> 
> Paolo
>
Michael S. Tsirkin - Oct. 11, 2012, 8:40 p.m.
On Thu, Oct 11, 2012 at 11:34:08AM -0400, Jason Baron wrote:
> On Thu, Oct 11, 2012 at 04:46:56PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Oct 11, 2012 at 10:21:22AM -0400, Jason Baron wrote:
> > > On Thu, Oct 11, 2012 at 12:57:06PM +0200, Michael S. Tsirkin wrote:
> > > > On Mon, Oct 08, 2012 at 11:30:39PM -0400, Jason Baron wrote:
> > > > > From: Jason Baron <jbaron@redhat.com>
> > > > > 
> > > > > Add piix style acpi hotplug to q35.
> > > > > 
> > > > > Signed-off-by: Jason Baron <jbaron@redhat.com>
> > > > 
> > > > Something I don't understand here: this only handles hotplug
> > > > of devices behind the root, no?
> > > > Don't we need support for hotplug/hot remove of devices behind
> > > > bridges?
> > > > 
> > > 
> > > Yes, this only handles acpi hotplug of devices behind the root. I'm
> > > trying to reach minimal set of q35 patches that we can build upon. I
> > > think that this patch gives us at least the same functionality as piix
> > > does. (Plus there is pcie hotplug).
> > > 
> > > As you know, I have a proof of concept patch series providing a second
> > > level of hotplug behind bridges. Unfortunately, it was based on the
> > > static acpi tables, before Paolo made the hotplug table generation
> > > dynamic. So it needs quite a bit of re-work. But I know that it should
> > > work :)
> > > 
> > > Thanks,
> > > 
> > > -Jason
> > 
> > Yes. Reason I ask is because q35 is adding bridges by default now.
> > Would it be possible to only add them if requested on command line by user
> > instead? I realize some guests expect devices at specific slots
> > but this does not apply to bridges I think?
> > 
> 
> I just tried out getting rid of the bridges by default.
> 
> So 'lspci' goes from:
> 
> 00:00.0 Host bridge: Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
> 00:01.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:02.0 VGA compatible controller: Cirrus Logic GD 5446
> 00:03.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03)
> 00:17.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:18.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.1 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.2 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.3 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.4 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.5 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 92)
> 00:1f.0 ISA bridge: Intel Corporation 82801IB (ICH9) LPC Interface Controller (rev 02)
> 00:1f.2 SATA controller: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA AHCI Controller (rev 02)
> 00:1f.3 SMBus: Intel Corporation 82801I (ICH9 Family) SMBus Controller (rev 02)
> 03:00.0 PCI bridge: Texas Instruments XIO3130 PCI Express Switch (Upstream) (rev 02)
> 04:00.0 PCI bridge: Texas Instruments XIO3130 PCI Express Switch (Downstream) (rev 01)
> 0c:1c.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
> 0c:1d.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
> 0c:1e.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
> 0c:1f.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
> 
> To:
> 
> 00:00.0 Host bridge: Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
> 00:01.0 VGA compatible controller: Cirrus Logic GD 5446
> 00:02.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03)
> 00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 92)
> 00:1f.0 ISA bridge: Intel Corporation 82801IB (ICH9) LPC Interface Controller (rev 02)
> 00:1f.2 SATA controller: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA AHCI Controller (rev 02)
> 00:1f.3 SMBus: Intel Corporation 82801I (ICH9 Family) SMBus Controller (rev 02)
> 
> 
> Windows and Linux guests seem fine with either layout. Slots 1-2 are
> specific to my setup. So this is a pretty minimal set.

I guess we can remove the PCI bridge too?

One interesting side effect here is that there are less free pci slots
on root bus now.  I guess at minimum management needs to be taught about
this, and I'm not sure how.

> I think that providing the minimal set of devices is good, since it
> allows the user to configure things as much as possible. So I am in
> favor of this more minimal set. My only hesitation is that we pull out,
> or that I have not included some important piece h/w at a specific slot
> that a guest might need. Thus potentially breaking existing setups.
> Perhaps, that might mean a new machine type in the future, if we've
> messed up?

Yes, that's one solution.

> These devices and slots are pulled from the Intel docs on ICH9 and Q35
> specs. See:
> 
> http://www.intel.com/content/www/us/en/io/io-controller-hub-9-datasheet.html
> 
> Perhaps, Yamahata can comment further on the specific set of bridges?
> 
> > It would also be nice to add comments explaining why
> > specific slots were selected e.g. /* BSD XYZ fails to boot unless ahci is at alow 2 */
> > etc.
> 
> Right, its basically just pulled from the Intel spec as mentioned above.
> 
> > 
> > Also - will adding this code now mean that when adding bridges
> > we'll need to add compatibility code in bios/qemu in the future?
> > 
> 
> I don't think so, but maybe you can elaborate this concern more
> specifically?
> 
> Thanks,
> 
> -Jason

Just this: can same bios work on this interface and the one
you intend for hotplug behind bridge? Or will we need to version
interface?
Gerd Hoffmann - Oct. 12, 2012, 7:27 a.m.
Hi,

>> Yes. Reason I ask is because q35 is adding bridges by default now.
>> Would it be possible to only add them if requested on command line by user
>> instead? I realize some guests expect devices at specific slots
>> but this does not apply to bridges I think?
> 
> I just tried out getting rid of the bridges by default.

That clearly raises the question which devices should be created
automatically by -M q35.  I think the devices which are part of the ich9
chipset should be there by default.  /me looks at my laptop which
happens to have a ich9 chipset.

> So 'lspci' goes from:
> 
> 00:00.0 Host bridge: Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller

Keep.

> 00:01.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)

That looks bogous.  Drop.

> 00:02.0 VGA compatible controller: Cirrus Logic GD 5446

Our default vga.

> 00:03.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03)

Real ich9 has the e1000 @ 00:19.0, so it would make sense to place one
there.  Adding a default nic will probably create some headache though,
so maybe better don't.

> 00:17.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:18.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)

Looks bogous too, drop.

> 00:1c.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.1 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.2 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.3 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)

Real ich9 has these, keep.

> 00:1c.4 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 00:1c.5 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)

Dunno.  Not present in my laptop, other chipset variants might have more
pcie ports though ...

> 00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 92)

Present on real hardware, keep.

> 00:1f.0 ISA bridge: Intel Corporation 82801IB (ICH9) LPC Interface Controller (rev 02)
> 00:1f.2 SATA controller: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA AHCI Controller (rev 02)
> 00:1f.3 SMBus: Intel Corporation 82801I (ICH9 Family) SMBus Controller (rev 02)

Keep.

> 03:00.0 PCI bridge: Texas Instruments XIO3130 PCI Express Switch (Upstream) (rev 02)
> 04:00.0 PCI bridge: Texas Instruments XIO3130 PCI Express Switch (Downstream) (rev 01)
> 0c:1c.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
> 0c:1d.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
> 0c:1e.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)
> 0c:1f.0 PCI bridge: Digital Equipment Corporation DECchip 21154 (rev 05)

Drop them.  I guess those are just there to simplify bridge device testing.

Real hardware also has:

00:1b.0 Audio device: Intel Corporation 82801I (ICH9 Family) HD Audio
Controller (rev 03)

Emulated by intel-hda.

00:1d.0 USB controller: Intel Corporation 82801I (ICH9 Family) USB UHCI
Controller #1 (rev 03)
00:1d.1 USB controller: Intel Corporation 82801I (ICH9 Family) USB UHCI
Controller #2 (rev 03)
00:1d.2 USB controller: Intel Corporation 82801I (ICH9 Family) USB UHCI
Controller #3 (rev 03)
00:1d.7 USB controller: Intel Corporation 82801I (ICH9 Family) USB2 EHCI
Controller #1 (rev 03)

We can emulate those too (see docs/docs/ich9-ehci-uhci.cfg).

I think we should add audio+usb to q35.

cheers,
  Gerd
Michael S. Tsirkin - Oct. 12, 2012, 9:39 a.m.
On Fri, Oct 12, 2012 at 09:27:02AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> >> Yes. Reason I ask is because q35 is adding bridges by default now.
> >> Would it be possible to only add them if requested on command line by user
> >> instead? I realize some guests expect devices at specific slots
> >> but this does not apply to bridges I think?
> > 
> > I just tried out getting rid of the bridges by default.
> 
> That clearly raises the question which devices should be created
> automatically by -M q35.  I think the devices which are part of the ich9
> chipset should be there by default.  /me looks at my laptop which
> happens to have a ich9 chipset.

The reason this is a bad idea is very simple: we only have a way to add
devices not to remove them.  So if you miss a device which your guest
needs, it is easy to add, but there is no way to remove.

> > So 'lspci' goes from:
> > 
> > 00:00.0 Host bridge: Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
> 
> Keep.
> 
> > 00:01.0 PCI bridge: Intel Corporation 5500 Non-Legacy I/O Hub PCI Express Root Port 0 (rev 02)
> 
> That looks bogous.  Drop.
> 
> > 00:02.0 VGA compatible controller: Cirrus Logic GD 5446
> 
> Our default vga.
> 
> > 00:03.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 03)
> 
> Real ich9 has the e1000 @ 00:19.0, so it would make sense to place one
> there.  Adding a default nic will probably create some headache though,
> so maybe better don't.

And that's just one example. And it's not really e1000 at all.
Fact is, we don't emulate real hardware exactly.
So let's have a minimal machine and if you want to add e1000 audio etc,
you can do this. We can even teach management to do it with friendly
UI as opposed to cryptic machine types.
Gerd Hoffmann - Oct. 12, 2012, 10:06 a.m.
Hi,

>>> I just tried out getting rid of the bridges by default.
>>
>> That clearly raises the question which devices should be created
>> automatically by -M q35.  I think the devices which are part of the ich9
>> chipset should be there by default.  /me looks at my laptop which
>> happens to have a ich9 chipset.
> 
> The reason this is a bad idea is very simple: we only have a way to add
> devices not to remove them.  So if you miss a device which your guest
> needs, it is easy to add, but there is no way to remove.

Why would you want remove devices?  They don't harm when present.  And
you can't remove them on real hardware either.  Try ordering a ich9
without sound or usb ;)

>> Real ich9 has the e1000 @ 00:19.0, so it would make sense to place one
>> there.  Adding a default nic will probably create some headache though,
>> so maybe better don't.
> 
> And that's just one example.

It is problematic because the nic needs configuration and configuring a
builtin device is tricky.  Also our e1000 model isn't the ich9 one.

Most other chipset devices are not problematic at all as they are just
controllers where you can attach stuff to (and by default there isn't
anything attached).  This includes:

  * pcie ports (waiting for pcie devices plugged in).
  * ahci controller (waiting for disks/cdroms being attached).
  * intel-hda (waiting for audio coded (hda-*) being attached).
  * ehci+uhci (waiting for usb devices being plugged in).

The stuff being attached/plugged there needs configuration, but not the
controllers themself.

> Fact is, we don't emulate real hardware exactly.

But we try to.

> So let's have a minimal machine and if you want to add e1000 audio etc,
> you can do this. We can even teach management to do it with friendly
> UI as opposed to cryptic machine types.

/me disagrees.

cheers,
  Gerd
Michael S. Tsirkin - Oct. 12, 2012, 10:39 a.m.
On Fri, Oct 12, 2012 at 12:06:44PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> >>> I just tried out getting rid of the bridges by default.
> >>
> >> That clearly raises the question which devices should be created
> >> automatically by -M q35.  I think the devices which are part of the ich9
> >> chipset should be there by default.  /me looks at my laptop which
> >> happens to have a ich9 chipset.
> > 
> > The reason this is a bad idea is very simple: we only have a way to add
> > devices not to remove them.  So if you miss a device which your guest
> > needs, it is easy to add, but there is no way to remove.
> 
> Why would you want remove devices?  They don't harm when present.

Yes they do, they increase attack surface on hypervisor.
Jason Baron - Oct. 12, 2012, 3 p.m.
On Fri, Oct 12, 2012 at 12:06:44PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> >>> I just tried out getting rid of the bridges by default.
> >>
> >> That clearly raises the question which devices should be created
> >> automatically by -M q35.  I think the devices which are part of the ich9
> >> chipset should be there by default.  /me looks at my laptop which
> >> happens to have a ich9 chipset.
> > 
> > The reason this is a bad idea is very simple: we only have a way to add
> > devices not to remove them.  So if you miss a device which your guest
> > needs, it is easy to add, but there is no way to remove.
> 
> Why would you want remove devices?  They don't harm when present.  And
> you can't remove them on real hardware either.  Try ordering a ich9
> without sound or usb ;)
> 
> >> Real ich9 has the e1000 @ 00:19.0, so it would make sense to place one
> >> there.  Adding a default nic will probably create some headache though,
> >> so maybe better don't.
> > 
> > And that's just one example.
> 
> It is problematic because the nic needs configuration and configuring a
> builtin device is tricky.  Also our e1000 model isn't the ich9 one.
> 
> Most other chipset devices are not problematic at all as they are just
> controllers where you can attach stuff to (and by default there isn't
> anything attached).  This includes:
> 
>   * pcie ports (waiting for pcie devices plugged in).
>   * ahci controller (waiting for disks/cdroms being attached).
>   * intel-hda (waiting for audio coded (hda-*) being attached).
>   * ehci+uhci (waiting for usb devices being plugged in).
> 
> The stuff being attached/plugged there needs configuration, but not the
> controllers themself.
> 
> > Fact is, we don't emulate real hardware exactly.
> 
> But we try to.
> 
> > So let's have a minimal machine and if you want to add e1000 audio etc,
> > you can do this. We can even teach management to do it with friendly
> > UI as opposed to cryptic machine types.
> 
> /me disagrees.
> 

What if we have a 'basic' configuration as Michael suggests, but have an
easy way to specify that we want these additional built-in
controllers/bridges at their default slot.

For example, for q35 I can currently pass '-usb' and it will create a
uhci at slot 1d func 0. (I have small patch to add ehci as well).

Not sure if we have appropriate options we can piggy-back on for default
sound and bridges, but we can add these later, if this makese sense.

Thanks,

-Jason
Jason Baron - Oct. 12, 2012, 3:27 p.m.
On Thu, Oct 11, 2012 at 10:40:04PM +0200, Michael S. Tsirkin wrote:
> > Windows and Linux guests seem fine with either layout. Slots 1-2 are
> > specific to my setup. So this is a pretty minimal set.
> 
> I guess we can remove the PCI bridge too?
> 

maybe. Perhaps, we can have a very basic set of devices, and have easy
ways to specify various default setups, as I've suggested in a separate
mail.

> One interesting side effect here is that there are less free pci slots
> on root bus now.  I guess at minimum management needs to be taught about
> this, and I'm not sure how.
> 
> > I think that providing the minimal set of devices is good, since it
> > allows the user to configure things as much as possible. So I am in
> > favor of this more minimal set. My only hesitation is that we pull out,
> > or that I have not included some important piece h/w at a specific slot
> > that a guest might need. Thus potentially breaking existing setups.
> > Perhaps, that might mean a new machine type in the future, if we've
> > messed up?
> 
> Yes, that's one solution.
> 
> > These devices and slots are pulled from the Intel docs on ICH9 and Q35
> > specs. See:
> > 
> > http://www.intel.com/content/www/us/en/io/io-controller-hub-9-datasheet.html
> > 
> > Perhaps, Yamahata can comment further on the specific set of bridges?
> > 
> > > It would also be nice to add comments explaining why
> > > specific slots were selected e.g. /* BSD XYZ fails to boot unless ahci is at alow 2 */
> > > etc.
> > 
> > Right, its basically just pulled from the Intel spec as mentioned above.
> > 
> > > 
> > > Also - will adding this code now mean that when adding bridges
> > > we'll need to add compatibility code in bios/qemu in the future?
> > > 
> > 
> > I don't think so, but maybe you can elaborate this concern more
> > specifically?
> > 
> > Thanks,
> > 
> > -Jason
> 
> Just this: can same bios work on this interface and the one
> you intend for hotplug behind bridge? Or will we need to version
> interface?
> 

hmm...I wasn't aware of this contraint. Since we control the version of
SeaBIOS in qemu, is this really a problem? And it was suggested that
qemu is the only consumer of the acpi tables.

The current hotplug code doesn't seem to be versioned. Has this caused
problems?

In terms of the interface itself, yes, I think ideally it would be
changed.

Thanks,

-Jason
Michael S. Tsirkin - Oct. 13, 2012, 11:03 p.m.
On Fri, Oct 12, 2012 at 11:27:21AM -0400, Jason Baron wrote:
> On Thu, Oct 11, 2012 at 10:40:04PM +0200, Michael S. Tsirkin wrote:
> > > Windows and Linux guests seem fine with either layout. Slots 1-2 are
> > > specific to my setup. So this is a pretty minimal set.
> > 
> > I guess we can remove the PCI bridge too?
> > 
> 
> maybe. Perhaps, we can have a very basic set of devices, and have easy
> ways to specify various default setups, as I've suggested in a separate
> mail.
> 
> > One interesting side effect here is that there are less free pci slots
> > on root bus now.  I guess at minimum management needs to be taught about
> > this, and I'm not sure how.
> > 
> > > I think that providing the minimal set of devices is good, since it
> > > allows the user to configure things as much as possible. So I am in
> > > favor of this more minimal set. My only hesitation is that we pull out,
> > > or that I have not included some important piece h/w at a specific slot
> > > that a guest might need. Thus potentially breaking existing setups.
> > > Perhaps, that might mean a new machine type in the future, if we've
> > > messed up?
> > 
> > Yes, that's one solution.
> > 
> > > These devices and slots are pulled from the Intel docs on ICH9 and Q35
> > > specs. See:
> > > 
> > > http://www.intel.com/content/www/us/en/io/io-controller-hub-9-datasheet.html
> > > 
> > > Perhaps, Yamahata can comment further on the specific set of bridges?
> > > 
> > > > It would also be nice to add comments explaining why
> > > > specific slots were selected e.g. /* BSD XYZ fails to boot unless ahci is at alow 2 */
> > > > etc.
> > > 
> > > Right, its basically just pulled from the Intel spec as mentioned above.
> > > 
> > > > 
> > > > Also - will adding this code now mean that when adding bridges
> > > > we'll need to add compatibility code in bios/qemu in the future?
> > > > 
> > > 
> > > I don't think so, but maybe you can elaborate this concern more
> > > specifically?
> > > 
> > > Thanks,
> > > 
> > > -Jason
> > 
> > Just this: can same bios work on this interface and the one
> > you intend for hotplug behind bridge? Or will we need to version
> > interface?
> > 
> 
> hmm...I wasn't aware of this contraint. Since we control the version of
> SeaBIOS in qemu, is this really a problem? And it was suggested that
> qemu is the only consumer of the acpi tables.

Yes. But cross version live migration is what creates issues.

> The current hotplug code doesn't seem to be versioned. Has this caused
> problems?

Yes but in the end we found a way to be compatible.

> In terms of the interface itself, yes, I think ideally it would be
> changed.
> 
> Thanks,
> 
> -Jason

K good to know.
I think we can merge even with this knowledge as an interim step
assuming that we address this before we release qemu.

Patch

diff --git a/hw/acpi_ich9.c b/hw/acpi_ich9.c
index 1412ad3..412c9c1 100644
--- a/hw/acpi_ich9.c
+++ b/hw/acpi_ich9.c
@@ -41,6 +41,13 @@  do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0)
 #define ICH9_DEBUG(fmt, ...)    do { } while (0)
 #endif
 
+#define PCI_UP_BASE 0xae00
+#define PCI_DOWN_BASE 0xae04
+#define PCI_EJ_BASE 0xae08
+#define PCI_RMV_BASE 0xae0c
+#define ICH9_PCI_HOTPLUG_STATUS 2
+
+
 static void pm_ioport_write_fallback(void *opaque, uint32_t addr, int len,
                                      uint32_t val);
 static uint32_t pm_ioport_read_fallback(void *opaque, uint32_t addr, int len);
@@ -55,7 +62,10 @@  static void pm_update_sci(ICH9LPCPMRegs *pm)
                   (ACPI_BITMASK_RT_CLOCK_ENABLE |
                    ACPI_BITMASK_POWER_BUTTON_ENABLE |
                    ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
-                   ACPI_BITMASK_TIMER_ENABLE)) != 0);
+                   ACPI_BITMASK_TIMER_ENABLE)) != 0) ||
+         (((pm->acpi_regs.gpe.sts[0] & pm->acpi_regs.gpe.en[0])
+          & ICH9_PCI_HOTPLUG_STATUS) != 0);
+
     qemu_set_irq(pm->irq, sci_level);
 
     /* schedule a timer interruption if needed */
@@ -77,6 +87,7 @@  static void pm_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
     switch (addr & ICH9_PMIO_MASK) {
     case ICH9_PMIO_GPE0_STS ... (ICH9_PMIO_GPE0_STS + ICH9_PMIO_GPE0_LEN - 1):
         acpi_gpe_ioport_writeb(&pm->acpi_regs, addr, val);
+        pm_update_sci(pm);
         break;
     default:
         break;
@@ -283,6 +294,65 @@  const VMStateDescription vmstate_ich9_pm = {
     }
 };
 
+static void acpi_ich9_eject_slot(ICH9LPCPMRegs *opaque, unsigned slots)
+{
+    BusChild *kid, *next;
+    ICH9LPCPMRegs *pm = opaque;
+    ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
+    PCIDevice *s = PCI_DEVICE(lpc);
+    BusState *bus = qdev_get_parent_bus(&s->qdev);
+    int slot = ffs(slots) - 1;
+    bool slot_free = true;
+
+    /* Mark request as complete */
+    pm->pci0_status.down &= ~(1U << slot);
+
+    QTAILQ_FOREACH_SAFE(kid, &bus->children, sibling, next) {
+        DeviceState *qdev = kid->child;
+        PCIDevice *dev = PCI_DEVICE(qdev);
+        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
+        if (PCI_SLOT(dev->devfn) == slot) {
+            if (pc->no_hotplug) {
+                slot_free = false;
+            } else {
+                qdev_free(qdev);
+            }
+        }
+    }
+    if (slot_free) {
+        pm->pci0_slot_device_present &= ~(1U << slot);
+    }
+}
+
+static void acpi_ich9_update_hotplug(ICH9LPCPMRegs *pm)
+{
+    ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
+    PCIDevice *dev = PCI_DEVICE(lpc);
+    BusState *bus = qdev_get_parent_bus(&dev->qdev);
+    BusChild *kid, *next;
+
+    /* Execute any pending removes during reset */
+    while (pm->pci0_status.down) {
+        acpi_ich9_eject_slot(pm, pm->pci0_status.down);
+    }
+
+    pm->pci0_hotplug_enable = ~0;
+    pm->pci0_slot_device_present = 0;
+
+    QTAILQ_FOREACH_SAFE(kid, &bus->children, sibling, next) {
+        DeviceState *qdev = kid->child;
+        PCIDevice *pdev = PCI_DEVICE(qdev);
+        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pdev);
+        int slot = PCI_SLOT(pdev->devfn);
+
+        if (pc->no_hotplug) {
+            pm->pci0_hotplug_enable &= ~(1U << slot);
+        }
+
+        pm->pci0_slot_device_present |= (1U << slot);
+    }
+}
+
 static void pm_reset(void *opaque)
 {
     ICH9LPCPMRegs *pm = opaque;
@@ -300,6 +370,7 @@  static void pm_reset(void *opaque)
     }
 
     pm_update_sci(pm);
+    acpi_ich9_update_hotplug(pm);
 }
 
 static void pm_powerdown_req(Notifier *n, void *opaque)
@@ -309,6 +380,104 @@  static void pm_powerdown_req(Notifier *n, void *opaque)
     acpi_pm1_evt_power_down(&pm->acpi_regs);
 }
 
+static uint32_t pci_up_read(void *opaque, uint32_t addr)
+{
+    ICH9LPCPMRegs *pm = opaque;
+    uint32_t val;
+
+    /* Manufacture an "up" value to cause a device check on any hotplug
+     * slot with a device.  Extra device checks are harmless. */
+    val = pm->pci0_slot_device_present & pm->pci0_hotplug_enable;
+
+    ICH9_DEBUG("pci_up_read %x\n", val);
+    return val;
+}
+
+static uint32_t pci_down_read(void *opaque, uint32_t addr)
+{
+    ICH9LPCPMRegs *pm = opaque;
+    uint32_t val = pm->pci0_status.down;
+
+    ICH9_DEBUG("pci_down_read %x\n", val);
+    return val;
+}
+
+static uint32_t pci_features_read(void *opaque, uint32_t addr)
+{
+    /* No feature defined yet */
+    ICH9_DEBUG("pci_features_read %x\n", 0);
+    return 0;
+}
+
+static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    acpi_ich9_eject_slot(opaque, val);
+
+    ICH9_DEBUG("pciej write %x <== %d\n", addr, val);
+}
+
+static uint32_t pcirmv_read(void *opaque, uint32_t addr)
+{
+    ICH9LPCPMRegs *pm = opaque;
+
+    return pm->pci0_hotplug_enable;
+}
+
+static void enable_device(ICH9LPCPMRegs *pm, int slot)
+{
+    pm->acpi_regs.gpe.sts[0] |= ICH9_PCI_HOTPLUG_STATUS;
+    pm->pci0_slot_device_present |= (1U << slot);
+}
+
+static void disable_device(ICH9LPCPMRegs *pm, int slot)
+{
+    pm->acpi_regs.gpe.sts[0] |= ICH9_PCI_HOTPLUG_STATUS;
+    pm->pci0_status.down |= (1U << slot);
+}
+
+static int ich9_device_hotplug(DeviceState *qdev, PCIDevice *dev,
+                PCIHotplugState state)
+{
+    int slot = PCI_SLOT(dev->devfn);
+    ICH9LPCState *lpc = DO_UPCAST(ICH9LPCState, d,
+                                PCI_DEVICE(qdev));
+    ICH9LPCPMRegs *pm = &lpc->pm;
+
+    /* Don't send event when device is enabled during qemu machine creation:
+     * it is present on boot, no hotplug event is necessary. We do send an
+     * event when the device is disabled later. */
+    if (state == PCI_COLDPLUG_ENABLED) {
+        pm->pci0_slot_device_present |= (1U << slot);
+        return 0;
+    }
+
+    if (state == PCI_HOTPLUG_ENABLED) {
+        enable_device(pm, slot);
+    } else {
+        disable_device(pm, slot);
+    }
+
+    pm_update_sci(pm);
+
+    return 0;
+}
+
+static void ich9_acpi_system_hot_add_init(ICH9LPCPMRegs *s)
+{
+    ICH9LPCState *lpc = container_of(s, ICH9LPCState, pm);
+    PCIDevice *pdev = PCI_DEVICE(lpc);
+
+    register_ioport_read(PCI_UP_BASE, 4, 4, pci_up_read, s);
+    register_ioport_read(PCI_DOWN_BASE, 4, 4, pci_down_read, s);
+
+    register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, s);
+    register_ioport_read(PCI_EJ_BASE, 4, 4,  pci_features_read, s);
+
+    register_ioport_read(PCI_RMV_BASE, 4, 4,  pcirmv_read, s);
+
+    pci_bus_hotplug(pdev->bus, ich9_device_hotplug, &pdev->qdev);
+}
+
 void ich9_pm_init(ICH9LPCPMRegs *pm, qemu_irq sci_irq, qemu_irq cmos_s3)
 {
     acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn);
@@ -319,4 +488,5 @@  void ich9_pm_init(ICH9LPCPMRegs *pm, qemu_irq sci_irq, qemu_irq cmos_s3)
     qemu_register_reset(pm_reset, pm);
     pm->powerdown_notifier.notify = pm_powerdown_req;
     qemu_register_powerdown_notifier(&pm->powerdown_notifier);
+    ich9_acpi_system_hot_add_init(pm);
 }
diff --git a/hw/acpi_ich9.h b/hw/acpi_ich9.h
index 180c406..b4e2aff 100644
--- a/hw/acpi_ich9.h
+++ b/hw/acpi_ich9.h
@@ -23,6 +23,11 @@ 
 
 #include "acpi.h"
 
+struct pci_status {
+    uint32_t up; /* deprecated, maintained for migration compatibility */
+    uint32_t down;
+};
+
 typedef struct ICH9LPCPMRegs {
     /*
      * In ich9 spec says that pm1_cnt register is 32bit width and
@@ -37,6 +42,11 @@  typedef struct ICH9LPCPMRegs {
 
     uint32_t pm_io_base;
     Notifier powerdown_notifier;
+
+    /* for pci hotplug */
+    struct pci_status pci0_status;
+    uint32_t pci0_hotplug_enable;
+    uint32_t pci0_slot_device_present;
 } ICH9LPCPMRegs;
 
 void ich9_pm_init(ICH9LPCPMRegs *pm,