Patchwork [15/15] xen: Add a Xen specific ACPI Implementation to target-xen

login
register
mail settings
Submitter Stefano Stabellini
Date Aug. 12, 2010, 2:10 p.m.
Message ID <1281622202-3453-15-git-send-email-stefano.stabellini@eu.citrix.com>
Download mbox | patch
Permalink /patch/61615/
State New
Headers show

Comments

Stefano Stabellini - Aug. 12, 2010, 2:10 p.m.
From: Anthony PERARD <anthony.perard@citrix.com>

Xen currently uses a different BIOS (hvmloader + rombios) therefore the
Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
We plan on fixing this properly but at the moment we are just adding a
new Xen specific acpi_piix4 implementation.
This patch is optional; without it the VM boots but it cannot shutdown
properly or go to S3.

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 Makefile.target     |    1 +
 hw/xen_acpi_piix4.c |  424 +++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/xen_common.h     |    3 +
 hw/xen_machine_fv.c |    6 +-
 4 files changed, 429 insertions(+), 5 deletions(-)
 create mode 100644 hw/xen_acpi_piix4.c
Blue Swirl - Aug. 12, 2010, 6:46 p.m.
On Thu, Aug 12, 2010 at 2:10 PM,  <stefano.stabellini@eu.citrix.com> wrote:
> From: Anthony PERARD <anthony.perard@citrix.com>
>
> Xen currently uses a different BIOS (hvmloader + rombios) therefore the
> Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
> We plan on fixing this properly but at the moment we are just adding a
> new Xen specific acpi_piix4 implementation.

I'd suppose the proper fix is to modify acpi_piix4 instead of copy&paste.

> This patch is optional; without it the VM boots but it cannot shutdown
> properly or go to S3.
>
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  Makefile.target     |    1 +
>  hw/xen_acpi_piix4.c |  424 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/xen_common.h     |    3 +
>  hw/xen_machine_fv.c |    6 +-
>  4 files changed, 429 insertions(+), 5 deletions(-)
>  create mode 100644 hw/xen_acpi_piix4.c
>
> diff --git a/Makefile.target b/Makefile.target
> index 1984cdd..a2d9217 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -325,6 +325,7 @@ obj-xen-y += piix_pci.o
>  obj-xen-y += mc146818rtc.o
>  obj-xen-y += xenstore.o
>  obj-xen-y += xen_platform.o
> +obj-xen-y += xen_acpi_piix4.o
>
>  obj-xen-y += xen_mapcache.o
>  obj-xen-y += stub-functions.o
> diff --git a/hw/xen_acpi_piix4.c b/hw/xen_acpi_piix4.c
> new file mode 100644
> index 0000000..3c65963
> --- /dev/null
> +++ b/hw/xen_acpi_piix4.c
> @@ -0,0 +1,424 @@
> + /*
> + * PIIX4 ACPI controller emulation
> + *
> + * Winston liwen Wang, winston.l.wang@intel.com
> + * Copyright (c) 2006 , Intel Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "hw.h"
> +#include "pc.h"
> +#include "pci.h"
> +#include "sysemu.h"
> +#include "acpi.h"
> +
> +#include "xen_backend.h"
> +#include "xen_common.h"
> +#include "qemu-log.h"
> +
> +#include <xen/hvm/ioreq.h>
> +#include <xen/hvm/params.h>
> +
> +#define PIIX4ACPI_LOG_ERROR 0
> +#define PIIX4ACPI_LOG_INFO 1
> +#define PIIX4ACPI_LOG_DEBUG 2
> +#define PIIX4ACPI_LOGLEVEL PIIX4ACPI_LOG_INFO
> +#define PIIX4ACPI_LOG(level, fmt, ...) do { if (level <= PIIX4ACPI_LOGLEVEL) qemu_log(fmt, ## __VA_ARGS__); } while (0)
> +
> +/* Sleep state type codes as defined by the \_Sx objects in the DSDT. */
> +/* These must be kept in sync with the DSDT (hvmloader/acpi/dsdt.asl) */
> +#define SLP_TYP_S4        (6 << 10)
> +#define SLP_TYP_S3        (5 << 10)
> +#define SLP_TYP_S5        (7 << 10)
> +
> +#define ACPI_DBG_IO_ADDR  0xb044
> +#define ACPI_PHP_IO_ADDR  0x10c0
> +
> +#define PHP_EVT_ADD     0x0
> +#define PHP_EVT_REMOVE  0x3
> +
> +/* The bit in GPE0_STS/EN to notify the pci hotplug event */
> +#define ACPI_PHP_GPE_BIT 3
> +
> +#define DEVFN_TO_PHP_SLOT_REG(devfn) (devfn >> 1)
> +#define PHP_SLOT_REG_TO_DEVFN(reg, hilo) ((reg << 1) | hilo)
> +
> +/* ioport to monitor cpu add/remove status */
> +#define PROC_BASE 0xaf00
> +
> +typedef struct PCIAcpiState {
> +    PCIDevice dev;
> +    uint16_t pm1_control; /* pm1a_ECNT_BLK */
> +    qemu_irq irq;
> +    qemu_irq cmos_s3;
> +} PCIAcpiState;
> +
> +typedef struct GPEState {
> +    /* GPE0 block */
> +    uint8_t gpe0_sts[ACPI_GPE0_BLK_LEN / 2];
> +    uint8_t gpe0_en[ACPI_GPE0_BLK_LEN / 2];
> +
> +    /* CPU bitmap */
> +    uint8_t cpus_sts[32];
> +
> +    /* SCI IRQ level */
> +    uint8_t sci_asserted;
> +
> +} GPEState;
> +
> +static GPEState gpe_state;
> +
> +static qemu_irq sci_irq;
> +
> +typedef struct AcpiDeviceState AcpiDeviceState;
> +AcpiDeviceState *acpi_device_table;
> +
> +static const VMStateDescription vmstate_acpi = {
> +    .name = "PIIX4 ACPI",
> +    .version_id = 1,
> +    .fields      = (VMStateField []) {
> +        VMSTATE_PCI_DEVICE(dev, PCIAcpiState),
> +        VMSTATE_UINT16(pm1_control, PCIAcpiState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PCIAcpiState *s = opaque;
> +    s->pm1_control = (s->pm1_control & 0xff00) | (val & 0xff);
> +}
> +
> +static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr)
> +{
> +    PCIAcpiState *s = opaque;
> +    /* Mask out the write-only bits */
> +    return (uint8_t)(s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE));
> +}
> +
> +static void acpi_shutdown(PCIAcpiState *s, uint32_t val)
> +{
> +    if (!(val & ACPI_BITMASK_SLEEP_ENABLE))
> +        return;
> +
> +    switch (val & ACPI_BITMASK_SLEEP_TYPE) {
> +    case SLP_TYP_S3:
> +        qemu_system_reset();
> +        qemu_irq_raise(s->cmos_s3);
> +        xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
> +        break;
> +    case SLP_TYP_S4:
> +    case SLP_TYP_S5:
> +        qemu_system_shutdown_request();
> +        break;
> +    default:
> +        break;
> +    }
> +}
> +
> +static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PCIAcpiState *s = opaque;
> +
> +    val <<= 8;
> +    s->pm1_control = ((s->pm1_control & 0xff) | val) & ~ACPI_BITMASK_SLEEP_ENABLE;
> +
> +    acpi_shutdown(s, val);
> +}
> +
> +static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr)
> +{
> +    PCIAcpiState *s = opaque;
> +    /* Mask out the write-only bits */
> +    return (uint8_t)((s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE)) >> 8);
> +}
> +
> +static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PCIAcpiState *s = opaque;
> +
> +    s->pm1_control = val & ~ACPI_BITMASK_SLEEP_ENABLE;
> +
> +    acpi_shutdown(s, val);
> +}
> +
> +static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr)
> +{
> +    PCIAcpiState *s = opaque;
> +    /* Mask out the write-only bits */
> +    return (s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE));
> +}
> +
> +static void acpi_map(PCIDevice *pci_dev, int region_num,
> +                     uint32_t addr, uint32_t size, int type)
> +{
> +    PCIAcpiState *d = (PCIAcpiState *)pci_dev;
> +
> +    /* Byte access */
> +    register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d);
> +    register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d);
> +    register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d);
> +    register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d);
> +
> +    /* Word access */
> +    register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d);
> +    register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d);
> +}
> +
> +static inline int test_bit(uint8_t *map, int bit)
> +{
> +    return ( map[bit / 8] & (1 << (bit % 8)) );
> +}
> +
> +static inline void set_bit(uint8_t *map, int bit)
> +{
> +    map[bit / 8] |= (1 << (bit % 8));
> +}
> +
> +static inline void clear_bit(uint8_t *map, int bit)
> +{
> +    map[bit / 8] &= ~(1 << (bit % 8));
> +}
> +
> +static void acpi_dbg_writel(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "ACPI: DBG: 0x%08x\n", val);
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "ACPI:debug: write addr=0x%x, val=0x%x.\n", addr, val);
> +}
> +
> +/* GPEx_STS occupy 1st half of the block, while GPEx_EN 2nd half */
> +static uint32_t gpe_sts_read(void *opaque, uint32_t addr)
> +{
> +    GPEState *s = opaque;
> +
> +    return s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS];
> +}
> +
> +/* write 1 to clear specific GPE bits */
> +static void gpe_sts_write(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    GPEState *s = opaque;
> +    int hotplugged = 0;
> +
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_sts_write: addr=0x%x, val=0x%x.\n", addr, val);
> +
> +    hotplugged = test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT);
> +    s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS] &= ~val;
> +    if ( s->sci_asserted &&
> +         hotplugged &&
> +         !test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT)) {
> +        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "Clear the GPE0_STS bit for ACPI hotplug & deassert the IRQ.\n");
> +        qemu_irq_lower(sci_irq);
> +    }
> +
> +}
> +
> +static uint32_t gpe_en_read(void *opaque, uint32_t addr)
> +{
> +    GPEState *s = opaque;
> +
> +    return s->gpe0_en[addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2)];
> +}
> +
> +/* write 0 to clear en bit */
> +static void gpe_en_write(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    GPEState *s = opaque;
> +    int reg_count;
> +
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_en_write: addr=0x%x, val=0x%x.\n", addr, val);
> +    reg_count = addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2);
> +    s->gpe0_en[reg_count] = val;
> +    /* If disable GPE bit right after generating SCI on it,
> +     * need deassert the intr to avoid redundant intrs
> +     */
> +    if ( s->sci_asserted &&
> +         reg_count == (ACPI_PHP_GPE_BIT / 8) &&
> +         !(val & (1 << (ACPI_PHP_GPE_BIT % 8))) ) {
> +        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "deassert due to disable GPE bit.\n");
> +        s->sci_asserted = 0;
> +        qemu_irq_lower(sci_irq);
> +    }
> +
> +}
> +
> +static void gpe_save(QEMUFile* f, void* opaque)
> +{
> +    GPEState *s = (GPEState*)opaque;
> +    int i;
> +
> +    for ( i = 0; i < ACPI_GPE0_BLK_LEN / 2; i++ ) {
> +        qemu_put_8s(f, &s->gpe0_sts[i]);
> +        qemu_put_8s(f, &s->gpe0_en[i]);
> +    }
> +
> +    qemu_put_8s(f, &s->sci_asserted);
> +    if ( s->sci_asserted ) {
> +        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "gpe_save with sci asserted!\n");
> +    }
> +}
> +
> +static int gpe_load(QEMUFile* f, void* opaque, int version_id)
> +{
> +    GPEState *s = (GPEState*)opaque;
> +    int i;
> +    if (version_id != 1)
> +        return -EINVAL;
> +
> +    for ( i = 0; i < ACPI_GPE0_BLK_LEN / 2; i++ ) {
> +        qemu_get_8s(f, &s->gpe0_sts[i]);
> +        qemu_get_8s(f, &s->gpe0_en[i]);
> +    }
> +
> +    qemu_get_8s(f, &s->sci_asserted);
> +    return 0;
> +}
> +
> +static uint32_t gpe_cpus_readb(void *opaque, uint32_t addr)
> +{
> +    uint32_t val = 0;
> +    GPEState *g = opaque;
> +
> +    switch (addr) {
> +        case PROC_BASE ... PROC_BASE+31:
> +            val = g->cpus_sts[addr - PROC_BASE];
> +        default:
> +            break;
> +    }
> +
> +    return val;
> +}
> +
> +static void gpe_cpus_writeb(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    /* GPEState *g = opaque; */
> +
> +    switch (addr) {
> +        case PROC_BASE ... PROC_BASE + 31:
> +            /* don't allow to change cpus_sts from inside a guest */
> +            break;
> +        default:
> +            break;
> +    }
> +}
> +
> +static void gpe_acpi_init(void)
> +{
> +    GPEState *s = &gpe_state;
> +    memset(s, 0, sizeof(GPEState));
> +
> +    s->cpus_sts[0] = 1;
> +
> +    register_ioport_read(PROC_BASE, 32, 1,  gpe_cpus_readb, s);
> +    register_ioport_write(PROC_BASE, 32, 1, gpe_cpus_writeb, s);
> +
> +    register_ioport_read(ACPI_GPE0_BLK_ADDRESS,
> +                         ACPI_GPE0_BLK_LEN / 2,
> +                         1,
> +                         gpe_sts_read,
> +                         s);
> +    register_ioport_read(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
> +                         ACPI_GPE0_BLK_LEN / 2,
> +                         1,
> +                         gpe_en_read,
> +                         s);
> +
> +    register_ioport_write(ACPI_GPE0_BLK_ADDRESS,
> +                          ACPI_GPE0_BLK_LEN / 2,
> +                          1,
> +                          gpe_sts_write,
> +                          s);
> +    register_ioport_write(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
> +                          ACPI_GPE0_BLK_LEN / 2,
> +                          1,
> +                          gpe_en_write,
> +                          s);
> +
> +    register_savevm(NULL, "gpe", 0, 1, gpe_save, gpe_load, s);
> +}
> +
> +static int piix4_pm_xen_initfn(PCIDevice *dev)
> +{
> +    PCIAcpiState *s = DO_UPCAST(PCIAcpiState, dev, dev);
> +    uint8_t *pci_conf;
> +
> +    pci_conf = s->dev.config;
> +    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
> +    pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_3);
> +    pci_conf[0x08] = 0x01;  /* B0 stepping */
> +    pci_conf[0x09] = 0x00;  /* base class */
> +    pci_config_set_class(pci_conf, PCI_CLASS_BRIDGE_OTHER);
> +    pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; /* header_type */
> +    pci_conf[0x3d] = 0x01;  /* Hardwired to PIRQA is used */
> +
> +    /* PMBA POWER MANAGEMENT BASE ADDRESS, hardcoded to 0x1f40
> +     * to make shutdown work for IPF, due to IPF Guest Firmware
> +     * will enumerate pci devices.
> +     *
> +     * TODO:  if Guest Firmware or Guest OS will change this PMBA,
> +     * More logic will be added.
> +     */
> +    pci_conf[0x40] = 0x41; /* Special device-specific BAR at 0x40 */
> +    pci_conf[0x41] = 0x1f;
> +    pci_conf[0x42] = 0x00;
> +    pci_conf[0x43] = 0x00;
> +
> +    s->pm1_control = ACPI_BITMASK_SCI_ENABLE;
> +
> +    acpi_map((PCIDevice *)s, 0, 0x1f40, 0x10, PCI_BASE_ADDRESS_SPACE_IO);
> +
> +    gpe_acpi_init();
> +
> +    register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s);
> +
> +    return 0;
> +}
> +
> +void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3)
> +{
> +    PCIDevice *dev;
> +    PCIAcpiState *s;
> +
> +    sci_irq = sci_irq_spec;
> +
> +    dev = pci_create(bus, devfn, "PIIX4 ACPI");
> +
> +    s = DO_UPCAST(PCIAcpiState, dev, dev);
> +
> +    s->irq = sci_irq_spec;
> +    s->cmos_s3 = cmos_s3;
> +
> +    qdev_init_nofail(&dev->qdev);
> +}
> +
> +static PCIDeviceInfo piix4_pm_xen_info = {
> +    .qdev.name    = "PIIX4 ACPI",
> +    .qdev.desc    = "dm",
> +    .qdev.size    = sizeof(PCIAcpiState),
> +    .qdev.vmsd    = &vmstate_acpi,
> +    .init         = piix4_pm_xen_initfn,
> +};
> +
> +static void piix4_pm_xen_register(void)
> +{
> +    pci_qdev_register(&piix4_pm_xen_info);
> +}
> +
> +device_init(piix4_pm_xen_register);
> diff --git a/hw/xen_common.h b/hw/xen_common.h
> index 020fdd7..e1f07ba 100644
> --- a/hw/xen_common.h
> +++ b/hw/xen_common.h
> @@ -34,4 +34,7 @@
>  /* hw/i8259-xen-stub.c */
>  qemu_irq *i8259_xen_init(void);
>
> +/* hw/xen_acpi_piix4.c */
> +void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3);
> +
>  #endif /* QEMU_HW_XEN_COMMON_H */
> diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c
> index 77563db..bfda944 100644
> --- a/hw/xen_machine_fv.c
> +++ b/hw/xen_machine_fv.c
> @@ -92,7 +92,6 @@ static void xen_init_fv(ram_addr_t ram_size,
>     qemu_irq *isa_irq;
>     qemu_irq *i8259;
>     qemu_irq *cmos_s3;
> -    qemu_irq *smi_irq;
>     IsaIrqState *isa_irq_state;
>     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
>     FDCtrl *floppy_controller;
> @@ -208,10 +207,7 @@ static void xen_init_fv(ram_addr_t ram_size,
>
>     if (acpi_enabled) {
>         cmos_s3 = qemu_allocate_irqs(pc_cmos_set_s3_resume, rtc_state, 1);
> -        smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1);
> -        piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
> -                isa_reserve_irq(9), *cmos_s3, *smi_irq,
> -                0);
> +        piix4_pm_xen_init(pci_bus, piix3_devfn + 3, isa_reserve_irq(9), *cmos_s3);
>     }
>
>     if (i440fx_state) {
> --
> 1.7.0.4
>
>
>
Stefano Stabellini - Aug. 13, 2010, 1:10 p.m.
On Thu, 12 Aug 2010, Blue Swirl wrote:
> On Thu, Aug 12, 2010 at 2:10 PM,  <stefano.stabellini@eu.citrix.com> wrote:
> > From: Anthony PERARD <anthony.perard@citrix.com>
> >
> > Xen currently uses a different BIOS (hvmloader + rombios) therefore the
> > Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
> > We plan on fixing this properly but at the moment we are just adding a
> > new Xen specific acpi_piix4 implementation.
> 
> I'd suppose the proper fix is to modify acpi_piix4 instead of copy&paste.
> 

Yes, but we would need few xen specific hooks in acpi_piix4 to make the S
state transitions work correctly; if you are OK with it we'll proceed
in that direction.
Anthony Liguori - Aug. 13, 2010, 6:57 p.m.
On 08/12/2010 09:10 AM, stefano.stabellini@eu.citrix.com wrote:
> From: Anthony PERARD<anthony.perard@citrix.com>
>
> Xen currently uses a different BIOS (hvmloader + rombios) therefore the
> Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
> We plan on fixing this properly but at the moment we are just adding a
> new Xen specific acpi_piix4 implementation.
> This patch is optional; without it the VM boots but it cannot shutdown
> properly or go to S3.
>    

What's the long term plan?  Will Xen adopt SeaBIOS or will you adapt 
your BIOS to cope with our ACPI implementation?

Regards,

Anthony Liguori

> Signed-off-by: Anthony PERARD<anthony.perard@citrix.com>
> Signed-off-by: Stefano Stabellini<stefano.stabellini@eu.citrix.com>
> ---
>   Makefile.target     |    1 +
>   hw/xen_acpi_piix4.c |  424 +++++++++++++++++++++++++++++++++++++++++++++++++++
>   hw/xen_common.h     |    3 +
>   hw/xen_machine_fv.c |    6 +-
>   4 files changed, 429 insertions(+), 5 deletions(-)
>   create mode 100644 hw/xen_acpi_piix4.c
>
> diff --git a/Makefile.target b/Makefile.target
> index 1984cdd..a2d9217 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -325,6 +325,7 @@ obj-xen-y += piix_pci.o
>   obj-xen-y += mc146818rtc.o
>   obj-xen-y += xenstore.o
>   obj-xen-y += xen_platform.o
> +obj-xen-y += xen_acpi_piix4.o
>
>   obj-xen-y += xen_mapcache.o
>   obj-xen-y += stub-functions.o
> diff --git a/hw/xen_acpi_piix4.c b/hw/xen_acpi_piix4.c
> new file mode 100644
> index 0000000..3c65963
> --- /dev/null
> +++ b/hw/xen_acpi_piix4.c
> @@ -0,0 +1,424 @@
> + /*
> + * PIIX4 ACPI controller emulation
> + *
> + * Winston liwen Wang, winston.l.wang@intel.com
> + * Copyright (c) 2006 , Intel Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "hw.h"
> +#include "pc.h"
> +#include "pci.h"
> +#include "sysemu.h"
> +#include "acpi.h"
> +
> +#include "xen_backend.h"
> +#include "xen_common.h"
> +#include "qemu-log.h"
> +
> +#include<xen/hvm/ioreq.h>
> +#include<xen/hvm/params.h>
> +
> +#define PIIX4ACPI_LOG_ERROR 0
> +#define PIIX4ACPI_LOG_INFO 1
> +#define PIIX4ACPI_LOG_DEBUG 2
> +#define PIIX4ACPI_LOGLEVEL PIIX4ACPI_LOG_INFO
> +#define PIIX4ACPI_LOG(level, fmt, ...) do { if (level<= PIIX4ACPI_LOGLEVEL) qemu_log(fmt, ## __VA_ARGS__); } while (0)
> +
> +/* Sleep state type codes as defined by the \_Sx objects in the DSDT. */
> +/* These must be kept in sync with the DSDT (hvmloader/acpi/dsdt.asl) */
> +#define SLP_TYP_S4        (6<<  10)
> +#define SLP_TYP_S3        (5<<  10)
> +#define SLP_TYP_S5        (7<<  10)
> +
> +#define ACPI_DBG_IO_ADDR  0xb044
> +#define ACPI_PHP_IO_ADDR  0x10c0
> +
> +#define PHP_EVT_ADD     0x0
> +#define PHP_EVT_REMOVE  0x3
> +
> +/* The bit in GPE0_STS/EN to notify the pci hotplug event */
> +#define ACPI_PHP_GPE_BIT 3
> +
> +#define DEVFN_TO_PHP_SLOT_REG(devfn) (devfn>>  1)
> +#define PHP_SLOT_REG_TO_DEVFN(reg, hilo) ((reg<<  1) | hilo)
> +
> +/* ioport to monitor cpu add/remove status */
> +#define PROC_BASE 0xaf00
> +
> +typedef struct PCIAcpiState {
> +    PCIDevice dev;
> +    uint16_t pm1_control; /* pm1a_ECNT_BLK */
> +    qemu_irq irq;
> +    qemu_irq cmos_s3;
> +} PCIAcpiState;
> +
> +typedef struct GPEState {
> +    /* GPE0 block */
> +    uint8_t gpe0_sts[ACPI_GPE0_BLK_LEN / 2];
> +    uint8_t gpe0_en[ACPI_GPE0_BLK_LEN / 2];
> +
> +    /* CPU bitmap */
> +    uint8_t cpus_sts[32];
> +
> +    /* SCI IRQ level */
> +    uint8_t sci_asserted;
> +
> +} GPEState;
> +
> +static GPEState gpe_state;
> +
> +static qemu_irq sci_irq;
> +
> +typedef struct AcpiDeviceState AcpiDeviceState;
> +AcpiDeviceState *acpi_device_table;
> +
> +static const VMStateDescription vmstate_acpi = {
> +    .name = "PIIX4 ACPI",
> +    .version_id = 1,
> +    .fields      = (VMStateField []) {
> +        VMSTATE_PCI_DEVICE(dev, PCIAcpiState),
> +        VMSTATE_UINT16(pm1_control, PCIAcpiState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PCIAcpiState *s = opaque;
> +    s->pm1_control = (s->pm1_control&  0xff00) | (val&  0xff);
> +}
> +
> +static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr)
> +{
> +    PCIAcpiState *s = opaque;
> +    /* Mask out the write-only bits */
> +    return (uint8_t)(s->pm1_control&  ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE));
> +}
> +
> +static void acpi_shutdown(PCIAcpiState *s, uint32_t val)
> +{
> +    if (!(val&  ACPI_BITMASK_SLEEP_ENABLE))
> +        return;
> +
> +    switch (val&  ACPI_BITMASK_SLEEP_TYPE) {
> +    case SLP_TYP_S3:
> +        qemu_system_reset();
> +        qemu_irq_raise(s->cmos_s3);
> +        xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
> +        break;
> +    case SLP_TYP_S4:
> +    case SLP_TYP_S5:
> +        qemu_system_shutdown_request();
> +        break;
> +    default:
> +        break;
> +    }
> +}
> +
> +static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PCIAcpiState *s = opaque;
> +
> +    val<<= 8;
> +    s->pm1_control = ((s->pm1_control&  0xff) | val)&  ~ACPI_BITMASK_SLEEP_ENABLE;
> +
> +    acpi_shutdown(s, val);
> +}
> +
> +static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr)
> +{
> +    PCIAcpiState *s = opaque;
> +    /* Mask out the write-only bits */
> +    return (uint8_t)((s->pm1_control&  ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE))>>  8);
> +}
> +
> +static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PCIAcpiState *s = opaque;
> +
> +    s->pm1_control = val&  ~ACPI_BITMASK_SLEEP_ENABLE;
> +
> +    acpi_shutdown(s, val);
> +}
> +
> +static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr)
> +{
> +    PCIAcpiState *s = opaque;
> +    /* Mask out the write-only bits */
> +    return (s->pm1_control&  ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE));
> +}
> +
> +static void acpi_map(PCIDevice *pci_dev, int region_num,
> +                     uint32_t addr, uint32_t size, int type)
> +{
> +    PCIAcpiState *d = (PCIAcpiState *)pci_dev;
> +
> +    /* Byte access */
> +    register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d);
> +    register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d);
> +    register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d);
> +    register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d);
> +
> +    /* Word access */
> +    register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d);
> +    register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d);
> +}
> +
> +static inline int test_bit(uint8_t *map, int bit)
> +{
> +    return ( map[bit / 8]&  (1<<  (bit % 8)) );
> +}
> +
> +static inline void set_bit(uint8_t *map, int bit)
> +{
> +    map[bit / 8] |= (1<<  (bit % 8));
> +}
> +
> +static inline void clear_bit(uint8_t *map, int bit)
> +{
> +    map[bit / 8]&= ~(1<<  (bit % 8));
> +}
> +
> +static void acpi_dbg_writel(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "ACPI: DBG: 0x%08x\n", val);
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "ACPI:debug: write addr=0x%x, val=0x%x.\n", addr, val);
> +}
> +
> +/* GPEx_STS occupy 1st half of the block, while GPEx_EN 2nd half */
> +static uint32_t gpe_sts_read(void *opaque, uint32_t addr)
> +{
> +    GPEState *s = opaque;
> +
> +    return s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS];
> +}
> +
> +/* write 1 to clear specific GPE bits */
> +static void gpe_sts_write(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    GPEState *s = opaque;
> +    int hotplugged = 0;
> +
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_sts_write: addr=0x%x, val=0x%x.\n", addr, val);
> +
> +    hotplugged = test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT);
> +    s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS]&= ~val;
> +    if ( s->sci_asserted&&
> +         hotplugged&&
> +         !test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT)) {
> +        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "Clear the GPE0_STS bit for ACPI hotplug&  deassert the IRQ.\n");
> +        qemu_irq_lower(sci_irq);
> +    }
> +
> +}
> +
> +static uint32_t gpe_en_read(void *opaque, uint32_t addr)
> +{
> +    GPEState *s = opaque;
> +
> +    return s->gpe0_en[addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2)];
> +}
> +
> +/* write 0 to clear en bit */
> +static void gpe_en_write(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    GPEState *s = opaque;
> +    int reg_count;
> +
> +    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_en_write: addr=0x%x, val=0x%x.\n", addr, val);
> +    reg_count = addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2);
> +    s->gpe0_en[reg_count] = val;
> +    /* If disable GPE bit right after generating SCI on it,
> +     * need deassert the intr to avoid redundant intrs
> +     */
> +    if ( s->sci_asserted&&
> +         reg_count == (ACPI_PHP_GPE_BIT / 8)&&
> +         !(val&  (1<<  (ACPI_PHP_GPE_BIT % 8))) ) {
> +        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "deassert due to disable GPE bit.\n");
> +        s->sci_asserted = 0;
> +        qemu_irq_lower(sci_irq);
> +    }
> +
> +}
> +
> +static void gpe_save(QEMUFile* f, void* opaque)
> +{
> +    GPEState *s = (GPEState*)opaque;
> +    int i;
> +
> +    for ( i = 0; i<  ACPI_GPE0_BLK_LEN / 2; i++ ) {
> +        qemu_put_8s(f,&s->gpe0_sts[i]);
> +        qemu_put_8s(f,&s->gpe0_en[i]);
> +    }
> +
> +    qemu_put_8s(f,&s->sci_asserted);
> +    if ( s->sci_asserted ) {
> +        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "gpe_save with sci asserted!\n");
> +    }
> +}
> +
> +static int gpe_load(QEMUFile* f, void* opaque, int version_id)
> +{
> +    GPEState *s = (GPEState*)opaque;
> +    int i;
> +    if (version_id != 1)
> +        return -EINVAL;
> +
> +    for ( i = 0; i<  ACPI_GPE0_BLK_LEN / 2; i++ ) {
> +        qemu_get_8s(f,&s->gpe0_sts[i]);
> +        qemu_get_8s(f,&s->gpe0_en[i]);
> +    }
> +
> +    qemu_get_8s(f,&s->sci_asserted);
> +    return 0;
> +}
> +
> +static uint32_t gpe_cpus_readb(void *opaque, uint32_t addr)
> +{
> +    uint32_t val = 0;
> +    GPEState *g = opaque;
> +
> +    switch (addr) {
> +        case PROC_BASE ... PROC_BASE+31:
> +            val = g->cpus_sts[addr - PROC_BASE];
> +        default:
> +            break;
> +    }
> +
> +    return val;
> +}
> +
> +static void gpe_cpus_writeb(void *opaque, uint32_t addr, uint32_t val)
> +{
> +    /* GPEState *g = opaque; */
> +
> +    switch (addr) {
> +        case PROC_BASE ... PROC_BASE + 31:
> +            /* don't allow to change cpus_sts from inside a guest */
> +            break;
> +        default:
> +            break;
> +    }
> +}
> +
> +static void gpe_acpi_init(void)
> +{
> +    GPEState *s =&gpe_state;
> +    memset(s, 0, sizeof(GPEState));
> +
> +    s->cpus_sts[0] = 1;
> +
> +    register_ioport_read(PROC_BASE, 32, 1,  gpe_cpus_readb, s);
> +    register_ioport_write(PROC_BASE, 32, 1, gpe_cpus_writeb, s);
> +
> +    register_ioport_read(ACPI_GPE0_BLK_ADDRESS,
> +                         ACPI_GPE0_BLK_LEN / 2,
> +                         1,
> +                         gpe_sts_read,
> +                         s);
> +    register_ioport_read(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
> +                         ACPI_GPE0_BLK_LEN / 2,
> +                         1,
> +                         gpe_en_read,
> +                         s);
> +
> +    register_ioport_write(ACPI_GPE0_BLK_ADDRESS,
> +                          ACPI_GPE0_BLK_LEN / 2,
> +                          1,
> +                          gpe_sts_write,
> +                          s);
> +    register_ioport_write(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
> +                          ACPI_GPE0_BLK_LEN / 2,
> +                          1,
> +                          gpe_en_write,
> +                          s);
> +
> +    register_savevm(NULL, "gpe", 0, 1, gpe_save, gpe_load, s);
> +}
> +
> +static int piix4_pm_xen_initfn(PCIDevice *dev)
> +{
> +    PCIAcpiState *s = DO_UPCAST(PCIAcpiState, dev, dev);
> +    uint8_t *pci_conf;
> +
> +    pci_conf = s->dev.config;
> +    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
> +    pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_3);
> +    pci_conf[0x08] = 0x01;  /* B0 stepping */
> +    pci_conf[0x09] = 0x00;  /* base class */
> +    pci_config_set_class(pci_conf, PCI_CLASS_BRIDGE_OTHER);
> +    pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; /* header_type */
> +    pci_conf[0x3d] = 0x01;  /* Hardwired to PIRQA is used */
> +
> +    /* PMBA POWER MANAGEMENT BASE ADDRESS, hardcoded to 0x1f40
> +     * to make shutdown work for IPF, due to IPF Guest Firmware
> +     * will enumerate pci devices.
> +     *
> +     * TODO:  if Guest Firmware or Guest OS will change this PMBA,
> +     * More logic will be added.
> +     */
> +    pci_conf[0x40] = 0x41; /* Special device-specific BAR at 0x40 */
> +    pci_conf[0x41] = 0x1f;
> +    pci_conf[0x42] = 0x00;
> +    pci_conf[0x43] = 0x00;
> +
> +    s->pm1_control = ACPI_BITMASK_SCI_ENABLE;
> +
> +    acpi_map((PCIDevice *)s, 0, 0x1f40, 0x10, PCI_BASE_ADDRESS_SPACE_IO);
> +
> +    gpe_acpi_init();
> +
> +    register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s);
> +
> +    return 0;
> +}
> +
> +void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3)
> +{
> +    PCIDevice *dev;
> +    PCIAcpiState *s;
> +
> +    sci_irq = sci_irq_spec;
> +
> +    dev = pci_create(bus, devfn, "PIIX4 ACPI");
> +
> +    s = DO_UPCAST(PCIAcpiState, dev, dev);
> +
> +    s->irq = sci_irq_spec;
> +    s->cmos_s3 = cmos_s3;
> +
> +    qdev_init_nofail(&dev->qdev);
> +}
> +
> +static PCIDeviceInfo piix4_pm_xen_info = {
> +    .qdev.name    = "PIIX4 ACPI",
> +    .qdev.desc    = "dm",
> +    .qdev.size    = sizeof(PCIAcpiState),
> +    .qdev.vmsd    =&vmstate_acpi,
> +    .init         = piix4_pm_xen_initfn,
> +};
> +
> +static void piix4_pm_xen_register(void)
> +{
> +    pci_qdev_register(&piix4_pm_xen_info);
> +}
> +
> +device_init(piix4_pm_xen_register);
> diff --git a/hw/xen_common.h b/hw/xen_common.h
> index 020fdd7..e1f07ba 100644
> --- a/hw/xen_common.h
> +++ b/hw/xen_common.h
> @@ -34,4 +34,7 @@
>   /* hw/i8259-xen-stub.c */
>   qemu_irq *i8259_xen_init(void);
>
> +/* hw/xen_acpi_piix4.c */
> +void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3);
> +
>   #endif /* QEMU_HW_XEN_COMMON_H */
> diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c
> index 77563db..bfda944 100644
> --- a/hw/xen_machine_fv.c
> +++ b/hw/xen_machine_fv.c
> @@ -92,7 +92,6 @@ static void xen_init_fv(ram_addr_t ram_size,
>       qemu_irq *isa_irq;
>       qemu_irq *i8259;
>       qemu_irq *cmos_s3;
> -    qemu_irq *smi_irq;
>       IsaIrqState *isa_irq_state;
>       DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
>       FDCtrl *floppy_controller;
> @@ -208,10 +207,7 @@ static void xen_init_fv(ram_addr_t ram_size,
>
>       if (acpi_enabled) {
>           cmos_s3 = qemu_allocate_irqs(pc_cmos_set_s3_resume, rtc_state, 1);
> -        smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1);
> -        piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
> -                isa_reserve_irq(9), *cmos_s3, *smi_irq,
> -                0);
> +        piix4_pm_xen_init(pci_bus, piix3_devfn + 3, isa_reserve_irq(9), *cmos_s3);
>       }
>
>       if (i440fx_state) {
>
Stefano Stabellini - Aug. 13, 2010, 7:37 p.m.
On Fri, 13 Aug 2010, Anthony Liguori wrote:
> On 08/12/2010 09:10 AM, stefano.stabellini@eu.citrix.com wrote:
> > From: Anthony PERARD<anthony.perard@citrix.com>
> >
> > Xen currently uses a different BIOS (hvmloader + rombios) therefore the
> > Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
> > We plan on fixing this properly but at the moment we are just adding a
> > new Xen specific acpi_piix4 implementation.
> > This patch is optional; without it the VM boots but it cannot shutdown
> > properly or go to S3.
> >
> 
> What's the long term plan?  Will Xen adopt SeaBIOS or will you adapt
> your BIOS to cope with our ACPI implementation?
> 

I think it shouldn't be too difficult to adapt our current BIOS, but
we'll need few xen specific hooks in acpi_piix4.
The price that we'll have to pay doing so is loosing live-migration
compatibility with older xen installations.
Anthony Liguori - Aug. 13, 2010, 8:51 p.m.
On 08/13/2010 02:37 PM, Stefano Stabellini wrote:
> On Fri, 13 Aug 2010, Anthony Liguori wrote:
>    
>> On 08/12/2010 09:10 AM, stefano.stabellini@eu.citrix.com wrote:
>>      
>>> From: Anthony PERARD<anthony.perard@citrix.com>
>>>
>>> Xen currently uses a different BIOS (hvmloader + rombios) therefore the
>>> Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
>>> We plan on fixing this properly but at the moment we are just adding a
>>> new Xen specific acpi_piix4 implementation.
>>> This patch is optional; without it the VM boots but it cannot shutdown
>>> properly or go to S3.
>>>
>>>        
>> What's the long term plan?  Will Xen adopt SeaBIOS or will you adapt
>> your BIOS to cope with our ACPI implementation?
>>
>>      
> I think it shouldn't be too difficult to adapt our current BIOS, but
> we'll need few xen specific hooks in acpi_piix4.
> The price that we'll have to pay doing so is loosing live-migration
> compatibility with older xen installations.
>    

Does Xen migrate roms (like the BIOS) in such a way that the persist 
after a reboot?

I noticed there was only one machine type defined.  In our experience, 
to preserve compatibility with migration, it's useful to have versioned 
machine names.  We also have some special machine parameters to support 
compatibility with qdev.

How does Xen handle hvm migration machine model compatibility?

Regards,

Anthony Liguori
Stefano Stabellini - Aug. 16, 2010, 11:10 a.m.
On Fri, 13 Aug 2010, Anthony Liguori wrote:
> On 08/13/2010 02:37 PM, Stefano Stabellini wrote:
> > On Fri, 13 Aug 2010, Anthony Liguori wrote:
> >    
> >> On 08/12/2010 09:10 AM, stefano.stabellini@eu.citrix.com wrote:
> >>      
> >>> From: Anthony PERARD<anthony.perard@citrix.com>
> >>>
> >>> Xen currently uses a different BIOS (hvmloader + rombios) therefore the
> >>> Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
> >>> We plan on fixing this properly but at the moment we are just adding a
> >>> new Xen specific acpi_piix4 implementation.
> >>> This patch is optional; without it the VM boots but it cannot shutdown
> >>> properly or go to S3.
> >>>
> >>>        
> >> What's the long term plan?  Will Xen adopt SeaBIOS or will you adapt
> >> your BIOS to cope with our ACPI implementation?
> >>
> >>      
> > I think it shouldn't be too difficult to adapt our current BIOS, but
> > we'll need few xen specific hooks in acpi_piix4.
> > The price that we'll have to pay doing so is loosing live-migration
> > compatibility with older xen installations.
> >    
> 
> Does Xen migrate roms (like the BIOS) in such a way that the persist 
> after a reboot?
> 

No, I don't think so.
However it is common practice not to require any VM reboot on host
upgrade.


> I noticed there was only one machine type defined.  In our experience, 
> to preserve compatibility with migration, it's useful to have versioned 
> machine names.  We also have some special machine parameters to support 
> compatibility with qdev.
> 

Thanks for the tip, we'll look into it.
Knowing that we would have the BIOS problem mentioned above, we didn't
try yet any save/restore or migration compatibility between old qemu-xen
and new qemu.


> How does Xen handle hvm migration machine model compatibility?
> 
 
We use per-device save state versions and we tend to use always the same
set of devices.

Patch

diff --git a/Makefile.target b/Makefile.target
index 1984cdd..a2d9217 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -325,6 +325,7 @@  obj-xen-y += piix_pci.o
 obj-xen-y += mc146818rtc.o
 obj-xen-y += xenstore.o
 obj-xen-y += xen_platform.o
+obj-xen-y += xen_acpi_piix4.o
 
 obj-xen-y += xen_mapcache.o
 obj-xen-y += stub-functions.o
diff --git a/hw/xen_acpi_piix4.c b/hw/xen_acpi_piix4.c
new file mode 100644
index 0000000..3c65963
--- /dev/null
+++ b/hw/xen_acpi_piix4.c
@@ -0,0 +1,424 @@ 
+ /*
+ * PIIX4 ACPI controller emulation
+ *
+ * Winston liwen Wang, winston.l.wang@intel.com
+ * Copyright (c) 2006 , Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+#include "acpi.h"
+
+#include "xen_backend.h"
+#include "xen_common.h"
+#include "qemu-log.h"
+
+#include <xen/hvm/ioreq.h>
+#include <xen/hvm/params.h>
+
+#define PIIX4ACPI_LOG_ERROR 0
+#define PIIX4ACPI_LOG_INFO 1
+#define PIIX4ACPI_LOG_DEBUG 2
+#define PIIX4ACPI_LOGLEVEL PIIX4ACPI_LOG_INFO
+#define PIIX4ACPI_LOG(level, fmt, ...) do { if (level <= PIIX4ACPI_LOGLEVEL) qemu_log(fmt, ## __VA_ARGS__); } while (0)
+
+/* Sleep state type codes as defined by the \_Sx objects in the DSDT. */
+/* These must be kept in sync with the DSDT (hvmloader/acpi/dsdt.asl) */
+#define SLP_TYP_S4        (6 << 10)
+#define SLP_TYP_S3        (5 << 10)
+#define SLP_TYP_S5        (7 << 10)
+
+#define ACPI_DBG_IO_ADDR  0xb044
+#define ACPI_PHP_IO_ADDR  0x10c0
+
+#define PHP_EVT_ADD     0x0
+#define PHP_EVT_REMOVE  0x3
+
+/* The bit in GPE0_STS/EN to notify the pci hotplug event */
+#define ACPI_PHP_GPE_BIT 3
+
+#define DEVFN_TO_PHP_SLOT_REG(devfn) (devfn >> 1)
+#define PHP_SLOT_REG_TO_DEVFN(reg, hilo) ((reg << 1) | hilo)
+
+/* ioport to monitor cpu add/remove status */
+#define PROC_BASE 0xaf00
+
+typedef struct PCIAcpiState {
+    PCIDevice dev;
+    uint16_t pm1_control; /* pm1a_ECNT_BLK */
+    qemu_irq irq;
+    qemu_irq cmos_s3;
+} PCIAcpiState;
+
+typedef struct GPEState {
+    /* GPE0 block */
+    uint8_t gpe0_sts[ACPI_GPE0_BLK_LEN / 2];
+    uint8_t gpe0_en[ACPI_GPE0_BLK_LEN / 2];
+
+    /* CPU bitmap */
+    uint8_t cpus_sts[32];
+
+    /* SCI IRQ level */
+    uint8_t sci_asserted;
+
+} GPEState;
+
+static GPEState gpe_state;
+
+static qemu_irq sci_irq;
+
+typedef struct AcpiDeviceState AcpiDeviceState;
+AcpiDeviceState *acpi_device_table;
+
+static const VMStateDescription vmstate_acpi = {
+    .name = "PIIX4 ACPI",
+    .version_id = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_PCI_DEVICE(dev, PCIAcpiState),
+        VMSTATE_UINT16(pm1_control, PCIAcpiState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+    s->pm1_control = (s->pm1_control & 0xff00) | (val & 0xff);
+}
+
+static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    /* Mask out the write-only bits */
+    return (uint8_t)(s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE));
+}
+
+static void acpi_shutdown(PCIAcpiState *s, uint32_t val)
+{
+    if (!(val & ACPI_BITMASK_SLEEP_ENABLE))
+        return;
+
+    switch (val & ACPI_BITMASK_SLEEP_TYPE) {
+    case SLP_TYP_S3:
+        qemu_system_reset();
+        qemu_irq_raise(s->cmos_s3);
+        xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
+        break;
+    case SLP_TYP_S4:
+    case SLP_TYP_S5:
+        qemu_system_shutdown_request();
+        break;
+    default:
+        break;
+    }
+}
+
+static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    val <<= 8;
+    s->pm1_control = ((s->pm1_control & 0xff) | val) & ~ACPI_BITMASK_SLEEP_ENABLE;
+
+    acpi_shutdown(s, val);
+}
+
+static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    /* Mask out the write-only bits */
+    return (uint8_t)((s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE)) >> 8);
+}
+
+static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = val & ~ACPI_BITMASK_SLEEP_ENABLE;
+
+    acpi_shutdown(s, val);
+}
+
+static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    /* Mask out the write-only bits */
+    return (s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE));
+}
+
+static void acpi_map(PCIDevice *pci_dev, int region_num,
+                     uint32_t addr, uint32_t size, int type)
+{
+    PCIAcpiState *d = (PCIAcpiState *)pci_dev;
+
+    /* Byte access */
+    register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d);
+    register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d);
+    register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d);
+    register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d);
+
+    /* Word access */
+    register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d);
+    register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d);
+}
+
+static inline int test_bit(uint8_t *map, int bit)
+{
+    return ( map[bit / 8] & (1 << (bit % 8)) );
+}
+
+static inline void set_bit(uint8_t *map, int bit)
+{
+    map[bit / 8] |= (1 << (bit % 8));
+}
+
+static inline void clear_bit(uint8_t *map, int bit)
+{
+    map[bit / 8] &= ~(1 << (bit % 8));
+}
+
+static void acpi_dbg_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "ACPI: DBG: 0x%08x\n", val);
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "ACPI:debug: write addr=0x%x, val=0x%x.\n", addr, val);
+}
+
+/* GPEx_STS occupy 1st half of the block, while GPEx_EN 2nd half */
+static uint32_t gpe_sts_read(void *opaque, uint32_t addr)
+{
+    GPEState *s = opaque;
+
+    return s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS];
+}
+
+/* write 1 to clear specific GPE bits */
+static void gpe_sts_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    GPEState *s = opaque;
+    int hotplugged = 0;
+
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_sts_write: addr=0x%x, val=0x%x.\n", addr, val);
+
+    hotplugged = test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT);
+    s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS] &= ~val;
+    if ( s->sci_asserted &&
+         hotplugged &&
+         !test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT)) {
+        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "Clear the GPE0_STS bit for ACPI hotplug & deassert the IRQ.\n");
+        qemu_irq_lower(sci_irq);
+    }
+
+}
+
+static uint32_t gpe_en_read(void *opaque, uint32_t addr)
+{
+    GPEState *s = opaque;
+
+    return s->gpe0_en[addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2)];
+}
+
+/* write 0 to clear en bit */
+static void gpe_en_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    GPEState *s = opaque;
+    int reg_count;
+
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_en_write: addr=0x%x, val=0x%x.\n", addr, val);
+    reg_count = addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2);
+    s->gpe0_en[reg_count] = val;
+    /* If disable GPE bit right after generating SCI on it,
+     * need deassert the intr to avoid redundant intrs
+     */
+    if ( s->sci_asserted &&
+         reg_count == (ACPI_PHP_GPE_BIT / 8) &&
+         !(val & (1 << (ACPI_PHP_GPE_BIT % 8))) ) {
+        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "deassert due to disable GPE bit.\n");
+        s->sci_asserted = 0;
+        qemu_irq_lower(sci_irq);
+    }
+
+}
+
+static void gpe_save(QEMUFile* f, void* opaque)
+{
+    GPEState *s = (GPEState*)opaque;
+    int i;
+
+    for ( i = 0; i < ACPI_GPE0_BLK_LEN / 2; i++ ) {
+        qemu_put_8s(f, &s->gpe0_sts[i]);
+        qemu_put_8s(f, &s->gpe0_en[i]);
+    }
+
+    qemu_put_8s(f, &s->sci_asserted);
+    if ( s->sci_asserted ) {
+        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "gpe_save with sci asserted!\n");
+    }
+}
+
+static int gpe_load(QEMUFile* f, void* opaque, int version_id)
+{
+    GPEState *s = (GPEState*)opaque;
+    int i;
+    if (version_id != 1)
+        return -EINVAL;
+
+    for ( i = 0; i < ACPI_GPE0_BLK_LEN / 2; i++ ) {
+        qemu_get_8s(f, &s->gpe0_sts[i]);
+        qemu_get_8s(f, &s->gpe0_en[i]);
+    }
+
+    qemu_get_8s(f, &s->sci_asserted);
+    return 0;
+}
+
+static uint32_t gpe_cpus_readb(void *opaque, uint32_t addr)
+{
+    uint32_t val = 0;
+    GPEState *g = opaque;
+
+    switch (addr) {
+        case PROC_BASE ... PROC_BASE+31:
+            val = g->cpus_sts[addr - PROC_BASE];
+        default:
+            break;
+    }
+
+    return val;
+}
+
+static void gpe_cpus_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    /* GPEState *g = opaque; */
+
+    switch (addr) {
+        case PROC_BASE ... PROC_BASE + 31:
+            /* don't allow to change cpus_sts from inside a guest */
+            break;
+        default:
+            break;
+    }
+}
+
+static void gpe_acpi_init(void)
+{
+    GPEState *s = &gpe_state;
+    memset(s, 0, sizeof(GPEState));
+
+    s->cpus_sts[0] = 1;
+
+    register_ioport_read(PROC_BASE, 32, 1,  gpe_cpus_readb, s);
+    register_ioport_write(PROC_BASE, 32, 1, gpe_cpus_writeb, s);
+
+    register_ioport_read(ACPI_GPE0_BLK_ADDRESS,
+                         ACPI_GPE0_BLK_LEN / 2,
+                         1,
+                         gpe_sts_read,
+                         s);
+    register_ioport_read(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
+                         ACPI_GPE0_BLK_LEN / 2,
+                         1,
+                         gpe_en_read,
+                         s);
+
+    register_ioport_write(ACPI_GPE0_BLK_ADDRESS,
+                          ACPI_GPE0_BLK_LEN / 2,
+                          1,
+                          gpe_sts_write,
+                          s);
+    register_ioport_write(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
+                          ACPI_GPE0_BLK_LEN / 2,
+                          1,
+                          gpe_en_write,
+                          s);
+
+    register_savevm(NULL, "gpe", 0, 1, gpe_save, gpe_load, s);
+}
+
+static int piix4_pm_xen_initfn(PCIDevice *dev)
+{
+    PCIAcpiState *s = DO_UPCAST(PCIAcpiState, dev, dev);
+    uint8_t *pci_conf;
+
+    pci_conf = s->dev.config;
+    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_3);
+    pci_conf[0x08] = 0x01;  /* B0 stepping */
+    pci_conf[0x09] = 0x00;  /* base class */
+    pci_config_set_class(pci_conf, PCI_CLASS_BRIDGE_OTHER);
+    pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; /* header_type */
+    pci_conf[0x3d] = 0x01;  /* Hardwired to PIRQA is used */
+
+    /* PMBA POWER MANAGEMENT BASE ADDRESS, hardcoded to 0x1f40
+     * to make shutdown work for IPF, due to IPF Guest Firmware
+     * will enumerate pci devices.
+     *
+     * TODO:  if Guest Firmware or Guest OS will change this PMBA,
+     * More logic will be added.
+     */
+    pci_conf[0x40] = 0x41; /* Special device-specific BAR at 0x40 */
+    pci_conf[0x41] = 0x1f;
+    pci_conf[0x42] = 0x00;
+    pci_conf[0x43] = 0x00;
+
+    s->pm1_control = ACPI_BITMASK_SCI_ENABLE;
+
+    acpi_map((PCIDevice *)s, 0, 0x1f40, 0x10, PCI_BASE_ADDRESS_SPACE_IO);
+
+    gpe_acpi_init();
+
+    register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s);
+
+    return 0;
+}
+
+void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3)
+{
+    PCIDevice *dev;
+    PCIAcpiState *s;
+
+    sci_irq = sci_irq_spec;
+
+    dev = pci_create(bus, devfn, "PIIX4 ACPI");
+
+    s = DO_UPCAST(PCIAcpiState, dev, dev);
+
+    s->irq = sci_irq_spec;
+    s->cmos_s3 = cmos_s3;
+
+    qdev_init_nofail(&dev->qdev);
+}
+
+static PCIDeviceInfo piix4_pm_xen_info = {
+    .qdev.name    = "PIIX4 ACPI",
+    .qdev.desc    = "dm",
+    .qdev.size    = sizeof(PCIAcpiState),
+    .qdev.vmsd    = &vmstate_acpi,
+    .init         = piix4_pm_xen_initfn,
+};
+
+static void piix4_pm_xen_register(void)
+{
+    pci_qdev_register(&piix4_pm_xen_info);
+}
+
+device_init(piix4_pm_xen_register);
diff --git a/hw/xen_common.h b/hw/xen_common.h
index 020fdd7..e1f07ba 100644
--- a/hw/xen_common.h
+++ b/hw/xen_common.h
@@ -34,4 +34,7 @@ 
 /* hw/i8259-xen-stub.c */
 qemu_irq *i8259_xen_init(void);
 
+/* hw/xen_acpi_piix4.c */
+void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3);
+
 #endif /* QEMU_HW_XEN_COMMON_H */
diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c
index 77563db..bfda944 100644
--- a/hw/xen_machine_fv.c
+++ b/hw/xen_machine_fv.c
@@ -92,7 +92,6 @@  static void xen_init_fv(ram_addr_t ram_size,
     qemu_irq *isa_irq;
     qemu_irq *i8259;
     qemu_irq *cmos_s3;
-    qemu_irq *smi_irq;
     IsaIrqState *isa_irq_state;
     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
     FDCtrl *floppy_controller;
@@ -208,10 +207,7 @@  static void xen_init_fv(ram_addr_t ram_size,
 
     if (acpi_enabled) {
         cmos_s3 = qemu_allocate_irqs(pc_cmos_set_s3_resume, rtc_state, 1);
-        smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1);
-        piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
-                isa_reserve_irq(9), *cmos_s3, *smi_irq,
-                0);
+        piix4_pm_xen_init(pci_bus, piix3_devfn + 3, isa_reserve_irq(9), *cmos_s3);
     }
 
     if (i440fx_state) {