Patchwork [RFC,V4,14/14] xen: Add a Xen specific ACPI Implementation to target-xen

login
register
mail settings
Submitter Anthony PERARD
Date Sept. 28, 2010, 3:01 p.m.
Message ID <1285686097-13036-15-git-send-email-anthony.perard@citrix.com>
Download mbox | patch
Permalink /patch/65986/
State New
Headers show

Comments

Anthony PERARD - Sept. 28, 2010, 3:01 p.m.
From: Anthony PERARD <anthony.perard@citrix.com>

Xen currently uses a different BIOS (hvmloader + rombios) therefore the
Qemu acpi_piix4 implementation wouldn't work correctly with Xen.
We plan on fixing this properly but at the moment we are just adding a
new Xen specific acpi_piix4 implementation.
This patch is optional; without it the VM boots but it cannot shutdown
properly or go to S3.

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 Makefile.target     |    1 +
 hw/xen_acpi_piix4.c |  411 +++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/xen_common.h     |    3 +
 hw/xen_machine_fv.c |    6 +-
 4 files changed, 416 insertions(+), 5 deletions(-)
 create mode 100644 hw/xen_acpi_piix4.c

Patch

diff --git a/Makefile.target b/Makefile.target
index fddce71..c796566 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -194,6 +194,7 @@  obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o
 # xen full virtualized machine
 obj-i386-$(CONFIG_XEN) += xen_machine_fv.o
 obj-i386-$(CONFIG_XEN) += xen_platform.o
+obj-i386-$(CONFIG_XEN) += xen_acpi_piix4.o
 
 # USB layer
 obj-$(CONFIG_USB_OHCI) += usb-ohci.o
diff --git a/hw/xen_acpi_piix4.c b/hw/xen_acpi_piix4.c
new file mode 100644
index 0000000..03f0629
--- /dev/null
+++ b/hw/xen_acpi_piix4.c
@@ -0,0 +1,411 @@ 
+ /*
+ * PIIX4 ACPI controller emulation
+ *
+ * Winston liwen Wang, winston.l.wang@intel.com
+ * Copyright (c) 2006 , Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+#include "acpi.h"
+
+#include "xen_backend.h"
+#include "xen_common.h"
+#include "qemu-log.h"
+
+#include <xen/hvm/ioreq.h>
+#include <xen/hvm/params.h>
+
+#define PIIX4ACPI_LOG_ERROR 0
+#define PIIX4ACPI_LOG_INFO 1
+#define PIIX4ACPI_LOG_DEBUG 2
+#define PIIX4ACPI_LOGLEVEL PIIX4ACPI_LOG_INFO
+#define PIIX4ACPI_LOG(level, fmt, ...) do { \
+    if (level <= PIIX4ACPI_LOGLEVEL) qemu_log(fmt, ## __VA_ARGS__); \
+} while (0)
+
+/* Sleep state type codes as defined by the \_Sx objects in the DSDT. */
+/* These must be kept in sync with the DSDT (hvmloader/acpi/dsdt.asl) */
+#define SLP_TYP_S4        (6 << 10)
+#define SLP_TYP_S3        (5 << 10)
+#define SLP_TYP_S5        (7 << 10)
+
+#define ACPI_DBG_IO_ADDR  0xb044
+#define ACPI_PHP_IO_ADDR  0x10c0
+
+#define PHP_EVT_ADD     0x0
+#define PHP_EVT_REMOVE  0x3
+
+/* The bit in GPE0_STS/EN to notify the pci hotplug event */
+#define ACPI_PHP_GPE_BIT 3
+
+#define DEVFN_TO_PHP_SLOT_REG(devfn) (devfn >> 1)
+#define PHP_SLOT_REG_TO_DEVFN(reg, hilo) ((reg << 1) | hilo)
+
+/* ioport to monitor cpu add/remove status */
+#define PROC_BASE 0xaf00
+
+typedef struct GPEState {
+    /* GPE0 block */
+    uint8_t gpe0_sts[ACPI_GPE0_BLK_LEN / 2];
+    uint8_t gpe0_en[ACPI_GPE0_BLK_LEN / 2];
+
+    /* CPU bitmap */
+    uint8_t cpus_sts[32];
+
+    /* SCI IRQ level */
+    uint8_t sci_asserted;
+
+    qemu_irq sci_irq;
+} GPEState;
+
+typedef struct PCIACPIState {
+    PCIDevice dev;
+    uint16_t pm1_control; /* pm1a_ECNT_BLK */
+    qemu_irq irq;
+    qemu_irq cmos_s3;
+
+    GPEState gpe_state;
+} PCIACPIState;
+
+
+static const VMStateDescription vmstate_acpi = {
+    .name = "PIIX4 ACPI",
+    .version_id = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_PCI_DEVICE(dev, PCIACPIState),
+        VMSTATE_UINT16(pm1_control, PCIACPIState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void acpi_pm1_control_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIACPIState *s = opaque;
+    s->pm1_control = (s->pm1_control & 0xff00) | (val & 0xff);
+}
+
+static uint32_t acpi_pm1_control_readb(void *opaque, uint32_t addr)
+{
+    PCIACPIState *s = opaque;
+    /* Mask out the write-only bits */
+    return (uint8_t)(s->pm1_control &
+                     ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE | ACPI_BITMASK_SLEEP_ENABLE));
+}
+
+static void acpi_shutdown(PCIACPIState *s, uint32_t val)
+{
+    if (!(val & ACPI_BITMASK_SLEEP_ENABLE)) {
+        return;
+    }
+
+    switch (val & ACPI_BITMASK_SLEEP_TYPE) {
+    case SLP_TYP_S3:
+        qemu_system_reset();
+        qemu_irq_raise(s->cmos_s3);
+        xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
+        break;
+    case SLP_TYP_S4:
+    case SLP_TYP_S5:
+        qemu_system_shutdown_request();
+        break;
+    default:
+        break;
+    }
+}
+
+static void acpi_pm1_control_p1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIACPIState *s = opaque;
+
+    val <<= 8;
+    s->pm1_control = ((s->pm1_control & 0xff) | val) & ~ACPI_BITMASK_SLEEP_ENABLE;
+
+    acpi_shutdown(s, val);
+}
+
+static uint32_t acpi_pm1_control_p1_readb(void *opaque, uint32_t addr)
+{
+    PCIACPIState *s = opaque;
+    /* Mask out the write-only bits */
+    return (uint8_t)((s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE | ACPI_BITMASK_SLEEP_ENABLE)) >> 8);
+}
+
+static void acpi_pm1_control_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIACPIState *s = opaque;
+
+    s->pm1_control = val & ~ACPI_BITMASK_SLEEP_ENABLE;
+
+    acpi_shutdown(s, val);
+}
+
+static uint32_t acpi_pm1_control_readw(void *opaque, uint32_t addr)
+{
+    PCIACPIState *s = opaque;
+    /* Mask out the write-only bits */
+    return s->pm1_control & ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE | ACPI_BITMASK_SLEEP_ENABLE);
+}
+
+static void acpi_map(PCIDevice *pci_dev, int region_num,
+                     uint32_t addr, uint32_t size, int type)
+{
+    PCIACPIState *d = (PCIACPIState *)pci_dev;
+
+    /* Byte access */
+    register_ioport_write(addr + 4, 1, 1, acpi_pm1_control_writeb, d);
+    register_ioport_read(addr + 4, 1, 1, acpi_pm1_control_readb, d);
+    register_ioport_write(addr + 4 + 1, 1, 1, acpi_pm1_control_p1_writeb, d);
+    register_ioport_read(addr + 4 + 1, 1, 1, acpi_pm1_control_p1_readb, d);
+
+    /* Word access */
+    register_ioport_write(addr + 4, 2, 2, acpi_pm1_control_writew, d);
+    register_ioport_read(addr + 4, 2, 2, acpi_pm1_control_readw, d);
+}
+
+static inline int test_bit(uint8_t *map, int bit)
+{
+    return map[bit / 8] & (1 << (bit % 8));
+}
+
+static inline void set_bit(uint8_t *map, int bit)
+{
+    map[bit / 8] |= (1 << (bit % 8));
+}
+
+static inline void clear_bit(uint8_t *map, int bit)
+{
+    map[bit / 8] &= ~(1 << (bit % 8));
+}
+
+static void acpi_dbg_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "ACPI: DBG: 0x%08x\n", val);
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "ACPI:debug: write addr=0x%x, val=0x%x.\n", addr, val);
+}
+
+/* GPEx_STS occupy 1st half of the block, while GPEx_EN 2nd half */
+static uint32_t gpe_sts_read(void *opaque, uint32_t addr)
+{
+    GPEState *s = opaque;
+
+    return s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS];
+}
+
+/* write 1 to clear specific GPE bits */
+static void gpe_sts_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    GPEState *s = opaque;
+    int hotplugged = 0;
+
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_sts_write: addr=0x%x, val=0x%x.\n", addr, val);
+
+    hotplugged = test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT);
+    s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS] &= ~val;
+    if ( s->sci_asserted &&
+         hotplugged &&
+         !test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT)) {
+        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "Clear the GPE0_STS bit for ACPI hotplug & deassert the IRQ.\n");
+        qemu_irq_lower(s->sci_irq);
+    }
+}
+
+static uint32_t gpe_en_read(void *opaque, uint32_t addr)
+{
+    GPEState *s = opaque;
+
+    return s->gpe0_en[addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2)];
+}
+
+/* write 0 to clear en bit */
+static void gpe_en_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    GPEState *s = opaque;
+    int reg_count;
+
+    PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_en_write: addr=0x%x, val=0x%x.\n", addr, val);
+    reg_count = addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2);
+    s->gpe0_en[reg_count] = val;
+    /* If disable GPE bit right after generating SCI on it,
+     * need deassert the intr to avoid redundant intrs
+     */
+    if ( s->sci_asserted &&
+         reg_count == (ACPI_PHP_GPE_BIT / 8) &&
+         !(val & (1 << (ACPI_PHP_GPE_BIT % 8))) ) {
+        PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "deassert due to disable GPE bit.\n");
+        s->sci_asserted = 0;
+        qemu_irq_lower(s->sci_irq);
+    }
+}
+
+static const VMStateDescription vmstate_gpe = {
+    .name = "gpe",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
+    .fields = (VMStateField []) {
+        VMSTATE_BUFFER(gpe0_sts, GPEState),
+        VMSTATE_BUFFER(gpe0_en, GPEState),
+        VMSTATE_UINT8(sci_asserted, GPEState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static uint32_t gpe_cpus_readb(void *opaque, uint32_t addr)
+{
+    uint32_t val = 0;
+    GPEState *g = opaque;
+
+    switch (addr) {
+        case PROC_BASE ... PROC_BASE + 31:
+            val = g->cpus_sts[addr - PROC_BASE];
+            break;
+        default:
+            break;
+    }
+
+    return val;
+}
+
+static void gpe_cpus_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    /* GPEState *g = opaque; */
+
+    switch (addr) {
+        case PROC_BASE ... PROC_BASE + 31:
+            /* don't allow to change cpus_sts from inside a guest */
+            break;
+        default:
+            break;
+    }
+}
+
+static void gpe_acpi_init(PCIACPIState *acpi_state)
+{
+    GPEState *s = &acpi_state->gpe_state;
+    memset(s, 0, sizeof (GPEState));
+
+    s->cpus_sts[0] = 1;
+
+    register_ioport_read(PROC_BASE, 32, 1, gpe_cpus_readb, s);
+    register_ioport_write(PROC_BASE, 32, 1, gpe_cpus_writeb, s);
+
+    register_ioport_read(ACPI_GPE0_BLK_ADDRESS,
+                         ACPI_GPE0_BLK_LEN / 2,
+                         1,
+                         gpe_sts_read,
+                         s);
+    register_ioport_read(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
+                         ACPI_GPE0_BLK_LEN / 2,
+                         1,
+                         gpe_en_read,
+                         s);
+
+    register_ioport_write(ACPI_GPE0_BLK_ADDRESS,
+                          ACPI_GPE0_BLK_LEN / 2,
+                          1,
+                          gpe_sts_write,
+                          s);
+    register_ioport_write(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2,
+                          ACPI_GPE0_BLK_LEN / 2,
+                          1,
+                          gpe_en_write,
+                          s);
+
+    vmstate_register(NULL, 0, &vmstate_gpe, s);
+}
+
+static void piix4_pm_xen_reset(void *opaque)
+{
+    PCIACPIState *s = opaque;
+
+    s->pm1_control = ACPI_BITMASK_SCI_ENABLE;
+}
+static int piix4_pm_xen_initfn(PCIDevice *dev)
+{
+    PCIACPIState *s = DO_UPCAST(PCIACPIState, dev, dev);
+    uint8_t *pci_conf;
+
+    pci_conf = s->dev.config;
+    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_3);
+    pci_conf[0x08] = 0x01;  /* B0 stepping */
+    pci_conf[0x09] = 0x00;  /* base class */
+    pci_config_set_class(pci_conf, PCI_CLASS_BRIDGE_OTHER);
+    pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; /* header_type */
+    pci_conf[0x3d] = 0x01;  /* Hardwired to PIRQA is used */
+
+    /* PMBA POWER MANAGEMENT BASE ADDRESS, hardcoded to 0x1f40
+     * to make shutdown work for IPF, due to IPF Guest Firmware
+     * will enumerate pci devices.
+     *
+     * TODO:  if Guest Firmware or Guest OS will change this PMBA,
+     * More logic will be added.
+     */
+    pci_conf[0x40] = 0x41; /* Special device-specific BAR at 0x40 */
+    pci_conf[0x41] = 0x1f;
+    pci_conf[0x42] = 0x00;
+    pci_conf[0x43] = 0x00;
+
+    qemu_register_reset(piix4_pm_xen_reset, s);
+
+    acpi_map((PCIDevice *)s, 0, 0x1f40, 0x10, PCI_BASE_ADDRESS_SPACE_IO);
+
+    gpe_acpi_init(s);
+
+    register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s);
+
+    return 0;
+}
+
+void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3)
+{
+    PCIDevice *dev;
+    PCIACPIState *s;
+
+    dev = pci_create(bus, devfn, "PIIX4 ACPI");
+
+    s = DO_UPCAST(PCIACPIState, dev, dev);
+
+    s->irq = sci_irq_spec;
+    s->gpe_state.sci_irq = sci_irq_spec;
+
+    s->cmos_s3 = cmos_s3;
+
+    qdev_init_nofail(&dev->qdev);
+}
+
+static PCIDeviceInfo piix4_pm_xen_info = {
+    .qdev.name    = "PIIX4 ACPI",
+    .qdev.desc    = "dm",
+    .qdev.size    = sizeof(PCIACPIState),
+    .qdev.vmsd    = &vmstate_acpi,
+    .init         = piix4_pm_xen_initfn,
+};
+
+static void piix4_pm_xen_register(void)
+{
+    pci_qdev_register(&piix4_pm_xen_info);
+}
+
+device_init(piix4_pm_xen_register);
diff --git a/hw/xen_common.h b/hw/xen_common.h
index b3d1fe3..ca7a76c 100644
--- a/hw/xen_common.h
+++ b/hw/xen_common.h
@@ -51,4 +51,7 @@  typedef xc_interface *qemu_xc_interface;
 qemu_irq *i8259_xen_init(void);
 void destroy_hvm_domain(void);
 
+/* hw/xen_acpi_piix4.c */
+void piix4_pm_xen_init(PCIBus *bus, int devfn, qemu_irq sci_irq_spec, qemu_irq cmos_s3);
+
 #endif /* QEMU_HW_XEN_COMMON_H */
diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c
index fe4491f..30bcc47 100644
--- a/hw/xen_machine_fv.c
+++ b/hw/xen_machine_fv.c
@@ -55,7 +55,6 @@  static void xen_init_fv(ram_addr_t ram_size,
     qemu_irq *isa_irq;
     qemu_irq *i8259;
     qemu_irq *cmos_s3;
-    qemu_irq *smi_irq;
     IsaIrqState *isa_irq_state;
     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
     FDCtrl *floppy_controller;
@@ -132,10 +131,7 @@  static void xen_init_fv(ram_addr_t ram_size,
 
     if (acpi_enabled) {
         cmos_s3 = qemu_allocate_irqs(pc_cmos_set_s3_resume, rtc_state, 1);
-        smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1);
-        piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
-                isa_reserve_irq(9), *cmos_s3, *smi_irq,
-                0);
+        piix4_pm_xen_init(pci_bus, piix3_devfn + 3, isa_reserve_irq(9), *cmos_s3);
     }
 
     if (i440fx_state) {