Patchwork [08/14] pcie: helper functions for pcie extended capability.

login
register
mail settings
Submitter Isaku Yamahata
Date Sept. 6, 2010, 7:46 a.m.
Message ID <9c16f9d62dc04a7c2b508399c2a2258b1e3a51fb.1283759074.git.yamahata@valinux.co.jp>
Download mbox | patch
Permalink /patch/63890/
State New
Headers show

Comments

Isaku Yamahata - Sept. 6, 2010, 7:46 a.m.
This patch implements helper functions for pci express extended capability.
NOTE: presence detection depends on pci_qdev_init() change.
      PCIExpressDevice::aer_log_max is in PCIDevice for device property.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 Makefile.objs |    1 +
 hw/pci.h      |   24 +
 hw/pcie.c     | 1668 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pcie.h     |  186 +++++++
 qemu-common.h |    1 +
 5 files changed, 1880 insertions(+), 0 deletions(-)
 create mode 100644 hw/pcie.c
 create mode 100644 hw/pcie.h

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 5f5a4c5..eeb5134 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -186,6 +186,7 @@  hw-obj-$(CONFIG_PIIX4) += piix4.o
 # PCI watchdog devices
 hw-obj-y += wdt_i6300esb.o
 
+hw-obj-y += pcie.o
 hw-obj-y += msix.o msi.o
 
 # PCI network cards
diff --git a/hw/pci.h b/hw/pci.h
index 9387a03..3f912b8 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -9,6 +9,8 @@ 
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
 
+#include "pcie.h"
+
 /* PCI bus */
 
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
@@ -173,6 +175,12 @@  struct PCIDevice {
     /* Offset of MSI capability in config space */
     uint8_t msi_cap;
 
+    /* PCI Express */
+    PCIExpressDevice *exp;
+    /* Theoretically this belongs to  PCIExpressDevice.
+       However it is here for property and save/load */
+    struct pcie_aer_log aer_log;
+
     /* Location of option rom */
     char *romfile;
     ram_addr_t rom_offset;
@@ -368,6 +376,22 @@  static inline uint32_t pci_config_size(const PCIDevice *d)
     return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
 }
 
+
+/* These are pci express specific, so should belong to pcie.h.
+   they're here to avoid header inclusion error. */
+/*
+ * Config-space offset of the PCI Express capability of @d.
+ * Returns 0 when no PCIExpressDevice state is attached (d->exp is NULL).
+ */
+static inline uint8_t pci_pcie_cap(const PCIDevice *d)
+{
+    if (!d->exp) {
+        return 0;
+    }
+    return d->exp->exp_cap;
+}
+
+/* AER */
+/*
+ * Return the aer_cap field of the PCI Express state of @d.
+ * Unlike pci_pcie_cap(), this asserts that d->exp is set instead of
+ * returning 0 for devices without PCI Express state.
+ */
+static inline uint16_t pcie_aer_cap(const PCIDevice *d)
+{
+    assert(d->exp);
+    return d->exp->aer_cap;
+}
+
+
 /* These are not pci specific. Should move into a separate header.
  * Only pci.c uses them, so keep them here for now.
  */
diff --git a/hw/pcie.c b/hw/pcie.c
new file mode 100644
index 0000000..1f24c2a
--- /dev/null
+++ b/hw/pcie.c
@@ -0,0 +1,1668 @@ 
+/*
+ * pcie.c
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "sysemu.h"
+#include "pci_bridge.h"
+#include "pcie.h"
+#include "msix.h"
+#include "msi.h"
+#include "pci_internals.h"
+
+//#define DEBUG_PCIE
+#ifdef DEBUG_PCIE
+# define PCIE_DPRINTF(fmt, ...)                                         \
+    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
+#else
+# define PCIE_DPRINTF(fmt, ...) do {} while (0)
+#endif
+#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
+    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
+
+/*
+ * Human readable name of a hot plug event, for debug output.
+ * Any value other than the three known events yields "Unknown event".
+ */
+static inline const char *pcie_hp_event_name(enum PCIExpressHotPlugEvent event)
+{
+    const char *name = "Unknown event";
+
+    switch (event) {
+    case PCI_EXP_HP_EV_ABP:
+        name = "attention button pushed";
+        break;
+    case PCI_EXP_HP_EV_PDC:
+        name = "present detection changed";
+        break;
+    case PCI_EXP_HP_EV_CCI:
+        name = "command completed";
+        break;
+    default:
+        break;
+    }
+    return name;
+}
+
+static void pcie_aer_clear_error(PCIDevice *dev);
+static void pcie_aer_root_notify(PCIDevice *dev, bool trigger, int level);
+static AER_ERR_MSG_RESULT
+pcie_aer_errmsg_alldev(PCIDevice *dev, const struct pcie_aer_err_msg *msg);
+static AER_ERR_MSG_RESULT
+pcie_aer_errmsg_vbridge(PCIDevice *dev, const struct pcie_aer_err_msg *msg);
+
+/***************************************************************************
+ * pci express capability helper functions
+ */
+
+#define PCI_EXP_VER2_SIZEOF     0x3c    /* express capability of version 2 */
+
+/* PCI_EXP_FLAGS */
+#define PCI_EXP_FLAGS_VER2      2       /* for now, supports only version 2 */
+#define PCI_EXP_FLAGS_IRQ_SHIFT 9
+#define PCI_EXP_FLAGS_IRQ_REG(irq)      (((irq) << PCI_EXP_FLAGS_IRQ_SHIFT) & PCI_EXP_FLAGS_IRQ)
+#define PCI_EXP_FLAGS_TYPE_SHIFT        4
+
+/* PCI_EXP_LINK{CAP, STA} */
+/* link speed */
+#define PCI_EXP_LNK_LS_25               1
+
+#define PCI_EXP_LNK_MLW_SHIFT           4
+#define PCI_EXP_LNK_MLW_1               (1 << PCI_EXP_LNK_MLW_SHIFT)
+
+/* PCI_EXP_LINKCAP */
+#define PCI_EXP_LNKCAP_ASPMS_SHIFT      10
+#define PCI_EXP_LNKCAP_ASPMS_0S         (1 << PCI_EXP_LNKCAP_ASPMS_SHIFT)
+
+#define PCI_EXP_LNKCAP_PN_SHIFT         24
+#define PCI_EXP_LNKCAP_PN_REG(pn)       (((pn) << PCI_EXP_LNKCAP_PN_SHIFT) & PCI_EXP_LNKCAP_PN)
+
+#define PCI_EXP_SLTCAP_PSN_SHIFT        19
+#define PCI_EXP_SLTCAP_PSN_REG(slot)    (((slot) << PCI_EXP_SLTCAP_PSN_SHIFT) & PCI_EXP_SLTCAP_PSN)
+
+#define PCI_EXP_SLTCTL_AIC_SHIFT        6
+#define PCI_EXP_SLTCTL_AIC_ON           (PCI_EXP_HP_IND_ON << PCI_EXP_SLTCTL_AIC_SHIFT)
+#define PCI_EXP_SLTCTL_AIC_BLINK        (PCI_EXP_HP_IND_BLINK << PCI_EXP_SLTCTL_AIC_SHIFT)
+#define PCI_EXP_SLTCTL_AIC_OFF          (PCI_EXP_HP_IND_OFF << PCI_EXP_SLTCTL_AIC_SHIFT)
+
+#define PCI_EXP_SLTCTL_PIC_SHIFT        8
+#define PCI_EXP_SLTCTL_PIC_ON           (PCI_EXP_HP_IND_ON << PCI_EXP_SLTCTL_PIC_SHIFT)
+#define PCI_EXP_SLTCTL_PIC_BLINK        (PCI_EXP_HP_IND_BLINK << PCI_EXP_SLTCTL_PIC_SHIFT)
+#define PCI_EXP_SLTCTL_PIC_OFF          (PCI_EXP_HP_IND_OFF << PCI_EXP_SLTCTL_PIC_SHIFT)
+
+#define PCI_EXP_DEVCAP2_EFF             0x100000
+#define PCI_EXP_DEVCAP2_EETLPP          0x200000
+
+#define PCI_EXP_DEVCTL2_EETLPPB         0x80
+
+/*
+ * Deliver an interrupt for @dev through whichever mechanism is enabled.
+ *
+ * MSI-X is preferred over MSI, and MSI over INTx.  For the message based
+ * mechanisms a message is sent only when @trigger is set and @level is
+ * ignored; for INTx the line is driven to @level and @trigger is ignored.
+ * Masking/unmasking of the interrupt is left to the layer below
+ * (msix_notify(), msi_notify() or the INTx irq handler).
+ */
+static void pcie_notify(PCIDevice *dev, uint16_t vector,
+                        bool trigger, int level)
+{
+    PCIE_DEV_PRINTF(dev, "noitfy vector %d tirgger:%d level:%d\n",
+                    vector, trigger, level);
+
+    if (msix_enabled(dev)) {
+        if (trigger) {
+            msix_notify(dev, vector);
+        }
+        return;
+    }
+
+    if (msi_enabled(dev)) {
+        if (trigger) {
+            msi_notify(dev, vector);
+        }
+        return;
+    }
+
+    qemu_set_irq(dev->irq[0], level);
+}
+
+/*
+ * Re-align a value written to config space so that it lines up with the
+ * register located at @pos.
+ *
+ * @addr: config space address the guest wrote to
+ * @val:  value written, whose least significant byte lands at @addr
+ * @pos:  config space address of the register of interest
+ *
+ * Config space is byte addressed, so each address step corresponds to
+ * 8 bits of shift.  Returns @val as seen from @pos; bytes of the write
+ * that fall outside the 32 bit window at @pos are dropped, and 0 is
+ * returned when the write and the window are 4 or more bytes apart.
+ */
+static inline uint32_t pcie_written_val_long(uint32_t addr, uint32_t val,
+                                             uint32_t pos)
+{
+    uint32_t distance;
+
+    if (addr >= pos) {
+        distance = addr - pos;
+        /* shifting a 32 bit value by 32 or more is undefined in C */
+        return distance < 4 ? val << (distance * 8) : 0;
+    }
+
+    distance = pos - addr;
+    return distance < 4 ? val >> (distance * 8) : 0;
+}
+
+/*
+ * 16 bit variant of pcie_written_val_long(): the value written at @addr
+ * as seen from the register at @pos, truncated to its low 16 bits.
+ */
+static inline uint16_t pcie_written_val_word(uint32_t addr, uint32_t val,
+                                             uint32_t pos)
+{
+    return pcie_written_val_long(addr, val, pos) & 0xffff;
+}
+
+/*
+ * RW1C: Write-1-to-clear
+ * register     written val        result
+ * 0            0               => 0
+ * 1            0               => 1
+ * 0            1               => 0
+ * 1            1               => 0
+ *
+ * Apply a guest write (@addr, @val) to the 32 bit RW1C register at @pos.
+ * Only bits selected by @mask can be cleared; all others are left alone.
+ */
+static inline void pcie_w1c_long(PCIDevice *d, uint32_t pos, uint32_t mask,
+                                 uint32_t addr, uint32_t val)
+{
+    uint8_t *regp = d->config + pos;
+    uint32_t to_clear = pcie_written_val_long(addr, val, pos) & mask;
+
+    pci_set_long(regp, pci_get_long(regp) & ~to_clear);
+}
+
+/*
+ * 16 bit write-1-to-clear: apply a guest write (@addr, @val) to the RW1C
+ * register at @pos, clearing only bits that are both written as 1 and
+ * selected by @mask.
+ */
+static inline void pcie_w1c_word(PCIDevice *d, uint32_t pos, uint16_t mask,
+                                 uint32_t addr, uint32_t val)
+{
+    uint8_t *regp = d->config + pos;
+    uint16_t to_clear = pcie_written_val_word(addr, val, pos) & mask;
+
+    pci_set_word(regp, pci_get_word(regp) & ~to_clear);
+}
+
+/*
+ * Allocate the PCI Express state of @dev and add a version 2 PCI Express
+ * capability to its config space.
+ *
+ * @offset: capability offset handed to pci_add_capability()
+ * @type:   device/port type stored in the capability register
+ * @port:   port number stored in the link capabilities register
+ *
+ * Returns the offset of the new capability.  When pci_add_capability()
+ * fails, its negative return value is passed through and dev->exp is
+ * released and reset to NULL.
+ */
+int pci_pcie_cap_init(PCIDevice *dev,
+                      uint8_t offset, uint8_t type, uint8_t port)
+{
+    int exp_cap;
+    uint8_t *pcie_cap;
+
+    assert(pci_is_express(dev));
+    dev->exp = qemu_mallocz(sizeof(*dev->exp));
+
+    exp_cap = pci_add_capability(dev, PCI_CAP_ID_EXP, offset,
+                                 PCI_EXP_VER2_SIZEOF);
+    if (exp_cap < 0) {
+        qemu_free(dev->exp);
+        dev->exp = NULL;
+        return exp_cap;
+    }
+    dev->exp->exp_cap = exp_cap;
+    /* dev->cap_present |= QEMU_PCI_CAP_EXPRESS; */ /* already done in pci_qdev_init() */
+
+    pcie_cap = dev->config + pci_pcie_cap(dev);
+
+    /* capability register
+       interrupt message number defaults to 0 */
+    pci_set_word(pcie_cap + PCI_EXP_FLAGS,
+                 ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) |
+                 PCI_EXP_FLAGS_VER2);
+
+    /* device capability register
+     * table 7-12:
+     * role based error reporting bit must be set by all
+     * Functions conforming to the ECN, PCI Express Base
+     * Specification, Revision 1.1., or subsequent PCI Express Base
+     * Specification revisions.
+     */
+    pci_set_long(pcie_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER);
+
+    pci_set_long(pcie_cap + PCI_EXP_LNKCAP,
+                 PCI_EXP_LNKCAP_PN_REG(port) |
+                 PCI_EXP_LNKCAP_ASPMS_0S |
+                 PCI_EXP_LNK_MLW_1 |
+                 PCI_EXP_LNK_LS_25);
+
+    pci_set_word(pcie_cap + PCI_EXP_LNKSTA,
+                 PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
+
+    pci_set_long(pcie_cap + PCI_EXP_DEVCAP2,
+                 PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
+
+    /* The write mask belongs to the device control 2 register, not to the
+     * capability header at offset 0 (capability ID / next pointer), which
+     * must stay read-only. */
+    pci_set_word(dev->wmask + exp_cap + PCI_EXP_DEVCTL2,
+                 PCI_EXP_DEVCTL2_EETLPPB);
+    return exp_cap;
+}
+
+/*
+ * Release the PCI Express state allocated by pci_pcie_cap_init().
+ * The capability itself is not removed from config space.
+ * Always returns 0.
+ */
+int pci_pcie_cap_exit(PCIDevice *dev)
+{
+    /* pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF); */
+    qemu_free(dev->exp);
+    /* pci_pcie_cap() tests dev->exp before dereferencing it, so it must
+     * not be left pointing at freed memory */
+    dev->exp = NULL;
+    return 0;
+}
+
+/*
+ * Device/port type field of the PCI Express capability register of @dev.
+ * The device must have a PCI Express capability (asserted).
+ */
+uint8_t pcie_cap_get_type(const PCIDevice *dev)
+{
+    uint32_t cap = pci_pcie_cap(dev);
+    uint16_t flags;
+
+    assert(cap > 0);
+    flags = pci_get_word(dev->config + cap + PCI_EXP_FLAGS);
+    return (flags & PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT;
+}
+
+/* MSI/MSI-X */
+/*
+ * Set the pci express interrupt message number in the capability register.
+ *
+ * @vector must fit the interrupt message number field (PCI_EXP_FLAGS_IRQ),
+ * which is 5 bits wide, i.e. 0..31.  A value of 32 would be silently
+ * truncated to 0 by PCI_EXP_FLAGS_IRQ_REG(), so it is rejected as well.
+ */
+void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector)
+{
+    uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev);
+    uint16_t tmp;
+
+    assert(vector < 32);
+    tmp = pci_get_word(pcie_cap + PCI_EXP_FLAGS);
+    tmp &= ~PCI_EXP_FLAGS_IRQ;
+    tmp |= PCI_EXP_FLAGS_IRQ_REG(vector);
+    pci_set_word(pcie_cap + PCI_EXP_FLAGS, tmp);
+}
+
+/* Read back the interrupt message number from the capability register. */
+uint8_t pcie_cap_flags_get_vector(PCIDevice *dev)
+{
+    uint8_t *flags_reg = dev->config + pci_pcie_cap(dev) + PCI_EXP_FLAGS;
+    uint16_t flags = pci_get_word(flags_reg);
+
+    return (flags & PCI_EXP_FLAGS_IRQ) >> PCI_EXP_FLAGS_IRQ_SHIFT;
+}
+
+/*
+ * Raise the interrupt of the PCI Express capability itself: forwards to
+ * pcie_notify() using the interrupt message number currently stored in
+ * the capability register as the vector.
+ */
+static void pcie_cap_notify(PCIDevice *dev, bool trigger, int level)
+{
+    pcie_notify(dev, pcie_cap_flags_get_vector(dev), trigger, level);
+}
+
+/*
+ * Device error reporting setup: advertise role based error reporting in
+ * the device capabilities and make the four error reporting enable bits
+ * of the device control register guest writable.
+ */
+void pcie_cap_deverr_init(PCIDevice *dev)
+{
+    uint32_t cap = pci_pcie_cap(dev);
+    uint8_t *devcap = dev->config + cap + PCI_EXP_DEVCAP;
+    uint8_t *devctl_wmask = dev->wmask + cap + PCI_EXP_DEVCTL;
+    uint32_t v;
+
+    v = pci_get_long(devcap);
+    v |= PCI_EXP_DEVCAP_RBER;
+    pci_set_long(devcap, v);
+
+    v = pci_get_long(devctl_wmask);
+    v |= PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
+         PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE;
+    pci_set_long(devctl_wmask, v);
+}
+
+/* On reset, clear the four error reporting enable bits of device control. */
+void pcie_cap_deverr_reset(PCIDevice *dev)
+{
+    uint8_t *devctl = dev->config + pci_pcie_cap(dev) + PCI_EXP_DEVCTL;
+    uint32_t v = pci_get_long(devctl);
+
+    v &= ~(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
+           PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
+    pci_set_long(devctl, v);
+}
+
+/*
+ * Config write hook for the device status register.
+ *
+ * The four error-detected bits (correctable, non-fatal, fatal, unsupported
+ * request) are write-1-to-clear.  Device status is a 16 bit register, so
+ * only writes overlapping those two bytes are considered and the 16 bit
+ * RW1C helper is used; this keeps the neighbouring link capabilities
+ * register out of the read-modify-write.
+ */
+void pcie_cap_deverr_write_config(PCIDevice *dev,
+                                  uint32_t addr, uint32_t val, int len)
+{
+    uint32_t pos = pci_pcie_cap(dev);
+    if (ranges_overlap(addr, len, pos + PCI_EXP_DEVSTA, 2)) {
+        /* RW1C */
+        pcie_w1c_word(dev, pos + PCI_EXP_DEVSTA,
+                      PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
+                      PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD,
+                      addr, val);
+    }
+}
+
+/*
+ * Record hot plug events in the slot status register and notify the guest.
+ *
+ * events: PCI_EXP_HP_EV_xxx
+ * status: bit or of PCI_EXP_SLTSTA_xxx
+ *
+ * A message interrupt is triggered only when hot plug interrupts are
+ * enabled, the event itself is enabled in slot control, and its status
+ * bit goes from 0 to 1.  The INTx level is asserted as long as hot plug
+ * interrupts are enabled and any supported event bit is set in status.
+ */
+static void pcie_cap_slot_event(PCIDevice *dev,
+                                enum PCIExpressHotPlugEvent events,
+                                uint16_t status)
+{
+    bool trigger = false;
+    int level = 0;
+    uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev);
+    uint16_t sltctl = pci_get_word(pcie_cap + PCI_EXP_SLTCTL);
+    uint16_t sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA);
+
+    PCIE_DEV_PRINTF(dev,
+                    "sltctl: 0x%02x sltsta: 0x%02x event:%x %s status:%d\n",
+                    sltctl, sltsta,
+                    events, pcie_hp_event_name(events), status);
+    events &= PCI_EXP_HP_EV_SUPPORTED;
+    if ((sltctl & PCI_EXP_SLTCTL_HPIE) && (sltctl & events) &&
+        ((sltsta ^ events) & events) /* 0 -> 1 */) {
+        trigger = true;
+    }
+
+    if (events & PCI_EXP_HP_EV_PDC) {
+        /* presence detect state follows the caller supplied status */
+        sltsta &= ~PCI_EXP_SLTSTA_PDS;
+        sltsta |= (status & PCI_EXP_SLTSTA_PDS);
+    }
+    sltsta |= events;
+    pci_set_word(pcie_cap + PCI_EXP_SLTSTA, sltsta);
+    PCIE_DEV_PRINTF(dev, "sltsta -> %02x\n", sltsta);
+
+    if ((sltctl & PCI_EXP_SLTCTL_HPIE) && (sltsta & PCI_EXP_HP_EV_SUPPORTED)) {
+        level = 1;
+    }
+
+    pcie_cap_notify(dev, trigger, level);
+}
+
+/*
+ * Hot plug callback registered on the secondary bus by pcie_cap_slot_init().
+ *
+ * @qdev:    the device owning the slot (registered as &dev->qdev)
+ * @pci_dev: the device being plugged or unplugged
+ * @state:   non-zero for plug, zero for unplug
+ *
+ * Returns 0 on success, -EBUSY when the electromechanical interlock is
+ * engaged, -EINVAL when asked to unplug a function other than 0.
+ */
+static int pcie_cap_slot_hotplug(DeviceState *qdev,
+                                 PCIDevice *pci_dev, int state)
+{
+    PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev);
+    uint8_t *pcie_cap = d->config + pci_pcie_cap(d);
+    uint16_t sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA);
+
+    if (!pci_dev->qdev.hotplugged) {
+        /* cold plugged device: just mark the slot as occupied,
+           no event is generated */
+        assert(state); /* this case only happens machine creation. */
+        sltsta |= PCI_EXP_SLTSTA_PDS;
+        pci_set_word(pcie_cap + PCI_EXP_SLTSTA, sltsta);
+        return 0;
+    }
+
+    PCIE_DEV_PRINTF(pci_dev, "hotplug state: %d\n", state);
+    if (sltsta & PCI_EXP_SLTSTA_EIS) {
+        /* the slot is electromechanically locked. */
+        return -EBUSY;
+    }
+
+    if (state) {
+        if (PCI_FUNC(pci_dev->devfn) == 0) {
+            /* event is per slot. Not per function
+             * only generates event for function = 0.
+             * When hot plug, populate functions > 0
+             * and then add function = 0 last.
+             */
+            pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC, PCI_EXP_SLTSTA_PDS);
+        }
+    } else {
+        PCIBridge *br;
+        PCIBus *bus;
+        DeviceState *next;
+        if (PCI_FUNC(pci_dev->devfn) != 0) {
+            /* event is per slot. Not per function.
+               accepts function = 0 only. */
+            return -EINVAL;
+        }
+
+        /* zap all functions. */
+        br = DO_UPCAST(PCIBridge, dev, d);
+        bus = pci_bridge_get_sec_bus(br);
+        /* the qdev parameter is reused as the loop cursor here; the slot
+           owner was already captured in d above */
+        QLIST_FOREACH_SAFE(qdev, &bus->qbus.children, sibling, next) {
+            qdev_free(qdev);
+        }
+
+        pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC, 0);
+    }
+    return 0;
+}
+
+/* pci express slot for pci express root/downstream port
+   PCI express capability slot registers */
+/*
+ * Mark the port as implementing a slot, fill in the slot capabilities
+ * (physical slot number @slot plus the hot plug related features), set
+ * both indicators to off, make the emulated slot control bits guest
+ * writable, and register pcie_cap_slot_hotplug() on the secondary bus.
+ */
+void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot)
+{
+    uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev);
+    uint8_t *pcie_wmask = dev->wmask + pci_pcie_cap(dev);
+    uint32_t tmp;
+
+    pci_set_word(pcie_cap + PCI_EXP_FLAGS,
+                 pci_get_word(pcie_cap + PCI_EXP_FLAGS) | PCI_EXP_FLAGS_SLOT);
+
+    tmp = pci_get_long(pcie_cap + PCI_EXP_SLTCAP);
+    /* drop the previous physical slot number (and nothing else) before
+       or-ing in the new one */
+    tmp &= ~PCI_EXP_SLTCAP_PSN;
+    tmp |=
+        PCI_EXP_SLTCAP_PSN_REG(slot) |
+        PCI_EXP_SLTCAP_EIP |
+        PCI_EXP_SLTCAP_HPS |
+        PCI_EXP_SLTCAP_HPC |
+        PCI_EXP_SLTCAP_PIP |
+        PCI_EXP_SLTCAP_AIP |
+        PCI_EXP_SLTCAP_ABP;
+    pci_set_long(pcie_cap + PCI_EXP_SLTCAP, tmp);
+
+    tmp = pci_get_word(pcie_cap + PCI_EXP_SLTCTL);
+    tmp &= ~(PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC);
+    tmp |= PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF;
+    pci_set_word(pcie_cap + PCI_EXP_SLTCTL, tmp);
+    pci_set_word(pcie_wmask + PCI_EXP_SLTCTL,
+                 pci_get_word(pcie_wmask + PCI_EXP_SLTCTL) |
+                 PCI_EXP_SLTCTL_PIC |
+                 PCI_EXP_SLTCTL_AIC |
+                 PCI_EXP_SLTCTL_HPIE |
+                 PCI_EXP_SLTCTL_CCIE |
+                 PCI_EXP_SLTCTL_PDCE |
+                 PCI_EXP_SLTCTL_ABPE);
+
+    pci_bus_hotplug(pci_bridge_get_sec_bus(DO_UPCAST(PCIBridge, dev, dev)),
+                    pcie_cap_slot_hotplug, &dev->qdev);
+}
+
+/*
+ * Reset the slot registers: all emulated slot control bits are cleared
+ * with both indicators set to off, and the interlock state plus all
+ * pending event bits are cleared in slot status.
+ */
+void pcie_cap_slot_reset(PCIDevice *dev)
+{
+    uint8_t *cap = dev->config + pci_pcie_cap(dev);
+    uint8_t *sltctl_reg = cap + PCI_EXP_SLTCTL;
+    uint8_t *sltsta_reg = cap + PCI_EXP_SLTSTA;
+    uint32_t v;
+
+    PCIE_DEV_PRINTF(dev, "reset\n");
+
+    v = pci_get_word(sltctl_reg);
+    v &= ~(PCI_EXP_SLTCTL_EIC |
+           PCI_EXP_SLTCTL_PIC |
+           PCI_EXP_SLTCTL_AIC |
+           PCI_EXP_SLTCTL_HPIE |
+           PCI_EXP_SLTCTL_CCIE |
+           PCI_EXP_SLTCTL_PDCE |
+           PCI_EXP_SLTCTL_ABPE);
+    v |= PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF;
+    pci_set_word(sltctl_reg, v);
+
+    v = pci_get_word(sltsta_reg);
+    /* by reset, the lock is released */
+    v &= ~(PCI_EXP_SLTSTA_EIS |
+           PCI_EXP_SLTSTA_CC |
+           PCI_EXP_SLTSTA_PDC |
+           PCI_EXP_SLTSTA_ABP);
+    pci_set_word(sltsta_reg, v);
+}
+
+/*
+ * Config write hook for the slot control and slot status registers.
+ *
+ * @addr, @val, @len: the guest write being processed
+ * @sltctl_prev:      slot control value before the write; the current
+ *                    value is read back from config space here
+ *                    (presumably the generic write was already applied
+ *                    by the caller -- TODO confirm)
+ *
+ * Slot status event bits are write-1-to-clear.  A write to slot control
+ * may toggle the electromechanical interlock, re-evaluates the hot plug
+ * interrupt, and always completes immediately with a command completed
+ * event.
+ */
+void pcie_cap_slot_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint16_t sltctl_prev)
+{
+    uint32_t pos = pci_pcie_cap(dev);
+    uint8_t *pcie_cap = dev->config + pos;
+    uint16_t sltctl = pci_get_word(pcie_cap + PCI_EXP_SLTCTL);
+    uint16_t sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA);
+
+    PCIE_DEV_PRINTF(dev,
+                    "addr: 0x%x val: 0x%x len: %d\n"
+                    "\tsltctl_prev: 0x%02x sltctl: 0x%02x sltsta 0x%02x\n",
+                    addr, val, len, sltctl_prev, sltctl, sltsta);
+    /* SLTSTA: process SLTSTA before SLTCTL to avoid spurious interrupt */
+    if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
+        /* RW1C */
+        pcie_w1c_word(dev, pos + PCI_EXP_SLTSTA, PCI_EXP_HP_EV_SUPPORTED,
+                      addr, val);
+        sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA);
+
+        /* write to sltsta results in clearing bits,
+           so new interrupts won't be generated. */
+        PCIE_DEV_PRINTF(dev, "sltsta -> 0x%02x\n", sltsta);
+    }
+
+    /* SLTCTL */
+    if (ranges_overlap(addr, len, pos + PCI_EXP_SLTCTL, 2)) {
+        PCIE_DEV_PRINTF(dev, "sltctl: 0x%02x -> 0x%02x\n",
+                        sltctl_prev, sltctl);
+        if (pcie_written_val_word(addr, val, pos + PCI_EXP_SLTCTL) &
+            PCI_EXP_SLTCTL_EIC) {
+            /* toggle PCI_EXP_SLTSTA_EIS */
+            /* (the expression below is equivalent to
+               sltsta ^ PCI_EXP_SLTSTA_EIS) */
+            sltsta = (sltsta & ~PCI_EXP_SLTSTA_EIS) |
+                ((sltsta ^ PCI_EXP_SLTSTA_EIS) & PCI_EXP_SLTSTA_EIS);
+            pci_set_word(pcie_cap + PCI_EXP_SLTSTA, sltsta);
+            PCIE_DEV_PRINTF(dev, "PCI_EXP_SLTCTL_EIC: sltsta -> 0x%02x\n",
+                            sltsta);
+        }
+
+        if (sltctl & PCI_EXP_SLTCTL_HPIE) {
+            bool trigger = false;
+            int level = 0;
+
+            /* trigger a message when an enable bit was just turned on;
+               assert the level while an enabled event is still pending */
+            if (((sltctl_prev ^ sltctl) & sltctl) & PCI_EXP_HP_EV_SUPPORTED) {
+                /* 0 -> 1 */
+                trigger = true;
+            }
+            if ((sltctl & sltsta) & PCI_EXP_HP_EV_SUPPORTED) {
+                level = 1;
+            }
+            pcie_cap_notify(dev, trigger, level);
+        }
+
+        /* command completed.
+           unlike real hardware, command completes instantaneously */
+        /* NOTE(review): PCI_EXP_SLTCTL_SUPPORTED is only referenced from
+           the commented-out condition below, so every slot control write
+           currently raises a command completed event. */
+#define PCI_EXP_SLTCTL_SUPPORTED        \
+            (PCI_EXP_SLTCTL_ABPE |      \
+             PCI_EXP_SLTCTL_PDCE |      \
+             PCI_EXP_SLTCTL_CCIE |      \
+             PCI_EXP_SLTCTL_HPIE |      \
+             PCI_EXP_SLTCTL_AIC |       \
+             PCI_EXP_SLTCTL_PCC |       \
+             PCI_EXP_SLTCTL_EIC)
+        if ( 1 /* (sltctl_prev ^ sltctl) & PCI_EXP_SLTCTL_SUPPORTED */ ) {
+            /* set command completed bit */
+            pcie_cap_slot_event(dev, PCI_EXP_HP_EV_CCI, 0);
+        }
+    }
+}
+
+/* Emulate a press of the slot's attention button by raising the
+   attention button pushed event on @dev. */
+void pcie_cap_slot_push_attention_button(PCIDevice *dev)
+{
+    pcie_cap_slot_event(dev, PCI_EXP_HP_EV_ABP, 0);
+}
+
+/* root control/capabilities/status. PME isn't emulated for now */
+/*
+ * Make the three "system error on ... error" enable bits of the root
+ * control register guest writable.  The write mask of root control is
+ * overwritten, not or-ed.
+ */
+void pcie_cap_root_init(PCIDevice *dev)
+{
+    uint8_t cap = pci_pcie_cap(dev);
+    uint8_t *rtctl_wmask = dev->wmask + cap + PCI_EXP_RTCTL;
+
+    pci_set_word(rtctl_wmask,
+                 PCI_EXP_RTCTL_SECEE |
+                 PCI_EXP_RTCTL_SENFEE |
+                 PCI_EXP_RTCTL_SEFEE);
+}
+
+/* On reset the whole root control register is cleared. */
+void pcie_cap_root_reset(PCIDevice *dev)
+{
+    pci_set_word(dev->config + pci_pcie_cap(dev) + PCI_EXP_RTCTL, 0);
+}
+
+/* function level reset(FLR) */
+/*
+ * Advertise function level reset in the device capabilities register and
+ * remember @flr as the handler invoked by pcie_cap_flr_write_config().
+ *
+ * Device capabilities is a 32 bit register and the FLR capability bit
+ * lies in its upper half, so it has to be updated with the 32 bit
+ * accessors; a 16 bit access cannot reach the bit.
+ */
+void pcie_cap_flr_init(PCIDevice *dev, pcie_flr_fn flr)
+{
+    uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev);
+    pci_set_long(pcie_cap + PCI_EXP_DEVCAP,
+                 pci_get_long(pcie_cap + PCI_EXP_DEVCAP) | PCI_EXP_DEVCAP_FLR);
+    dev->exp->flr = flr;
+}
+
+/*
+ * Config write hook for function level reset: when the guest writes the
+ * initiate-FLR bit of device control and a handler was registered with
+ * pcie_cap_flr_init(), call that handler.
+ */
+void pcie_cap_flr_write_config(PCIDevice *dev,
+                               uint32_t addr, uint32_t val, int len)
+{
+    uint32_t devctl = pci_pcie_cap(dev) + PCI_EXP_DEVCTL;
+    uint16_t written;
+
+    if (!ranges_overlap(addr, len, devctl, 2)) {
+        return;
+    }
+
+    written = pcie_written_val_word(addr, val, devctl);
+    if (!(written & PCI_EXP_DEVCTL_BCR_FLR)) {
+        return;
+    }
+
+    if (dev->exp->flr) {
+        dev->exp->flr(dev);
+    }
+}
+
+
+/* Alternative Routing-ID Interpretation (ARI) */
+/* ari forwarding support for down stream port */
+/*
+ * Set the ARI bit in device capabilities 2 and make the ARI bit of
+ * device control 2 guest writable.
+ */
+void pcie_cap_ari_init(PCIDevice *dev)
+{
+    uint32_t cap = pci_pcie_cap(dev);
+    uint8_t *devcap2 = dev->config + cap + PCI_EXP_DEVCAP2;
+    uint8_t *devctl2_wmask = dev->wmask + cap + PCI_EXP_DEVCTL2;
+    uint32_t v;
+
+    v = pci_get_long(devcap2);
+    pci_set_long(devcap2, v | PCI_EXP_DEVCAP2_ARI);
+
+    v = pci_get_long(devctl2_wmask);
+    pci_set_long(devctl2_wmask, v | PCI_EXP_DEVCTL2_ARI);
+}
+
+/* On reset, clear the ARI bit in device control 2. */
+void pcie_cap_ari_reset(PCIDevice *dev)
+{
+    uint8_t *devctl2 = dev->config + pci_pcie_cap(dev) + PCI_EXP_DEVCTL2;
+    uint32_t v = pci_get_long(devctl2);
+
+    v &= ~PCI_EXP_DEVCTL2_ARI;
+    pci_set_long(devctl2, v);
+}
+
+/*
+ * True when @dev is a PCI Express device with a PCI Express capability
+ * whose device control 2 register has the ARI bit set.
+ */
+bool pcie_cap_is_ari_enabled(const PCIDevice *dev)
+{
+    uint32_t cap;
+    uint32_t devctl2;
+
+    if (!pci_is_express(dev)) {
+        return false;
+    }
+
+    cap = pci_pcie_cap(dev);
+    if (cap == 0) {
+        return false;
+    }
+
+    devctl2 = pci_get_long(dev->config + cap + PCI_EXP_DEVCTL2);
+    return (devctl2 & PCI_EXP_DEVCTL2_ARI) != 0;
+}
+
+/**************************************************************************
+ * pci express extended capability allocation functions
+ * uint16_t ext_cap_id (16 bit)
+ * uint8_t cap_ver (4 bit)
+ * uint16_t cap_offset (12 bit)
+ * uint16_t ext_cap_size
+ */
+
+#define PCI_EXT_CAP_VER_SHIFT   16
+#define PCI_EXT_CAP_NEXT_MASK   0xfff00000
+#define PCI_EXT_CAP_NEXT_SHIFT  20
+
+#define PCI_EXT_CAP(id, ver, next) ((id) | ((ver) << PCI_EXT_CAP_VER_SHIFT) | ((next) << PCI_EXT_CAP_NEXT_SHIFT))
+
+#define PCI_EXT_CAP_ALIGN       4
+#define PCI_EXT_CAP_ALIGNUP(x)  (((x) + PCI_EXT_CAP_ALIGN - 1) & ~(PCI_EXT_CAP_ALIGN - 1))
+
+/*
+ * Search extended config space for @size consecutive bytes not marked in
+ * dev->used.
+ *
+ * The scan starts at PCI_CONFIG_SPACE_SIZE; whenever a used byte is hit,
+ * the candidate start is moved to the next PCI_EXT_CAP_ALIGN boundary
+ * after it.  Returns the start offset of the free run, or 0 when none is
+ * found.
+ *
+ * NOTE(review): the loop bound (i < PCIE_CONFIG_SPACE_SIZE - size) looks
+ * conservative -- a run ending at the very end of config space appears
+ * not to be found; confirm whether that is intended.
+ * NOTE(review): the return type is int16_t while callers store the
+ * result in uint16_t; presumably harmless since offsets stay below
+ * PCIE_CONFIG_SPACE_SIZE.
+ */
+static int16_t pcie_ext_cap_find_space(PCIDevice *dev, uint16_t size)
+{
+    uint16_t offset = PCI_CONFIG_SPACE_SIZE;
+    uint16_t i = offset;
+
+    while (i < PCIE_CONFIG_SPACE_SIZE - size) {
+        if (dev->used[i]) {
+            /* restart the candidate run after this used byte */
+            offset = PCI_EXT_CAP_ALIGNUP(i + 1);
+            i = offset;
+            continue;
+        } else if (i - offset + 1 == size) {
+            /* [offset, i] is free and exactly size bytes long */
+            return offset;
+        }
+
+        ++i;
+    }
+
+    return 0;
+}
+
+/*
+ * Walk the extended capability list of @dev looking for @cap_id.
+ *
+ * @prev_p: when non-NULL and the capability is found, receives the offset
+ *          of the preceding capability in the list, or 0 when the match
+ *          is the first entry.  Left untouched when nothing is found.
+ *
+ * Returns the offset of the matching capability, or 0 when the list is
+ * empty, the capability is absent, or the list is longer than config
+ * space allows (which indicates a loop).
+ */
+static uint16_t pcie_find_ext_capability_list(PCIDevice *dev, uint16_t cap_id,
+                                              uint16_t *prev_p)
+{
+    int ttl;
+
+    uint16_t prev = 0;
+    uint16_t next = PCI_CONFIG_SPACE_SIZE;
+    uint32_t header = pci_get_long(dev->config + next);
+
+    if (!header) {
+        return 0;
+    }
+
+    /* minimum 8 bytes per capability */
+    ttl = (PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / 8;
+
+    while (ttl-- > 0) {
+        if (PCI_EXT_CAP_ID(header) == cap_id) {
+            if (prev_p) {
+                *prev_p = prev;
+            }
+            return next;
+        }
+
+        prev = next;
+        next = PCI_EXT_CAP_NEXT(header);
+        if (next < PCI_CONFIG_SPACE_SIZE) {
+            /* end of list */
+            return 0;
+        }
+        /* advance: the header to examine is the one at the new offset */
+        header = pci_get_long(dev->config + next);
+    }
+
+    /* ttl exhausted without a match */
+    return 0;
+}
+
+/*
+ * Public lookup of an extended capability: returns whatever
+ * pcie_find_ext_capability_list() returns for @cap_id, without asking
+ * for the previous list entry.
+ */
+uint16_t pcie_find_ext_capability(PCIDevice *dev, uint16_t cap_id)
+{
+    return pcie_find_ext_capability_list(dev, cap_id, NULL);
+}
+
+/*
+ * Rewrite the next-capability pointer of the extended capability header
+ * at @pos to @next, leaving capability ID and version untouched.
+ * @next must be PCI_EXT_CAP_ALIGN aligned (asserted).
+ */
+static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next)
+{
+    /* the header is a full 32 bit word: ID in the low 16 bits, version
+       and next pointer above them, so it must not be held in 16 bits */
+    uint32_t header = pci_get_long(dev->config + pos);
+
+    assert(!(next & (PCI_EXT_CAP_ALIGN - 1)));
+    header &= ~PCI_EXT_CAP_NEXT_MASK;
+    /* widen before shifting so the shift is done on an unsigned value */
+    header |= ((uint32_t)next << PCI_EXT_CAP_NEXT_SHIFT) &
+        PCI_EXT_CAP_NEXT_MASK;
+    pci_set_long(dev->config + pos, header);
+}
+
+/*
+ * Write an extended capability header at @offset and link it into the
+ * extended capability list of @dev.
+ *
+ * When @offset is the start of extended config space, the next pointer
+ * already stored there is preserved.  Otherwise the new capability is
+ * appended after the current last list entry.  The range
+ * [@offset, @offset + @size) is marked used, read-only, and checked.
+ * The caller is responsible for having verified that the range is free.
+ */
+static void pcie_allocate_ext_capability(PCIDevice *dev,
+                                         uint16_t cap_id, uint8_t cap_ver,
+                                         uint16_t offset, uint16_t size)
+{
+    uint32_t header;
+    uint16_t next;
+
+    assert(offset < offset + size);
+    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
+    assert(size >= 8);
+
+    if (offset == PCI_CONFIG_SPACE_SIZE) {
+        header = pci_get_long(dev->config + offset);
+        next = PCI_EXT_CAP_NEXT(header);
+    } else {
+        /* find last ext cap */
+        int ttl = (PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / 8;
+        uint16_t pos = PCI_CONFIG_SPACE_SIZE;
+        while (ttl-- > 0) {
+            header = pci_get_long(dev->config + pos);
+            if (PCI_EXT_CAP_NEXT(header) < PCI_CONFIG_SPACE_SIZE) {
+                break;
+            }
+
+            pos = PCI_EXT_CAP_NEXT(header);
+        }
+
+        /* since it is known that [offset, offset + size] is unused, the
+           walk must have ended at the tail.  The post-decrement leaves
+           ttl at -1 only when the loop ran out; a break on the very last
+           iteration legitimately leaves it at 0. */
+        assert(ttl >= 0);
+        pcie_ext_cap_set_next(dev, pos, offset);
+        next = 0;
+    }
+    /* widen next so the shift inside PCI_EXT_CAP() is unsigned */
+    pci_set_long(dev->config + offset,
+                 PCI_EXT_CAP(cap_id, cap_ver, (uint32_t)next));
+
+    memset(dev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(dev->wmask + offset, 0, size);
+    /* Check capability by default */
+    memset(dev->cmask + offset, 0xFF, size);
+}
+
+/*
+ * Add an extended capability of @size bytes at an automatically chosen
+ * free location.  Returns its offset, or -ENOSPC when
+ * pcie_ext_cap_find_space() finds no room.
+ */
+int pcie_add_ext_capability(PCIDevice *dev,
+                            uint16_t cap_id, uint8_t cap_ver, uint16_t size)
+{
+    uint16_t where = pcie_ext_cap_find_space(dev, size);
+
+    if (where) {
+        pcie_allocate_ext_capability(dev, cap_id, cap_ver, where, size);
+        return where;
+    }
+    return -ENOSPC;
+}
+
+/*
+ * Add an extended capability of @size bytes at the fixed location
+ * @offset; an @offset of 0 means "pick any free location" and defers to
+ * pcie_add_ext_capability().
+ *
+ * Returns the offset used, or -EBUSY when any byte of the requested
+ * range is already marked used.
+ */
+int pcie_append_ext_capability(PCIDevice *dev,
+                               uint16_t cap_id, uint8_t cap_ver,
+                               uint16_t offset, uint16_t size)
+{
+    uint16_t n;
+
+    if (offset == 0) {
+        return pcie_add_ext_capability(dev, cap_id, cap_ver, size);
+    }
+
+    assert(offset < offset + size);
+    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
+    assert(size >= 8);
+
+    /* refuse if the requested range collides with anything */
+    for (n = 0; n < size; ++n) {
+        if (dev->used[offset + n]) {
+            return -EBUSY;
+        }
+    }
+
+    pcie_allocate_ext_capability(dev, cap_id, cap_ver, offset, size);
+    return offset;
+}
+
+/*
+ * Unlink the extended capability @cap_id from the list and release its
+ * @size bytes of config space.  Does nothing when the capability is not
+ * present.
+ *
+ * A capability in the middle of the list is bypassed by its predecessor.
+ * The first entry cannot be bypassed, so its header is replaced by one
+ * with ID 0 and version 0 that only carries the next pointer.
+ */
+void pcie_del_ext_capability(PCIDevice *dev, uint16_t cap_id, uint16_t size)
+{
+    uint16_t prev;
+    uint16_t pos = pcie_find_ext_capability_list(dev, cap_id, &prev);
+    uint32_t hdr;
+
+    if (pos == 0) {
+        return;
+    }
+
+    hdr = pci_get_long(dev->config + pos);
+    if (!prev) {
+        /* move up next ext cap to PCI_CONFIG_SPACE_SIZE? */
+        assert(pos == PCI_CONFIG_SPACE_SIZE);
+        pci_set_long(dev->config + pos,
+                     PCI_EXT_CAP(0, 0, PCI_EXT_CAP_NEXT(hdr)));
+    } else {
+        pcie_ext_cap_set_next(dev, prev, PCI_EXT_CAP_NEXT(hdr));
+    }
+
+    /* Make capability writeable again */
+    memset(dev->wmask + pos, 0xff, size);
+    /* Clear cmask as device-specific registers can't be checked */
+    memset(dev->cmask + pos, 0, size);
+    memset(dev->used + pos, 0, size);
+}
+
+/*
+ * Mark @size bytes of config space starting at @offset as used, without
+ * writing any capability header or touching wmask/cmask.
+ * No bounds check is performed on @offset and @size.
+ */
+void pcie_reserve_ext_capability(PCIDevice *dev,
+                                 uint16_t offset, uint16_t size)
+{
+    memset(dev->used + offset, 0xff, size);
+}
+
+/**************************************************************************
+ * pci express extended capability helper functions
+ */
+
+/* ARI */
+#define PCI_ARI_VER     1
+#define PCI_ARI_SIZEOF  8
+
+/*
+ * Add an ARI extended capability at @offset (0 lets the allocator pick a
+ * location) and store @nextfn as the next function number in its
+ * capability register.
+ *
+ * Returns the capability offset, or the negative error from
+ * pcie_append_ext_capability().
+ */
+int pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
+{
+    int cap = pcie_append_ext_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
+                                         offset, PCI_ARI_SIZEOF);
+
+    if (cap >= 0) {
+        pci_set_long(dev->config + cap + PCI_ARI_CAP,
+                     PCI_ARI_CAP_NFN(nextfn));
+    }
+    return cap;
+}
+
+/* AER: Advanced Error Reporting extended capability */
+/* version and size handed to pcie_append_ext_capability() by pcie_aer_init() */
+#define PCI_ERR_VER                     2
+#define PCI_ERR_SIZEOF                  0x48
+
+/* uncorrectable error bits used below next to the other PCI_ERR_UNC_* names */
+#define PCI_ERR_UNC_SDN                 0x00000020      /* surprise down */
+#define PCI_ERR_UNC_ACSV                0x00200000      /* ACS Violation */
+#define PCI_ERR_UNC_INTN                0x00400000      /* Internal Error */
+#define PCI_ERR_UNC_MCBTLP              0x00800000      /* MC Blocked TLP */
+#define PCI_ERR_UNC_ATOP_EBLOCKED       0x01000000      /* atomic op egress blocked */
+#define PCI_ERR_UNC_TLP_PRF_BLOCKED     0x02000000      /* TLP Prefix Blocked */
+/*
+ * Uncorrectable error bits handled by this code: installed as the write mask
+ * of the uncorrectable mask and severity registers and used to filter the
+ * status of injected errors and of guest writes to the status register.
+ */
+#define PCI_ERR_UNC_SUPPORTED           (PCI_ERR_UNC_DLP |              \
+                                         PCI_ERR_UNC_SDN |              \
+                                         PCI_ERR_UNC_POISON_TLP |       \
+                                         PCI_ERR_UNC_FCP |              \
+                                         PCI_ERR_UNC_COMP_TIME |        \
+                                         PCI_ERR_UNC_COMP_ABORT |       \
+                                         PCI_ERR_UNC_UNX_COMP |         \
+                                         PCI_ERR_UNC_RX_OVER |          \
+                                         PCI_ERR_UNC_MALF_TLP |         \
+                                         PCI_ERR_UNC_ECRC |             \
+                                         PCI_ERR_UNC_UNSUP |            \
+                                         PCI_ERR_UNC_ACSV |             \
+                                         PCI_ERR_UNC_INTN |             \
+                                         PCI_ERR_UNC_MCBTLP |           \
+                                         PCI_ERR_UNC_ATOP_EBLOCKED |    \
+                                         PCI_ERR_UNC_TLP_PRF_BLOCKED)
+
+/*
+ * Initial value of the uncorrectable error severity register written by
+ * pcie_aer_init(). A set bit makes pcie_aer_inject_error() treat that
+ * error as fatal. Same set as the AER_ERR_FATAL cases of
+ * pcie_aer_uncor_default_severity().
+ */
+#define PCI_ERR_UNC_SEVERITY_DEFAULT    (PCI_ERR_UNC_DLP |              \
+                                         PCI_ERR_UNC_SDN |              \
+                                         PCI_ERR_UNC_FCP |              \
+                                         PCI_ERR_UNC_RX_OVER |          \
+                                         PCI_ERR_UNC_MALF_TLP |         \
+                                         PCI_ERR_UNC_INTN)
+
+/* correctable error bits used below next to the other PCI_ERR_COR_* names */
+#define PCI_ERR_COR_ADV_NONFATAL        0x00002000      /* Advisory Non-Fatal */
+#define PCI_ERR_COR_INTERNAL            0x00004000      /* Corrected Internal */
+#define PCI_ERR_COR_HL_OVERFLOW         0x00008000      /* Header Log Overflow */
+/*
+ * Correctable error bits handled by this code: write mask of the correctable
+ * mask register and filter for injected correctable errors.
+ */
+#define PCI_ERR_COR_SUPPORTED           (PCI_ERR_COR_RCVR |             \
+                                         PCI_ERR_COR_BAD_TLP |          \
+                                         PCI_ERR_COR_BAD_DLLP |         \
+                                         PCI_ERR_COR_REP_ROLL |         \
+                                         PCI_ERR_COR_REP_TIMER |        \
+                                         PCI_ERR_COR_ADV_NONFATAL |     \
+                                         PCI_ERR_COR_INTERNAL |         \
+                                         PCI_ERR_COR_HL_OVERFLOW)
+/* initial value of the correctable error mask register (pcie_aer_init()) */
+#define PCI_ERR_COR_MASK_DEFAULT        (PCI_ERR_COR_ADV_NONFATAL |     \
+                                         PCI_ERR_COR_INTERNAL |         \
+                                         PCI_ERR_COR_HL_OVERFLOW)
+
+
+/* fields of the AER capabilities and control register (PCI_ERR_CAP) */
+#define PCI_ERR_CAP_FEP_MASK            0x0000001f      /* first error pointer */
+#define PCI_ERR_CAP_MHRC                0x00000200      /* multiple header logging capable */
+#define PCI_ERR_CAP_MHRE                0x00000400      /* multiple header logging enable */
+#define PCI_ERR_CAP_TLP                 0x00000800      /* set when a TLP prefix was logged */
+
+/* offset of the TLP prefix log inside the AER capability */
+#define PCI_ERR_TLP_PREFIX_LOG          0x38
+
+/*
+ * Default severity of one uncorrectable error status bit.
+ * From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4
+ *
+ * @status must be exactly one of the PCI_ERR_UNC_* bits listed below;
+ * any other value aborts.
+ */
+static enum PCIE_AER_SEVERITY pcie_aer_uncor_default_severity(uint32_t status)
+{
+    enum PCIE_AER_SEVERITY severity;
+
+    switch (status) {
+    case PCI_ERR_UNC_INTN:
+    case PCI_ERR_UNC_DLP:
+    case PCI_ERR_UNC_SDN:
+    case PCI_ERR_UNC_RX_OVER:
+    case PCI_ERR_UNC_FCP:
+    case PCI_ERR_UNC_MALF_TLP:
+        severity = AER_ERR_FATAL;
+        break;
+    case PCI_ERR_UNC_POISON_TLP:
+    case PCI_ERR_UNC_ECRC:
+    case PCI_ERR_UNC_UNSUP:
+    case PCI_ERR_UNC_COMP_TIME:
+    case PCI_ERR_UNC_COMP_ABORT:
+    case PCI_ERR_UNC_UNX_COMP:
+    case PCI_ERR_UNC_ACSV:
+    case PCI_ERR_UNC_MCBTLP:
+    case PCI_ERR_UNC_ATOP_EBLOCKED:
+    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
+        severity = AER_ERR_NONFATAL;
+        break;
+    default:
+        /* not a single known uncorrectable error bit */
+        abort();
+    }
+    return severity;
+}
+
+/*
+ * Advance ring index @i by one slot, wrapping around at @max.
+ *
+ * @max is the number of slots in the ring (aer_log->log_max at every call
+ * site in this file). A ring without slots has no successor index: return 0
+ * instead of evaluating "% 0", which is undefined behaviour.
+ */
+static uint32_t pcie_aer_log_next(uint32_t i, uint32_t max)
+{
+    if (max == 0) {
+        return 0;
+    }
+    return (i + 1) % max;
+}
+
+/* A ring is empty exactly when the producer index equals the consumer index. */
+static bool pcie_aer_log_empty_index(uint32_t producer, uint32_t consumer)
+{
+    const bool has_entries = (producer != consumer);
+
+    return !has_entries;
+}
+
+static bool pcie_aer_log_empty(struct pcie_aer_log *aer_log)
+{
+    return pcie_aer_log_empty_index(aer_log->producer, aer_log->consumer);
+}
+
+/*
+ * True when no further error can be queued in @aer_log, i.e. advancing the
+ * producer would make it equal to the consumer. One slot therefore always
+ * stays unused so that "full" and "empty" remain distinguishable.
+ */
+static bool pcie_aer_log_full(struct pcie_aer_log *aer_log)
+{
+    uint32_t next_producer =
+        pcie_aer_log_next(aer_log->producer, aer_log->log_max);
+
+    return next_producer == aer_log->consumer;
+}
+
+/*
+ * Claim the slot at the producer index and advance the producer.
+ * Returns the index of the claimed slot. Does not check for a full ring.
+ */
+static uint32_t pcie_aer_log_add(struct pcie_aer_log *aer_log)
+{
+    const uint32_t slot = aer_log->producer;
+
+    aer_log->producer = pcie_aer_log_next(slot, aer_log->log_max);
+    return slot;
+}
+
+/*
+ * Release the slot at the consumer index and advance the consumer.
+ * Returns the index of the released slot. Does not check for an empty ring.
+ */
+static uint32_t pcie_aer_log_del(struct pcie_aer_log *aer_log)
+{
+    const uint32_t slot = aer_log->consumer;
+
+    aer_log->consumer = pcie_aer_log_next(slot, aer_log->log_max);
+    return slot;
+}
+
+/*
+ * Queue a copy of @err in @aer_log.
+ * Returns 0 on success and -1 when the log is full (nothing is stored).
+ */
+static int pcie_aer_log_add_err(struct pcie_aer_log *aer_log,
+                                const struct pcie_aer_err *err)
+{
+    if (!pcie_aer_log_full(aer_log)) {
+        uint32_t slot = pcie_aer_log_add(aer_log);
+
+        memcpy(&aer_log->log[slot], err, sizeof(*err));
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Dequeue the oldest error from @aer_log and return a pointer to it.
+ * The log must not be empty (asserted). The returned entry still lives in
+ * the ring, so it may be overwritten by a later pcie_aer_log_add_err().
+ */
+static const struct pcie_aer_err*
+pcie_aer_log_del_err(struct pcie_aer_log *aer_log)
+{
+    assert(!pcie_aer_log_empty(aer_log));
+    return aer_log->log + pcie_aer_log_del(aer_log);
+}
+
+static void pcie_aer_log_clear_all_err(struct pcie_aer_log *aer_log)
+{
+    aer_log->producer = 0;
+    aer_log->consumer = 0;
+}
+
+/*
+ * Add the AER extended capability to @dev at @offset and set up its
+ * registers, write masks, error log and error message handler.
+ *
+ * dev->aer_log.log_max is taken as configured; PCIE_AER_LOG_MAX_UNSET selects
+ * PCIE_AER_LOG_MAX_DEFAULT and values above PCIE_AER_LOG_MAX_MAX are clipped.
+ *
+ * Returns the position of the capability, or the negative value returned by
+ * pcie_append_ext_capability() on failure. On failure @dev is left untouched
+ * by this function.
+ */
+int pcie_aer_init(PCIDevice *dev, uint16_t offset)
+{
+    int pos;
+    PCIExpressDevice *exp;
+
+    pos = pcie_append_ext_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
+                                     offset, PCI_ERR_SIZEOF);
+    if (pos < 0) {
+        return pos;
+    }
+
+    /* make SERR# enable writable only once the capability really exists */
+    pci_set_word(dev->wmask + PCI_COMMAND,
+                 pci_get_word(dev->wmask + PCI_COMMAND) | PCI_COMMAND_SERR);
+
+    exp = dev->exp;
+    exp->aer_cap = pos;
+    if (dev->aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
+        dev->aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    }
+    if (dev->aer_log.log_max > PCIE_AER_LOG_MAX_MAX) {
+        dev->aer_log.log_max = PCIE_AER_LOG_MAX_MAX;
+    }
+    if (dev->aer_log.log_max > 0) {
+        dev->aer_log.log =
+            qemu_mallocz(sizeof(dev->aer_log.log[0]) * dev->aer_log.log_max);
+    } else {
+        /* an empty log needs no storage; don't request zero bytes */
+        dev->aer_log.log = NULL;
+    }
+
+    pci_set_long(dev->wmask + pos + PCI_ERR_UNCOR_MASK,
+                 PCI_ERR_UNC_SUPPORTED);
+
+    pci_set_long(dev->config + pos + PCI_ERR_UNCOR_SEVER,
+                 PCI_ERR_UNC_SEVERITY_DEFAULT);
+    pci_set_long(dev->wmask + pos + PCI_ERR_UNCOR_SEVER,
+                 PCI_ERR_UNC_SUPPORTED);
+
+    pci_set_long(dev->config + pos + PCI_ERR_COR_MASK,
+                 PCI_ERR_COR_MASK_DEFAULT);
+    pci_set_long(dev->wmask + pos + PCI_ERR_COR_MASK,
+                 PCI_ERR_COR_SUPPORTED);
+
+    /*
+     * capabilities and control. multiple header logging is advertised and
+     * can be enabled only when the log has at least one entry.
+     */
+    if (dev->aer_log.log_max > 0) {
+        pci_set_long(dev->config + pos + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
+                     PCI_ERR_CAP_MHRC);
+        pci_set_long(dev->wmask + pos + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
+                     PCI_ERR_CAP_MHRE);
+    } else {
+        pci_set_long(dev->config + pos + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
+        pci_set_long(dev->wmask + pos + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+    }
+
+    /* ports additionally get a writable SERR# enable in Bridge Control */
+    switch (pcie_cap_get_type(dev)) {
+    case PCI_EXP_TYPE_ROOT_PORT:
+        /* this case will be set by pcie_aer_root_init() */
+        /* fallthrough */
+    case PCI_EXP_TYPE_DOWNSTREAM:
+    case PCI_EXP_TYPE_UPSTREAM:
+        pci_set_word(dev->wmask + PCI_BRIDGE_CONTROL,
+                     pci_get_word(dev->wmask + PCI_BRIDGE_CONTROL) |
+                     PCI_BRIDGE_CTL_SERR);
+        exp->aer_errmsg = pcie_aer_errmsg_vbridge;
+        break;
+    default:
+        exp->aer_errmsg = pcie_aer_errmsg_alldev;
+        break;
+    }
+    return pos;
+}
+
+/*
+ * Undo pcie_aer_init(): remove the AER capability from @dev and release the
+ * error log storage.
+ */
+void pcie_aer_exit(PCIDevice *dev)
+{
+    pci_del_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_SIZEOF);
+    qemu_free(dev->aer_log.log);
+    /* don't leave a dangling pointer behind in the device state */
+    dev->aer_log.log = NULL;
+}
+
+/*
+ * Config space write hook for the AER related registers of @dev.
+ *
+ * @addr, @val and @len describe the guest write. Handles the write-1-to-clear
+ * bits of PCI_STATUS (signaled system error), of the uncorrectable and the
+ * correctable error status registers, and drops all queued errors when
+ * multiple header recording is (still) disabled after a write to the
+ * capabilities and control register.
+ *
+ * Multiple Header recording isn't implemented. Is it wanted?
+ */
+void pcie_aer_write_config(PCIDevice *dev,
+                           uint32_t addr, uint32_t val, int len)
+{
+    uint32_t pos = dev->exp->aer_cap;
+
+    /* PCI_STATUS_SIG_SYSTEM_ERROR */
+    if (ranges_overlap(addr, len, PCI_STATUS, 2)) {
+        pcie_w1c_word(dev, PCI_STATUS, PCI_STATUS_SIG_SYSTEM_ERROR, addr, val);
+    }
+
+    /* uncorrectable */
+    if (ranges_overlap(addr, len, pos + PCI_ERR_UNCOR_STATUS, 4)) {
+        uint32_t written =
+            pcie_written_val_long(addr, val, pos + PCI_ERR_UNCOR_STATUS) &
+            PCI_ERR_UNC_SUPPORTED;
+        uint32_t uncorsta =
+            pci_get_long(dev->config + pos + PCI_ERR_UNCOR_STATUS);
+        uint32_t errcap = pci_get_long(dev->config + pos + PCI_ERR_CAP);
+        /*
+         * Unsigned shift: the first error pointer field is 5 bits wide
+         * (PCI_ERR_CAP_FEP_MASK), so a signed "1 << 31" must be avoided.
+         */
+        uint32_t first_error = (1U << PCI_ERR_CAP_FEP(errcap));
+
+        /* the guest clears the error the first error pointer refers to */
+        if ((uncorsta & first_error) && (written & first_error)) {
+            pcie_aer_clear_error(dev);
+        }
+        if (!(errcap & PCI_ERR_CAP_MHRE)) {
+            /* RW1CS */
+            pcie_w1c_long(dev, pos + PCI_ERR_UNCOR_STATUS,
+                          PCI_ERR_UNC_SUPPORTED, addr, val);
+        }
+    }
+
+    /* correctable */
+    if (ranges_overlap(addr, len, pos + PCI_ERR_COR_STATUS, 4)) {
+        /* RW1CS */
+        pcie_w1c_long(dev, pos + PCI_ERR_COR_STATUS, PCI_ERR_COR_SUPPORTED,
+                      addr, val);
+    }
+
+    /* capability & control */
+    if (ranges_overlap(addr, len, pos + PCI_ERR_CAP, 4)) {
+        uint32_t err_cap = pci_get_long(dev->config + pos + PCI_ERR_CAP);
+        if (!(err_cap & PCI_ERR_CAP_MHRE)) {
+            /* without multiple header recording nothing may stay queued */
+            pcie_aer_log_clear_all_err(&dev->aer_log);
+        }
+    }
+}
+
+#define PCI_SEC_STATUS_RCV_SYSTEM_ERROR         0x4000
+
+void pcie_aer_write_config_vbridge(PCIDevice *dev,
+                                   uint32_t addr, uint32_t val, int len)
+{
+    /* PCI_SEC_STATUS_RCV_SYSTEM_ERROR */
+    if (ranges_overlap(addr, len, PCI_STATUS, 2)) {
+        pcie_w1c_word(dev, PCI_SEC_STATUS, PCI_SEC_STATUS_RCV_SYSTEM_ERROR,
+                      addr, val);
+    }
+}
+
+/*
+ * Hand @msg to the error message handler installed for @dev.
+ * Both dev->exp and its aer_errmsg callback must be set (asserted).
+ * The handler's return value is discarded.
+ */
+static inline void pcie_aer_errmsg(PCIDevice *dev,
+                                   const struct pcie_aer_err_msg *msg)
+{
+    assert(dev->exp);
+    assert(dev->exp->aer_errmsg);
+    dev->exp->aer_errmsg(dev, msg);
+}
+
+/*
+ * Error message handling common to every function: decide whether @msg may
+ * leave @dev and, if so, pass it to the parent port.
+ *
+ * Returns AER_ERR_MSG_MASKED when neither SERR# enable nor the Device Control
+ * enable bits let the message through, or when the parent is not a PCI
+ * Express device; AER_ERR_MSG_SENT otherwise. The result of the parent's
+ * handler is not propagated.
+ */
+static AER_ERR_MSG_RESULT
+pcie_aer_errmsg_alldev(PCIDevice *dev, const struct pcie_aer_err_msg *msg)
+{
+    uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND);
+    /* uncorrectable errors may be sent because SERR# is enabled */
+    bool transmit1 =
+        pcie_aer_err_msg_is_uncor(msg) && (cmd & PCI_COMMAND_SERR);
+    uint32_t pos = pci_pcie_cap(dev);
+    uint32_t devctl = pci_get_word(dev->config + pos + PCI_EXP_DEVCTL);
+    /*
+     * The severity value is used directly as a bit mask on Device Control.
+     * NOTE(review): assumes the AER_ERR_* values equal the matching
+     * PCI_EXP_DEVCTL_*E enable bits -- confirm against the enum definition.
+     */
+    bool transmit2 = msg->severity & devctl;
+    PCIDevice *parent_port;
+
+    if (transmit1) {
+        if (pcie_aer_err_msg_is_uncor(msg)) {
+            /* Signaled System Error */
+            uint8_t *status = dev->config + PCI_STATUS;
+            pci_set_word(status,
+                         pci_get_word(status) | PCI_STATUS_SIG_SYSTEM_ERROR);
+        }
+    }
+
+    if (!(transmit1 || transmit2)) {
+        return AER_ERR_MSG_MASKED;
+    }
+
+    /* send up error message */
+    if (pci_is_express(dev) &&
+        pcie_cap_get_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
+        /* Root port notify system itself,
+           or send the error message to root complex event collector. */
+        /*
+         * if root port is associated to event collector, set
+         * parent_port = root complex event collector
+         * For now root complex event collector isn't supported.
+         */
+        parent_port = NULL;
+    } else {
+        parent_port = pci_bridge_get_device(dev->bus);
+    }
+    /* no parent: the message terminates here and still counts as sent */
+    if (parent_port) {
+        if (!pci_is_express(parent_port)) {
+            /* What to do? */
+            return AER_ERR_MSG_MASKED;
+        }
+        pcie_aer_errmsg(parent_port, msg);
+    }
+    return AER_ERR_MSG_SENT;
+}
+
+/*
+ * Error message handling of a port that contains a virtual bridge.
+ *
+ * An uncorrectable message always sets Received System Error in the
+ * Secondary Status register. The message is forwarded through
+ * pcie_aer_errmsg_alldev() only when SERR# enable is set in Bridge Control;
+ * otherwise AER_ERR_MSG_MASKED is returned.
+ */
+static AER_ERR_MSG_RESULT
+pcie_aer_errmsg_vbridge(PCIDevice *dev, const struct pcie_aer_err_msg *msg)
+{
+    uint8_t *conf = dev->config;
+    uint16_t bridge_control = pci_get_word(conf + PCI_BRIDGE_CONTROL);
+
+    if (pcie_aer_err_msg_is_uncor(msg)) {
+        /* Received System Error */
+        uint16_t sec_status = pci_get_word(conf + PCI_SEC_STATUS);
+
+        pci_set_word(conf + PCI_SEC_STATUS,
+                     sec_status | PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
+    }
+
+    return (bridge_control & PCI_BRIDGE_CTL_SERR) ?
+        pcie_aer_errmsg_alldev(dev, msg) : AER_ERR_MSG_MASKED;
+}
+
+/*
+ * Error message handling of a root port.
+ *
+ * First applies the virtual bridge rules (pcie_aer_errmsg_vbridge()). A
+ * message that passes them is recorded in the Root Error Status and error
+ * source registers, and an interrupt is raised through
+ * pcie_aer_root_notify() when the matching bit of the Root Error Command
+ * register is set.
+ *
+ * Returns AER_ERR_MSG_SENT when the interrupt was raised, otherwise
+ * AER_ERR_MSG_MASKED (or whatever pcie_aer_errmsg_vbridge() returned).
+ */
+static AER_ERR_MSG_RESULT
+pcie_aer_errmsg_root_port(PCIDevice *dev, const struct pcie_aer_err_msg *msg)
+{
+    AER_ERR_MSG_RESULT ret;
+    uint16_t cmd;
+    uint8_t *aer_cap;
+    uint32_t root_cmd;
+    uint32_t root_sta;
+    bool trigger;
+
+    ret = pcie_aer_errmsg_vbridge(dev, msg);
+    if (ret != AER_ERR_MSG_SENT) {
+        return ret;
+    }
+
+    ret = AER_ERR_MSG_MASKED;
+    cmd = pci_get_word(dev->config + PCI_COMMAND);
+    aer_cap = dev->config + pcie_aer_cap(dev);
+    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
+    root_sta = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
+    trigger = false;
+
+    if (cmd & PCI_COMMAND_SERR) {
+        /* System Error. Platform Specific */
+        /* ret = AER_ERR_MSG_SENT; */
+    }
+
+    /* Error Message Received: Root Error Status register */
+    switch (msg->severity) {
+    case AER_ERR_COR:
+        if (root_sta & PCI_ERR_ROOT_COR_RCV) {
+            /* a correctable error is already pending: only flag "multiple" */
+            root_sta |= PCI_ERR_ROOT_MULTI_COR_RCV;
+        } else {
+            if (root_cmd & PCI_ERR_ROOT_CMD_COR_EN) {
+                trigger = true;
+            }
+            /* the source id is latched for the first pending error only */
+            pci_set_word(aer_cap + PCI_ERR_ROOT_COR_SRC, msg->source_id);
+        }
+        root_sta |= PCI_ERR_ROOT_COR_RCV;
+        break;
+    case AER_ERR_NONFATAL:
+        if (!(root_sta & PCI_ERR_ROOT_NONFATAL_RCV) &&
+            root_cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) {
+            trigger = true;
+        }
+        root_sta |= PCI_ERR_ROOT_NONFATAL_RCV;
+        break;
+    case AER_ERR_FATAL:
+        if (!(root_sta & PCI_ERR_ROOT_FATAL_RCV) &&
+            root_cmd & PCI_ERR_ROOT_CMD_FATAL_EN) {
+            trigger = true;
+        }
+        /* fatal error while no uncorrectable error was pending yet */
+        if (!(root_sta & PCI_ERR_ROOT_UNCOR_RCV)) {
+            root_sta |= PCI_ERR_ROOT_FIRST_FATAL;
+        }
+        root_sta |= PCI_ERR_ROOT_FATAL_RCV;
+        break;
+    }
+    if (pcie_aer_err_msg_is_uncor(msg)) {
+        if (root_sta & PCI_ERR_ROOT_UNCOR_RCV) {
+            root_sta |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
+        } else {
+            pci_set_word(aer_cap + PCI_ERR_ROOT_SRC, msg->source_id);
+        }
+        root_sta |= PCI_ERR_ROOT_UNCOR_RCV;
+    }
+    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_sta);
+
+    /*
+     * The severity value is used directly as a bit mask on Root Error Command.
+     * NOTE(review): assumes the AER_ERR_* values equal the matching
+     * PCI_ERR_ROOT_CMD_*_EN bits -- confirm against the enum definition.
+     */
+    if (root_cmd & msg->severity) {
+        /* Error Interrupt(INTx or MSI) */
+        pcie_aer_root_notify(dev, trigger, 1);
+        ret = AER_ERR_MSG_SENT;
+    }
+    return ret;
+}
+
+/*
+ * Make @err the error visible in the AER registers of @dev: set the first
+ * error pointer to the lowest set bit of err->status and fill the header
+ * log and TLP prefix log registers (zeroed when @err carries none).
+ *
+ * The TLP prefix is logged only when @err has PCIE_AER_ERR_TLP_PRESENT set
+ * and the device advertises End-End TLP Prefix support in Device
+ * Capabilities 2.
+ */
+static void pcie_aer_update_log(PCIDevice *dev, const struct pcie_aer_err *err)
+{
+    uint8_t *aer_cap = dev->config + pcie_aer_cap(dev);
+    uint8_t first_bit = ffsl(err->status) - 1;
+    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
+    int i;
+    uint32_t dw;
+
+    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
+    errcap |= PCI_ERR_CAP_FEP(first_bit);
+
+    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
+        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
+            /* 7.10.8 Header Log Register */
+            cpu_to_be32wu(&dw, err->header[i]);
+            memcpy(aer_cap + PCI_ERR_HEADER_LOG + sizeof(err->header[0]) * i,
+                   &dw, sizeof(dw));
+        }
+    } else {
+        /* a TLP prefix without a valid header makes no sense */
+        assert(!(err->flags & PCIE_AER_ERR_TLP_PRESENT));
+        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, sizeof(err->header));
+    }
+
+    /*
+     * PCI_EXP_DEVCAP2_EETLPP is a bit of Device Capabilities 2, so that is
+     * the register to test (not Device Control 2).
+     */
+    if ((err->flags & PCIE_AER_ERR_TLP_PRESENT) &&
+        (pci_get_long(dev->config + pci_pcie_cap(dev) + PCI_EXP_DEVCAP2) &
+         PCI_EXP_DEVCAP2_EETLPP)) {
+        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
+            /* 7.10.12 tlp prefix log register */
+            cpu_to_be32wu(&dw, err->prefix[i]);
+            memcpy(aer_cap + PCI_ERR_TLP_PREFIX_LOG +
+                   sizeof(err->prefix[0]) * i, &dw, sizeof(dw));
+        }
+        errcap |= PCI_ERR_CAP_TLP;
+    } else {
+        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, sizeof(err->prefix));
+    }
+    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
+}
+
+/*
+ * Reset the logged error of @dev: clear the first error pointer and the
+ * "TLP prefix logged" flag and zero the header log and TLP prefix log
+ * registers.
+ */
+static void pcie_aer_clear_log(PCIDevice *dev)
+{
+    uint8_t *aer_cap = dev->config + pcie_aer_cap(dev);
+    uint8_t *cap_reg = aer_cap + PCI_ERR_CAP;
+    uint32_t cleared =
+        pci_get_long(cap_reg) & ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
+
+    pci_set_long(cap_reg, cleared);
+
+    /* the log registers are as large as the matching pcie_aer_err arrays */
+    memset(aer_cap + PCI_ERR_HEADER_LOG, 0,
+           sizeof(((struct pcie_aer_err *)NULL)->header));
+    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
+           sizeof(((struct pcie_aer_err *)NULL)->prefix));
+}
+
+/*
+ * Record @err for @dev.
+ *
+ * When multiple header recording is enabled and the error the first error
+ * pointer refers to is still pending, @err is queued in the error log.
+ * Otherwise @err is written straight to the AER log registers.
+ *
+ * Returns 0 on success, -1 when the error log overflowed.
+ */
+static int pcie_aer_record_error(PCIDevice *dev,
+                                 const struct pcie_aer_err *err)
+{
+    uint8_t *aer_cap = dev->config + pcie_aer_cap(dev);
+    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
+    int fep = PCI_ERR_CAP_FEP(errcap);
+    bool queue_it = false;
+
+    if (errcap & PCI_ERR_CAP_MHRE) {
+        uint32_t pending = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
+
+        queue_it = (pending & (1ULL << fep)) != 0;
+    }
+
+    if (!queue_it) {
+        pcie_aer_update_log(dev, err);
+        return 0;
+    }
+
+    /*  Not first error. queue error; a negative result means overflow */
+    return pcie_aer_log_add_err(&dev->aer_log, err) < 0 ? -1 : 0;
+}
+
+/*
+ * The error the first error pointer of @dev refers to has been cleared.
+ *
+ * Without multiple header recording, or with an empty error log, the log
+ * registers are reset and the status bit of that error is cleared.
+ * Otherwise the oldest queued error becomes the logged one; the status bit
+ * of the old error is cleared only if no queued error has the same status.
+ */
+static void pcie_aer_clear_error(PCIDevice *dev)
+{
+    uint8_t *aer_cap = dev->config + pcie_aer_cap(dev);
+    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
+    /* status bit of the error that is currently logged */
+    uint32_t old_err = (1UL << PCI_ERR_CAP_FEP(errcap));
+    struct pcie_aer_log *aer_log = &dev->aer_log;
+    const struct pcie_aer_err *err;
+    uint32_t consumer;
+
+    if (!(errcap & PCI_ERR_CAP_MHRE) || pcie_aer_log_empty(aer_log)) {
+        pcie_aer_clear_log(dev);
+        pci_set_long(aer_cap + PCI_ERR_UNCOR_STATUS,
+                     pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & ~old_err);
+        return;
+    }
+
+    /* if no same error is queued, clear bit in uncorrectable error status */
+    /* walk the queue from the consumer up to the producer without dequeuing */
+    for (consumer = dev->aer_log.consumer;
+         !pcie_aer_log_empty_index(dev->aer_log.producer, consumer);
+         consumer = pcie_aer_log_next(consumer, dev->aer_log.log_max)) {
+        if (dev->aer_log.log[consumer].status & old_err) {
+            old_err = 0;
+            break;
+        }
+    }
+    if (old_err) {
+        pci_set_long(aer_cap + PCI_ERR_UNCOR_STATUS,
+                     pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & ~old_err);
+    }
+
+    /* promote the oldest queued error to the log registers */
+    err = pcie_aer_log_del_err(aer_log);
+    pcie_aer_update_log(dev, err);
+}
+
+/*
+ * Inject error @err into @dev: update the Device Status and AER status
+ * registers, record the error and send the error message upwards when the
+ * enable bits allow it.
+ *
+ * err->status must have exactly one supported bit set, otherwise nothing
+ * happens. Nothing happens either when @dev is not a PCI Express device.
+ *
+ * A non-function-specific error must be recorded in all functions.
+ * That is the responsibility of the caller of this function.
+ * It is also the caller's responsibility to determine which function should
+ * report the error.
+ *
+ * 6.2.4 Error Logging
+ * 6.2.5 Sequence of Device Error Signaling and Logging Operations
+ * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
+ *             Operations
+ *
+ * Although this implementation can be shortened/optimized, this is kept
+ * parallel to figure 6-2.
+ */
+void pcie_aer_inject_error(PCIDevice *dev, const struct pcie_aer_err *err)
+{
+    uint8_t *exp_cap;
+    uint8_t *aer_cap = NULL;
+    uint16_t devctl = 0;
+    uint16_t devsta;
+    uint32_t status = err->status;
+    uint32_t mask;
+    bool is_unsupported_request =
+        (!(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
+         err->status == PCI_ERR_UNC_UNSUP);
+    bool is_advisory_nonfatal = false;  /* for advisory non-fatal error */
+    uint32_t uncor_status = 0;          /* for advisory non-fatal error */
+    struct pcie_aer_err_msg msg;
+    int is_header_log_overflowed = 0;
+
+    if (!pci_is_express(dev)) {
+        /* What to do? */
+        return;
+    }
+
+    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
+        status &= PCI_ERR_COR_SUPPORTED;
+    } else {
+        status &= PCI_ERR_UNC_SUPPORTED;
+    }
+    if (!status || status & (status - 1)) {
+        /* invalid status bit. one and only one bit must be set */
+        return;
+    }
+
+    exp_cap = dev->config + pci_pcie_cap(dev);
+    /*
+     * Device Status is a 16bit register that is updated below whether or not
+     * AER is present. Always start from its current value so that bits which
+     * are already set survive, and access it as a word so that the register
+     * following it is never touched.
+     */
+    devsta = pci_get_word(exp_cap + PCI_EXP_DEVSTA);
+    if (dev->exp->aer_cap) {
+        aer_cap = dev->config + pcie_aer_cap(dev);
+        /* as before, Device Control is honoured only when AER is present */
+        devctl = pci_get_word(exp_cap + PCI_EXP_DEVCTL);
+    }
+    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
+    correctable_error:
+        devsta |= PCI_EXP_DEVSTA_CED;
+        if (is_unsupported_request) {
+            devsta |= PCI_EXP_DEVSTA_URD;
+        }
+        pci_set_word(exp_cap + PCI_EXP_DEVSTA, devsta);
+
+        if (aer_cap) {
+            pci_set_long(aer_cap + PCI_ERR_COR_STATUS,
+                         pci_get_long(aer_cap + PCI_ERR_COR_STATUS) | status);
+            mask = pci_get_long(aer_cap + PCI_ERR_COR_MASK);
+            if (mask & status) {
+                /* masked: the status is recorded but nothing is reported */
+                return;
+            }
+            if (is_advisory_nonfatal) {
+                uint32_t uncor_mask =
+                    pci_get_long(aer_cap + PCI_ERR_UNCOR_MASK);
+                if (!(uncor_mask & uncor_status)) {
+                    is_header_log_overflowed = pcie_aer_record_error(dev, err);
+                }
+                pci_set_long(aer_cap + PCI_ERR_UNCOR_STATUS,
+                             pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) |
+                             uncor_status);
+            }
+        }
+
+        if (is_unsupported_request && !(devctl & PCI_EXP_DEVCTL_URRE)) {
+            return;
+        }
+        if (!(devctl & PCI_EXP_DEVCTL_CERE)) {
+            return;
+        }
+        msg.severity = AER_ERR_COR;
+    } else {
+        bool is_fatal =
+            (pcie_aer_uncor_default_severity(status) == AER_ERR_FATAL);
+        uint16_t cmd;
+
+        if (aer_cap) {
+            /* with AER the severity register overrides the default */
+            is_fatal = status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+        }
+        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
+            /* report as the correctable "advisory non-fatal" error instead */
+            is_advisory_nonfatal = true;
+            uncor_status = status;
+            status = PCI_ERR_COR_ADV_NONFATAL;
+            goto correctable_error;
+        }
+        if (is_fatal) {
+            devsta |= PCI_EXP_DEVSTA_FED;
+        } else {
+            devsta |= PCI_EXP_DEVSTA_NFED;
+        }
+        if (is_unsupported_request) {
+            devsta |= PCI_EXP_DEVSTA_URD;
+        }
+        pci_set_word(exp_cap + PCI_EXP_DEVSTA, devsta);
+
+        if (aer_cap) {
+            mask = pci_get_long(aer_cap + PCI_ERR_UNCOR_MASK);
+            if (mask & status) {
+                /* masked: the status is recorded but nothing is reported */
+                pci_set_long(aer_cap + PCI_ERR_UNCOR_STATUS,
+                             pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) |
+                             status);
+                return;
+            }
+
+            /* record before the status bit is set (first error detection) */
+            is_header_log_overflowed = pcie_aer_record_error(dev, err);
+            pci_set_long(aer_cap + PCI_ERR_UNCOR_STATUS,
+                         pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) |
+                         status);
+        }
+
+        cmd = pci_get_word(dev->config + PCI_COMMAND);
+        if (is_unsupported_request &&
+            !(devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
+            return;
+        }
+        if (is_fatal) {
+            if (!((cmd & PCI_COMMAND_SERR) ||
+                  (devctl & PCI_EXP_DEVCTL_FERE))) {
+                return;
+            }
+            msg.severity = AER_ERR_FATAL;
+        } else {
+            if (!((cmd & PCI_COMMAND_SERR) ||
+                  (devctl & PCI_EXP_DEVCTL_NFERE))) {
+                return;
+            }
+            msg.severity = AER_ERR_NONFATAL;
+        }
+    }
+
+    /* send up error message */
+    msg.source_id = err->source_id;
+    pcie_aer_errmsg(dev, &msg);
+
+    if (is_header_log_overflowed) {
+        /* the error log overflowed: report that as a correctable error */
+        struct pcie_aer_err header_log_overflow = {
+            .status = PCI_ERR_COR_HL_OVERFLOW,
+            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
+            .header = {0, 0, 0, 0},
+            .prefix = {0, 0, 0, 0},
+        };
+        pcie_aer_inject_error(dev, &header_log_overflow);
+    }
+}
+
+/* aer root error command/status */
+/* every error reporting enable bit of the Root Error Command register */
+#define PCI_ERR_ROOT_CMD_EN_MASK        (PCI_ERR_ROOT_CMD_COR_EN |      \
+                                         PCI_ERR_ROOT_CMD_NONFATAL_EN | \
+                                         PCI_ERR_ROOT_CMD_FATAL_EN)
+
+/*
+ * Advanced Error Interrupt Message Number field of the Root Error Status
+ * register. The shift must be the position of the lowest bit of the mask
+ * (0xf8000000 starts at bit 27); with a smaller shift the low bit of the
+ * vector is masked away when it is stored.
+ */
+#define PCI_ERR_ROOT_IRQ_SHIFT          27
+#define PCI_ERR_ROOT_IRQ                0xf8000000
+/* write-1-to-clear report bits of the Root Error Status register */
+#define PCI_ERR_ROOT_STATUS_REPORT_MASK (PCI_ERR_ROOT_COR_RCV |         \
+                                         PCI_ERR_ROOT_MULTI_COR_RCV |   \
+                                         PCI_ERR_ROOT_UNCOR_RCV |       \
+                                         PCI_ERR_ROOT_MULTI_UNCOR_RCV | \
+                                         PCI_ERR_ROOT_FIRST_FATAL |     \
+                                         PCI_ERR_ROOT_NONFATAL_RCV |    \
+                                         PCI_ERR_ROOT_FATAL_RCV)
+
+/*
+ * Store @vector in the interrupt message number field (PCI_ERR_ROOT_IRQ) of
+ * the Root Error Status register of @dev. All other bits of the register
+ * are preserved; bits of @vector that do not fit the field are dropped.
+ */
+void pcie_aer_root_set_vector(PCIDevice *dev, uint8_t vector)
+{
+    uint8_t *root_sta_reg =
+        dev->config + pcie_aer_cap(dev) + PCI_ERR_ROOT_STATUS;
+    uint32_t irq_field =
+        (((uint32_t)vector) << PCI_ERR_ROOT_IRQ_SHIFT) & PCI_ERR_ROOT_IRQ;
+    uint32_t other_bits = pci_get_long(root_sta_reg) & ~PCI_ERR_ROOT_IRQ;
+
+    pci_set_long(root_sta_reg, other_bits | irq_field);
+}
+
+/*
+ * Read back the interrupt message number stored in the Root Error Status
+ * register of @dev by pcie_aer_root_set_vector().
+ */
+static uint8_t pcie_aer_root_get_vector(PCIDevice *dev)
+{
+    uint8_t *root_sta_reg =
+        dev->config + pcie_aer_cap(dev) + PCI_ERR_ROOT_STATUS;
+    uint32_t irq_field = pci_get_long(root_sta_reg) & PCI_ERR_ROOT_IRQ;
+
+    return irq_field >> PCI_ERR_ROOT_IRQ_SHIFT;
+}
+
+/*
+ * Forward an AER root interrupt request for @dev to pcie_notify(), using the
+ * vector stored in the Root Error Status register. @trigger and @level are
+ * passed through unchanged.
+ */
+static void pcie_aer_root_notify(PCIDevice *dev, bool trigger, int level)
+{
+    pcie_notify(dev, pcie_aer_root_get_vector(dev), trigger, level);
+}
+
+/*
+ * Root port specific part of the AER set up: install the root port error
+ * message handler and make the error reporting enable bits of the Root
+ * Error Command register writable.
+ */
+void pcie_aer_root_init(PCIDevice *dev)
+{
+    uint16_t pos = pcie_aer_cap(dev);
+    uint8_t *root_cmd_wmask = dev->wmask + pos + PCI_ERR_ROOT_COMMAND;
+
+    dev->exp->aer_errmsg = pcie_aer_errmsg_root_port;
+    pci_set_long(root_cmd_wmask, PCI_ERR_ROOT_CMD_EN_MASK);
+}
+
+/*
+ * Reset the root port specific AER state of @dev: the Root Error Command
+ * register is cleared.
+ */
+void pcie_aer_root_reset(PCIDevice *dev)
+{
+    uint8_t *root_cmd_reg =
+        dev->config + pcie_aer_cap(dev) + PCI_ERR_ROOT_COMMAND;
+
+    pci_set_long(root_cmd_reg, 0);
+
+    /*
+     * Advanced Error Interrupt Message Number in Root Error Status Register
+     * must be updated by chip dependent code.
+     */
+}
+
+/*
+ * True when at least one error class is both enabled in the Root Error
+ * Command value @cmd and reported as received in the Root Error Status
+ * value @sta.
+ */
+static bool pcie_aer_root_does_trigger(uint32_t cmd, uint32_t sta)
+{
+    if ((cmd & PCI_ERR_ROOT_CMD_COR_EN) && (sta & PCI_ERR_ROOT_COR_RCV)) {
+        return true;
+    }
+    if ((cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
+        (sta & PCI_ERR_ROOT_NONFATAL_RCV)) {
+        return true;
+    }
+    if ((cmd & PCI_ERR_ROOT_CMD_FATAL_EN) && (sta & PCI_ERR_ROOT_FATAL_RCV)) {
+        return true;
+    }
+    return false;
+}
+
+/*
+ * Config space write hook for the root port specific AER registers of @dev.
+ *
+ * @addr, @val and @len describe the guest write; @root_cmd_prev is the value
+ * the Root Error Command register had before the write.
+ *
+ * Handles the write-1-to-clear bits of Root Error Status. After a write to
+ * Root Error Command that leaves at least one enable bit set, the interrupt
+ * state is re-evaluated through pcie_aer_root_notify().
+ */
+void pcie_aer_root_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint32_t root_cmd_prev)
+{
+    uint16_t pos = pcie_aer_cap(dev);
+    uint8_t *aer_cap = dev->config + pos;
+    uint32_t root_status;
+
+    if (ranges_overlap(addr, len, pos + PCI_ERR_ROOT_STATUS, 4)) {
+        /* RW1CS */
+        pcie_w1c_long(dev, pos + PCI_ERR_ROOT_STATUS,
+                      PCI_ERR_ROOT_STATUS_REPORT_MASK, addr, val);
+    }
+
+    /* root command */
+    if (ranges_overlap(addr, len, pos + PCI_ERR_ROOT_COMMAND, 4)) {
+        uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
+        if (root_cmd & PCI_ERR_ROOT_CMD_EN_MASK) {
+            bool trigger = false;
+            int level = 0;
+            /* enable bits that this write switched on */
+            uint32_t root_cmd_set = (root_cmd_prev ^ root_cmd) & root_cmd;
+
+            /* 0 -> 1 */
+            root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
+            /* a newly enabled class with a pending error requests a trigger */
+            if (pcie_aer_root_does_trigger(root_cmd_set, root_status)) {
+                trigger = true;
+            }
+            /* level follows any enabled class with a pending error */
+            if (pcie_aer_root_does_trigger(root_cmd, root_status)) {
+                level = 1;
+            }
+            pcie_aer_root_notify(dev, trigger, level);
+        }
+    }
+}
+
+/*
+ * Save/load description of one struct pcie_aer_err, i.e. of one entry of
+ * the error log: status, source id, flags, header and prefix.
+ */
+static const VMStateDescription vmstate_pcie_aer_err = {
+    .name = "PCIE_AER_ERROR",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields     = (VMStateField[]) {
+        VMSTATE_UINT32(status, struct pcie_aer_err),
+        VMSTATE_UINT16(source_id, struct pcie_aer_err),
+        VMSTATE_UINT16(flags, struct pcie_aer_err),
+        VMSTATE_UINT32_ARRAY(header, struct pcie_aer_err, 4),
+        VMSTATE_UINT32_ARRAY(prefix, struct pcie_aer_err, 4),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * VMState field for an array of structs that is reached through the pointer
+ * _field of _state. The number of elements is read from the uint16_t member
+ * _field_num of _state; each element has type _type and is described by
+ * _vmsd.
+ */
+#define VMSTATE_PCIE_AER_ERRS(_field, _state, _field_num, _vmsd, _type) { \
+    .name       = (stringify(_field)),                                    \
+    .version_id = 0,                                                      \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),     \
+    .size       = sizeof(_type),                                          \
+    .vmsd       = &(_vmsd),                                               \
+    .flags      = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT,           \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),          \
+}
+
+/*
+ * Save/load description of struct pcie_aer_log: the ring indices, the number
+ * of entries (log_max) and the log_max entries themselves.
+ */
+const VMStateDescription vmstate_pcie_aer_log = {
+    .name = "PCIE_AER_ERROR_LOG",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields     = (VMStateField[]) {
+        VMSTATE_UINT32(producer, struct pcie_aer_log),
+        VMSTATE_UINT32(consumer, struct pcie_aer_log),
+        VMSTATE_UINT16(log_max, struct pcie_aer_log),
+        /* log_max must precede the array: it is the array's element count */
+        VMSTATE_PCIE_AER_ERRS(log, struct pcie_aer_log, log_max,
+                              vmstate_pcie_aer_err, struct pcie_aer_err),
+        VMSTATE_END_OF_LIST()
+    }
+};
diff --git a/hw/pcie.h b/hw/pcie.h
new file mode 100644
index 0000000..07f42c6
--- /dev/null
+++ b/hw/pcie.h
@@ -0,0 +1,186 @@ 
+/*
+ * pcie.h
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_PCIE_H
+#define QEMU_PCIE_H
+
+#include "hw.h"
+
+enum PCIExpressIndicator {
+    /* values for the attention and power indicators (hex, not GNU 0b..) */
+    PCI_EXP_HP_IND_RESERVED     = 0x0,
+    PCI_EXP_HP_IND_ON           = 0x1,
+    PCI_EXP_HP_IND_BLINK        = 0x2,
+    PCI_EXP_HP_IND_OFF          = 0x3,
+};
+
+enum PCIExpressHotPlugEvent {
+    /* the bits match the bits in Slot Control/Status registers.
+     * PCI_EXP_HP_EV_xxx = PCI_EXP_SLTCTL_xxxE = PCI_EXP_SLTSTA_xxx
+     */
+    PCI_EXP_HP_EV_ABP   = 0x01,         /* attention button pressed */
+    PCI_EXP_HP_EV_PDC   = 0x08,         /* presence detect changed */
+    PCI_EXP_HP_EV_CCI   = 0x10,         /* command completed */
+
+    PCI_EXP_HP_EV_SUPPORTED     = 0x19, /* supported mask: ABP | PDC | CCI */
+    /* events not listed aren't supported */
+};
+
+typedef void (*pcie_flr_fn)(PCIDevice *dev); /* FLR callback type */
+
+struct pcie_aer_err_msg; /* defined further down in this header */
+enum AER_ERR_MSG_RESULT { /* return value of a pcie_aer_errmsg_fn */
+    AER_ERR_MSG_MASKED,
+    AER_ERR_MSG_SENT,
+};
+typedef enum AER_ERR_MSG_RESULT AER_ERR_MSG_RESULT;
+typedef AER_ERR_MSG_RESULT (*pcie_aer_errmsg_fn)(PCIDevice *dev, const struct pcie_aer_err_msg *msg);
+
+struct PCIExpressDevice { /* PCIe state reached through PCIDevice::exp */
+    /* Offset of express capability in config space */
+    uint8_t exp_cap;
+
+    /* FLR */
+    pcie_flr_fn flr; /* presumably set by pcie_cap_flr_init() -- confirm */
+
+    /* AER */
+    uint16_t aer_cap; /* presumably AER ext capability offset -- confirm */
+    pcie_aer_errmsg_fn aer_errmsg; /* handler for AER error messages */
+};
+
+struct pcie_aer_log {
+    uint32_t producer;
+    uint32_t consumer;
+
+#define PCIE_AER_LOG_MAX_DEFAULT        8
+#define PCIE_AER_LOG_MAX_MAX            128 /* what is appropriate? */
+#define PCIE_AER_LOG_MAX_UNSET          (~(uint16_t)0)
+    uint16_t log_max;
+
+    struct pcie_aer_err *log;
+};
+
+extern const VMStateDescription vmstate_pcie_aer_log;
+
+/* PCI express capability helper functions; definitions live in hw/pcie.c */
+int pci_pcie_cap_init(PCIDevice *dev,
+                      uint8_t offset, uint8_t type, uint8_t port);
+int pci_pcie_cap_exit(PCIDevice *dev);
+uint8_t pcie_cap_get_type(const PCIDevice *dev);
+void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector);
+uint8_t pcie_cap_flags_get_vector(PCIDevice *dev);
+
+void pcie_cap_deverr_init(PCIDevice *dev);
+void pcie_cap_deverr_reset(PCIDevice *dev);
+void pcie_cap_deverr_write_config(PCIDevice *dev,
+                                  uint32_t addr, uint32_t val, int len);
+
+void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot);
+void pcie_cap_slot_reset(PCIDevice *dev);
+void pcie_cap_slot_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint16_t sltctl_prev);
+void pcie_cap_slot_push_attention_button(PCIDevice *dev);
+
+void pcie_cap_root_init(PCIDevice *dev);
+void pcie_cap_root_reset(PCIDevice *dev);
+
+void pcie_cap_flr_init(PCIDevice *dev, pcie_flr_fn flr); /* flr: callback */
+void pcie_cap_flr_write_config(PCIDevice *dev,
+                           uint32_t addr, uint32_t val, int len);
+
+void pcie_cap_ari_init(PCIDevice *dev);
+void pcie_cap_ari_reset(PCIDevice *dev);
+bool pcie_cap_is_ari_enabled(const PCIDevice *dev);
+
+/* PCI express extended capability helper functions; offsets are 16 bit */
+uint16_t pcie_find_ext_capability(PCIDevice *dev, uint16_t cap_id);
+int pcie_add_ext_capability(PCIDevice *dev,
+                            uint16_t cap_id, uint8_t cap_ver, uint16_t size);
+int pcie_append_ext_capability(PCIDevice *dev,
+                               uint16_t cap_id, uint8_t cap_ver,
+                               uint16_t offset, uint16_t size);
+void pcie_del_ext_capability(PCIDevice *dev, uint16_t cap_id, uint16_t size);
+void pcie_reserve_ext_capability(PCIDevice *dev,
+                                 uint16_t offset, uint16_t size);
+
+int pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
+
+/* PCI express extended capabilities */
+
+/* AER */
+/* AER error severity */
+enum PCIE_AER_SEVERITY {
+    /* These values are the same as those in the
+     * Root Error Command register in the AER extended capability and
+     * the Root Control register in the PCI Express capability.
+     */
+    AER_ERR_COR         = 0x1, /* correctable */
+    AER_ERR_NONFATAL    = 0x2, /* uncorrectable, non-fatal */
+    AER_ERR_FATAL       = 0x4, /* uncorrectable, fatal */
+};
+
+/* AER error message: an error signaling message carries only the error
+   severity and the source id. See 2.2.8.3 error signaling messages */
+struct pcie_aer_err_msg {
+    enum PCIE_AER_SEVERITY severity;
+    uint16_t source_id; /* bdf */
+};
+
+static inline bool pcie_aer_err_msg_is_uncor(const struct pcie_aer_err_msg *msg)
+{
+    const enum PCIE_AER_SEVERITY sev = msg->severity;
+    return sev == AER_ERR_FATAL || sev == AER_ERR_NONFATAL; /* uncorrectable */
+}
+
+/* one AER error; element type of pcie_aer_log.log */
+struct pcie_aer_err {
+    uint32_t status;    /* error status bits */
+    uint16_t source_id; /* bdf (bus, device, function) */
+
+#define PCIE_AER_ERR_IS_CORRECTABLE     0x1     /* correctable/uncorrectable */
+#define PCIE_AER_ERR_MAYBE_ADVISORY     0x2     /* maybe advisory non-fatal */
+#define PCIE_AER_ERR_HEADER_VALID       0x4     /* TLP header is logged */
+#define PCIE_AER_ERR_TLP_PRESENT        0x8     /* TLP Prefix is logged */
+    uint16_t flags;     /* PCIE_AER_ERR_* bits defined above */
+
+    uint32_t header[4]; /* TLP header */
+    uint32_t prefix[4]; /* TLP header prefix */
+};
+
+int pcie_aer_init(PCIDevice *dev, uint16_t offset);
+void pcie_aer_exit(PCIDevice *dev);
+void pcie_aer_write_config(PCIDevice *dev,
+                           uint32_t addr, uint32_t val, int len);
+void pcie_aer_write_config_vbridge(PCIDevice *dev,
+                                   uint32_t addr, uint32_t val, int len);
+
+/* AER helpers for root ports */
+void pcie_aer_root_set_vector(PCIDevice *dev, uint8_t vector);
+void pcie_aer_root_init(PCIDevice *dev);
+void pcie_aer_root_reset(PCIDevice *dev);
+void pcie_aer_root_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint32_t root_cmd_prev);
+
+/* error injection: @err describes the error to inject on @dev */
+void pcie_aer_inject_error(PCIDevice *dev, const struct pcie_aer_err *err);
+
+#endif /* QEMU_PCIE_H */
diff --git a/qemu-common.h b/qemu-common.h
index d735235..6d9ee26 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -219,6 +219,7 @@  typedef struct PCIHostState PCIHostState;
 typedef struct PCIExpressHost PCIExpressHost;
 typedef struct PCIBus PCIBus;
 typedef struct PCIDevice PCIDevice;
+typedef struct PCIExpressDevice PCIExpressDevice; /* see hw/pcie.h */
 typedef struct PCIBridge PCIBridge;
 typedef struct SerialState SerialState;
 typedef struct IRQState *qemu_irq;