Patchwork [v4,09/15] pcie/aer: helper functions for pcie aer capability

login
register
mail settings
Submitter Isaku Yamahata
Date Oct. 18, 2010, 3:17 a.m.
Message ID <b42a90ad6c61f104f2a874e238e439d78885d5c4.1287371107.git.yamahata@valinux.co.jp>
Download mbox | patch
Permalink /patch/68112/
State New
Headers show

Comments

Isaku Yamahata - Oct. 18, 2010, 3:17 a.m.
This patch implements helper functions for pcie aer capability
which will be used later.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
Changes v3 -> v4:
- various naming fixes.
- use pci bit operation helper function
- eliminate errmsg function pointer
- replace pci_shift_xxx() with PCIDevice::written
- uncorrect error status register.
- dropped pcie_aer_cap()

Changes v2 -> v3:
- split out from pcie.[ch] to pcie_aer.[ch] to make the files shorter.
- embedded PCIExpressDevice into PCIDevice.
- CodingStyle fix
---
 Makefile.objs |    2 +-
 hw/pcie.h     |    5 +
 hw/pcie_aer.c |  789 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pcie_aer.h |  105 ++++++++
 qemu-common.h |    3 +
 5 files changed, 903 insertions(+), 1 deletions(-)
 create mode 100644 hw/pcie_aer.c
 create mode 100644 hw/pcie_aer.h
Michael S. Tsirkin - Oct. 18, 2010, 5:45 a.m.
On Mon, Oct 18, 2010 at 12:17:50PM +0900, Isaku Yamahata wrote:
> This patch implements helper functions for pcie aer capability
> which will be used later.
> 
> Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
> ---
> Changes v3 -> v4:
> - various naming fixes.
> - use pci bit operation helper function
> - eliminate errmsg function pointer
> - replace pci_shift_xxx() with PCIDevice::written
> - uncorrect error status register.
> - dropped pcie_aer_cap()
> 
> Changes v2 -> v3:
> - split out from pcie.[ch] to pcie_aer.[ch] to make the files shorter.
> - embedded PCIExpressDevice into PCIDevice.
> - CodingStyle fix
> ---
>  Makefile.objs |    2 +-
>  hw/pcie.h     |    5 +
>  hw/pcie_aer.c |  789 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/pcie_aer.h |  105 ++++++++
>  qemu-common.h |    3 +
>  5 files changed, 903 insertions(+), 1 deletions(-)
>  create mode 100644 hw/pcie_aer.c
>  create mode 100644 hw/pcie_aer.h
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index eeb5134..68bcc48 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -186,7 +186,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o
>  # PCI watchdog devices
>  hw-obj-y += wdt_i6300esb.o
>  
> -hw-obj-y += pcie.o
> +hw-obj-y += pcie.o pcie_aer.o
>  hw-obj-y += msix.o msi.o
>  
>  # PCI network cards
> diff --git a/hw/pcie.h b/hw/pcie.h
> index 603bb8b..891e369 100644
> --- a/hw/pcie.h
> +++ b/hw/pcie.h
> @@ -24,6 +24,7 @@
>  #include "hw.h"
>  #include "pci_regs.h"
>  #include "pcie_regs.h"
> +#include "pcie_aer.h"
>  
>  typedef enum {
>      /* for attention and power indicator */
> @@ -66,6 +67,10 @@ struct PCIExpressDevice {
>  
>      /* FLR */
>      pcie_flr_fn flr;
> +
> +    /* AER */
> +    uint16_t aer_cap;
> +    PCIEAERLog aer_log;
>  };
>  
>  /* PCI express capability helper functions */
> diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c
> new file mode 100644
> index 0000000..c86f2b9
> --- /dev/null
> +++ b/hw/pcie_aer.c
> @@ -0,0 +1,789 @@
> +/*
> + * pcie_aer.c
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "sysemu.h"
> +#include "pci_bridge.h"
> +#include "pcie.h"
> +#include "msix.h"
> +#include "msi.h"
> +#include "pci_internals.h"
> +#include "pcie_regs.h"
> +
> +//#define DEBUG_PCIE
> +#ifdef DEBUG_PCIE
> +# define PCIE_DPRINTF(fmt, ...)                                         \
> +    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
> +#else
> +# define PCIE_DPRINTF(fmt, ...) do {} while (0)
> +#endif
> +#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
> +    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
> +
> +static void pcie_aer_clear_error(PCIDevice *dev);
> +static uint8_t pcie_aer_root_get_vector(PCIDevice *dev);
> +static AERMsgResult
> +pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg);
> +static AERMsgResult
> +pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg);
> +static AERMsgResult
> +pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg);
> +
> +/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
> +static PCIEAERSeverity pcie_aer_uncor_default_severity(uint32_t status)
> +{
> +    switch (status) {
> +    case PCI_ERR_UNC_INTN:
> +    case PCI_ERR_UNC_DLP:
> +    case PCI_ERR_UNC_SDN:
> +    case PCI_ERR_UNC_RX_OVER:
> +    case PCI_ERR_UNC_FCP:
> +    case PCI_ERR_UNC_MALF_TLP:
> +        return AER_ERR_FATAL;
> +    case PCI_ERR_UNC_POISON_TLP:
> +    case PCI_ERR_UNC_ECRC:
> +    case PCI_ERR_UNC_UNSUP:
> +    case PCI_ERR_UNC_COMP_TIME:
> +    case PCI_ERR_UNC_COMP_ABORT:
> +    case PCI_ERR_UNC_UNX_COMP:
> +    case PCI_ERR_UNC_ACSV:
> +    case PCI_ERR_UNC_MCBTLP:
> +    case PCI_ERR_UNC_ATOP_EBLOCKED:
> +    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
> +        return AER_ERR_NONFATAL;
> +    default:
> +        break;
> +    }
> +    abort();
> +    return AER_ERR_FATAL;
> +}
> +
> +static uint32_t aer_log_next(uint32_t i, uint32_t max)
> +{
> +    return (i + 1) % max;
> +}
> +
> +static bool aer_log_empty_index(uint32_t producer, uint32_t consumer)
> +{
> +    return producer == consumer;
> +}
> +
> +static bool aer_log_empty(PCIEAERLog *aer_log)
> +{
> +    return aer_log_empty_index(aer_log->producer, aer_log->consumer);
> +}
> +
> +static bool aer_log_full(PCIEAERLog *aer_log)
> +{
> +    return aer_log_next(aer_log->producer, aer_log->log_max) ==
> +        aer_log->consumer;
> +}
> +
> +static uint32_t aer_log_add(PCIEAERLog *aer_log)
> +{
> +    uint32_t i = aer_log->producer;
> +    aer_log->producer = aer_log_next(aer_log->producer, aer_log->log_max);
> +    return i;
> +}
> +
> +static uint32_t aer_log_del(PCIEAERLog *aer_log)
> +{
> +    uint32_t i = aer_log->consumer;
> +    aer_log->consumer = aer_log_next(aer_log->consumer, aer_log->log_max);
> +    return i;
> +}
> +
> +static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
> +{
> +    uint32_t i;
> +    if (aer_log_full(aer_log)) {
> +        return -1;
> +    }
> +    i = aer_log_add(aer_log);
> +    memcpy(&aer_log->log[i], err, sizeof(*err));
> +    return 0;
> +}
> +
> +static const PCIEAERErr* aer_log_del_err(PCIEAERLog *aer_log)
> +{
> +    uint32_t i;
> +    assert(!aer_log_empty(aer_log));
> +    i = aer_log_del(aer_log);
> +    return &aer_log->log[i];
> +}
> +
> +static void aer_log_clear_all_err(PCIEAERLog *aer_log)
> +{
> +    aer_log->producer = 0;
> +    aer_log->consumer = 0;
> +}
> +
> +void pcie_aer_init(PCIDevice *dev, uint16_t offset)
> +{
> +    PCIExpressDevice *exp;
> +
> +    pci_set_bit_word(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> +    pci_set_bit_word(dev->w1cmask + PCI_STATUS, PCI_STATUS_SIG_SYSTEM_ERROR);
> +
> +    pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
> +                        offset, PCI_ERR_SIZEOF);
> +    exp = &dev->exp;
> +    exp->aer_cap = offset;
> +    if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
> +        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
> +    }
> +    if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_MAX) {
> +        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_MAX;
> +    }
> +    dev->exp.aer_log.log = qemu_mallocz(sizeof(dev->exp.aer_log.log[0]) *
> +                                        dev->exp.aer_log.log_max);
> +
> +    /* On reset PCI_ERR_CAP_MHRE is disabled
> +     * PCI_ERR_CAP_MHRE is RWS so that reset doesn't affect related
> +     * registers
> +     */
> +    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
> +                 PCI_ERR_UNC_SUPPORTED);
> +
> +    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
> +                 PCI_ERR_UNC_SUPPORTED);
> +
> +    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
> +                 PCI_ERR_UNC_SEVERITY_DEFAULT);
> +    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
> +                 PCI_ERR_UNC_SUPPORTED);
> +
> +    pci_set_bit_long(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
> +                     PCI_ERR_COR_STATUS);
> +
> +    pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
> +                 PCI_ERR_COR_MASK_DEFAULT);
> +    pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
> +                 PCI_ERR_COR_SUPPORTED);
> +
> +    /* capabilities and control. multiple header logging is supported */
> +    if (dev->exp.aer_log.log_max > 0) {
> +        pci_set_long(dev->config + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
> +                     PCI_ERR_CAP_MHRC);
> +        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
> +                     PCI_ERR_CAP_MHRE);
> +    } else {
> +        pci_set_long(dev->config + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
> +        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
> +                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
> +    }
> +
> +    switch (pcie_cap_get_type(dev)) {
> +    case PCI_EXP_TYPE_ROOT_PORT:
> +        /* this case will be set by pcie_aer_root_init() */
> +        /* fallthrough */
> +    case PCI_EXP_TYPE_DOWNSTREAM:
> +    case PCI_EXP_TYPE_UPSTREAM:
> +        pci_set_bit_word(dev->wmask + PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_SERR);
> +        pci_set_bit_long(dev->w1cmask + PCI_STATUS,
> +                         PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
> +        break;
> +    default:
> +        /* nothing */
> +        break;
> +    }
> +}
> +
> +void pcie_aer_exit(PCIDevice *dev)
> +{
> +    qemu_free(dev->exp.aer_log.log);
> +}
> +
> +void pcie_aer_write_config(PCIDevice *dev,
> +                           uint32_t addr, uint32_t val, int len,
> +                           uint32_t uncorsta_old)
> +{
> +    uint32_t pos = dev->exp.aer_cap;
> +
> +    /* uncorrectable */
> +    uint32_t written =
> +        pci_get_long(dev->written + pos + PCI_ERR_UNCOR_STATUS) &
> +        PCI_ERR_UNC_SUPPORTED;
> +        /* if uncorrectable status register isn't touched, written == 0*/
> +    uint32_t errcap = pci_get_long(dev->config + pos + PCI_ERR_CAP);
> +    uint32_t first_error = (1U << PCI_ERR_CAP_FEP(errcap));
> +    if ((uncorsta_old & first_error) && (written & first_error)) {
> +        pcie_aer_clear_error(dev);
> +    }
> +
> +    /* capability & control */
> +    if (ranges_overlap(addr, len, pos + PCI_ERR_CAP, 4)) {
> +        uint32_t err_cap = pci_get_long(dev->config + pos + PCI_ERR_CAP);
> +        if (!(err_cap & PCI_ERR_CAP_MHRE)) {
> +            aer_log_clear_all_err(&dev->exp.aer_log);
> +        }
> +    }
> +}
> +
> +static inline void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    uint8_t type;
> +    AERMsgResult result;
> +
> +    assert(pci_is_express(dev));
> +
> +    type = pcie_cap_get_type(dev);
> +    if (type == PCI_EXP_TYPE_ROOT_PORT ||
> +        type == PCI_EXP_TYPE_UPSTREAM ||
> +        type == PCI_EXP_TYPE_DOWNSTREAM) {
> +        result = pcie_aer_msg_vbridge(dev, msg);
> +        if (result != AER_MSG_SENT) {
> +            return;
> +        }
> +    }
> +    result = pcie_aer_msg_alldev(dev, msg);
> +    if (type == PCI_EXP_TYPE_ROOT_PORT && result == AER_MSG_SENT) {
> +        pcie_aer_msg_root_port(dev, msg);
> +    }
> +}
> +
> +static AERMsgResult
> +pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND);
> +    bool transmit1 =
> +        pcie_aer_msg_is_uncor(msg) && (cmd & PCI_COMMAND_SERR);
> +    uint32_t devctl = pci_get_word(dev->config +
> +                                   dev->exp.exp_cap + PCI_EXP_DEVCTL);
> +    bool transmit2 = msg->severity & devctl;
> +    PCIDevice *parent_port;
> +
> +    if (transmit1) {
> +        if (pcie_aer_msg_is_uncor(msg)) {
> +            /* Signaled System Error */
> +            pci_set_bit_word(dev->config + PCI_STATUS,
> +                             PCI_STATUS_SIG_SYSTEM_ERROR);
> +        }
> +    }
> +
> +    if (!(transmit1 || transmit2)) {
> +        return AER_MSG_MASKED;
> +    }
> +
> +    /* send up error message */
> +    if (pci_is_express(dev) &&
> +        pcie_cap_get_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
> +        /* Root port notify system itself,
> +           or send the error message to root complex event collector. */
> +        /*
> +         * if root port is associated to event collector, set
> +         * parent_port = root complex event collector
> +         * For now root complex event collector isn't supported.
> +         */
> +        parent_port = NULL;
> +    } else {
> +        parent_port = pci_bridge_get_device(dev->bus);
> +    }
> +    if (parent_port) {
> +        if (!pci_is_express(parent_port)) {
> +            /* What to do? */
> +            return AER_MSG_MASKED;
> +        }
> +        pcie_aer_msg(parent_port, msg);
> +    }
> +    return AER_MSG_SENT;
> +}
> +
> +static AERMsgResult
> +pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
> +
> +    if (pcie_aer_msg_is_uncor(msg)) {
> +        /* Received System Error */
> +        pci_set_bit_word(dev->config + PCI_SEC_STATUS,
> +                         PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
> +    }
> +
> +    if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
> +        return AER_MSG_MASKED;
> +    }
> +    return AER_MSG_SENT;
> +}
> +
> +static void pcie_aer_root_port_notify(PCIDevice *dev,
> +                                      bool msi_trigger, int int_level)
> +{
> +    unsigned int msi_vector = pcie_aer_root_get_vector(dev);
> +    unsigned int int_irq = 0; /* INTA# is chosen for aer notification */
> +
> +    if (msix_enabled(dev)) {
> +        if (msi_trigger) {
> +            msix_notify(dev, msi_vector);
> +        }
> +    } else if (msi_enabled(dev)) {
> +        if (msi_trigger) {
> +            msi_notify(dev, msi_vector);
> +        }
> +    } else {
> +        qemu_set_irq(dev->irq[int_irq], int_level);
> +    }
> +}
> +
> +static AERMsgResult
> +pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
> +{
> +    AERMsgResult ret;
> +    uint16_t cmd;
> +    uint8_t *aer_cap;
> +    uint32_t root_cmd;
> +    uint32_t root_sta;
> +    bool msi_trigger;
> +
> +    ret = AER_MSG_MASKED;
> +    cmd = pci_get_word(dev->config + PCI_COMMAND);
> +    aer_cap = dev->config + dev->exp.aer_cap;
> +    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
> +    root_sta = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
> +    msi_trigger = false;
> +
> +    if (cmd & PCI_COMMAND_SERR) {
> +        /* System Error. Platform Specific */
> +        /* ret = AER_MSG_SENT; */
> +    }
> +
> +    /* Errro Message Received: Root Error Status register */
> +    switch (msg->severity) {
> +    case AER_ERR_COR:
> +        if (root_sta & PCI_ERR_ROOT_COR_RCV) {
> +            root_sta |= PCI_ERR_ROOT_MULTI_COR_RCV;
> +        } else {
> +            if (root_cmd & PCI_ERR_ROOT_CMD_COR_EN) {
> +                msi_trigger = true;
> +            }
> +            pci_set_word(aer_cap + PCI_ERR_ROOT_COR_SRC, msg->source_id);
> +        }
> +        root_sta |= PCI_ERR_ROOT_COR_RCV;
> +        break;
> +    case AER_ERR_NONFATAL:
> +        if (!(root_sta & PCI_ERR_ROOT_NONFATAL_RCV) &&
> +            root_cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) {
> +            msi_trigger = true;
> +        }
> +        root_sta |= PCI_ERR_ROOT_NONFATAL_RCV;
> +        break;
> +    case AER_ERR_FATAL:
> +        if (!(root_sta & PCI_ERR_ROOT_FATAL_RCV) &&
> +            root_cmd & PCI_ERR_ROOT_CMD_FATAL_EN) {
> +            msi_trigger = true;
> +        }
> +        if (!(root_sta & PCI_ERR_ROOT_UNCOR_RCV)) {
> +            root_sta |= PCI_ERR_ROOT_FIRST_FATAL;
> +        }
> +        root_sta |= PCI_ERR_ROOT_FATAL_RCV;
> +        break;
> +    }
> +    if (pcie_aer_msg_is_uncor(msg)) {
> +        if (root_sta & PCI_ERR_ROOT_UNCOR_RCV) {
> +            root_sta |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
> +        } else {
> +            pci_set_word(aer_cap + PCI_ERR_ROOT_SRC, msg->source_id);
> +        }
> +        root_sta |= PCI_ERR_ROOT_UNCOR_RCV;
> +    }
> +    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_sta);
> +
> +    if (root_cmd & msg->severity) {
> +        /* 6.2.4.1.2 Interrupt Generation */
> +        pcie_aer_root_port_notify(dev, msi_trigger, 1);
> +        ret = AER_MSG_SENT;
> +    }
> +    return ret;
> +}
> +
> +static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint8_t first_bit = ffsl(err->status) - 1;
> +    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
> +    int i;
> +    uint32_t dw;
> +
> +    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
> +    errcap |= PCI_ERR_CAP_FEP(first_bit);
> +
> +    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
> +        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
> +            /* 7.10.8 Header Log Register */
> +            cpu_to_be32wu(&dw, err->header[i]);
> +            memcpy(aer_cap + PCI_ERR_HEADER_LOG + sizeof(err->header[0]) * i,
> +                   &dw, sizeof(dw));
> +        }
> +    } else {
> +        assert(!(err->flags & PCIE_AER_ERR_TLP_PRESENT));
> +        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, sizeof(err->header));
> +    }
> +
> +    if ((err->flags & PCIE_AER_ERR_TLP_PRESENT) &&
> +        (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2) &
> +         PCI_EXP_DEVCAP2_EETLPP)) {
> +        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
> +            /* 7.10.12 tlp prefix log register */
> +            cpu_to_be32wu(&dw, err->prefix[i]);
> +            memcpy(aer_cap + PCI_ERR_TLP_PREFIX_LOG +
> +                   sizeof(err->prefix[0]) * i, &dw, sizeof(dw));
> +        }
> +        errcap |= PCI_ERR_CAP_TLP;
> +    } else {
> +        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, sizeof(err->prefix));
> +    }
> +    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
> +}
> +
> +static void pcie_aer_clear_log(PCIDevice *dev)
> +{
> +    PCIEAERErr *err;
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +
> +    pci_clear_bit_long(aer_cap + PCI_ERR_CAP,
> +                       PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
> +
> +    memset(aer_cap + PCI_ERR_HEADER_LOG, 0, sizeof(err->header));
> +    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, sizeof(err->prefix));
> +}
> +
> +static int pcie_aer_record_error(PCIDevice *dev,
> +                                 const PCIEAERErr *err)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
> +    int fep = PCI_ERR_CAP_FEP(errcap);
> +
> +    if (errcap & PCI_ERR_CAP_MHRE &&
> +        (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1ULL << fep))) {
> +        /*  Not first error. queue error */
> +        if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
> +            /* overflow */
> +            return -1;
> +        }
> +        return 0;
> +    }
> +
> +    pcie_aer_update_log(dev, err);
> +    return 0;
> +}
> +
> +static void pcie_aer_clear_error(PCIDevice *dev)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
> +    uint32_t old_err = (1U << PCI_ERR_CAP_FEP(errcap));
> +    PCIEAERLog *aer_log = &dev->exp.aer_log;
> +    const PCIEAERErr *err;
> +    uint32_t consumer;
> +
> +    if (!(errcap & PCI_ERR_CAP_MHRE) || aer_log_empty(aer_log)) {
> +        pcie_aer_clear_log(dev);
> +        pci_clear_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, old_err);
> +        return;
> +    }
> +
> +    /*
> +     * If more errors are queued, set corresponding bits in uncorrectable
> +     * error status.
> +     * We emulates uncorrectable error status register as W1CS.
> +     * So set bit in uncorrectable error status here again for multiple
> +     * error recording support.
> +     *
> +     * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
> +     */
> +    for (consumer = dev->exp.aer_log.consumer;
> +         !aer_log_empty_index(dev->exp.aer_log.producer, consumer);
> +         consumer = aer_log_next(consumer, dev->exp.aer_log.log_max)) {
> +        pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS,
> +                         dev->exp.aer_log.log[consumer].status);
> +    }
> +
> +    err = aer_log_del_err(aer_log);
> +    pcie_aer_update_log(dev, err);
> +}
> +
> +/*
> + * A non-function-specific error must be recorded in all functions.
> + * That is the responsibility of the caller of this function.
> + * It is also the caller's responsibility to determine which function should
> + * report the error.
> + *
> + * 6.2.4 Error Logging
> + * 6.2.5 Sequence of Device Error Signaling and Logging Operations
> + * table 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
> + *            Operations
> + *
> + * Although this implementation can be shortened/optimized, this is kept
> + * parallel to table 6-2.
> + */
> +void pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
> +{
> +    uint8_t *exp_cap;
> +    uint8_t *aer_cap = NULL;
> +    uint32_t devctl = 0;
> +    uint32_t devsta = 0;
> +    uint32_t status = err->status;
> +    uint32_t mask;
> +    bool is_unsupported_request =
> +        (!(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
> +         err->status == PCI_ERR_UNC_UNSUP);
> +    bool is_advisory_nonfatal = false;  /* for advisory non-fatal error */
> +    uint32_t uncor_status = 0;          /* for advisory non-fatal error */
> +    PCIEAERMsg msg;
> +    int is_header_log_overflowed = 0;
> +
> +    if (!pci_is_express(dev)) {
> +        /* What to do? */
> +        return;
> +    }
> +
> +    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
> +        status &= PCI_ERR_COR_SUPPORTED;
> +    } else {
> +        status &= PCI_ERR_UNC_SUPPORTED;
> +    }
> +    if (!status || status & (status - 1)) {
> +        /* invalid status bit. one and only one bit must be set */
> +        return;
> +    }
> +
> +    exp_cap = dev->config + dev->exp.exp_cap;
> +    if (dev->exp.aer_cap) {
> +        aer_cap = dev->config + dev->exp.aer_cap;
> +        devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
> +        devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
> +    }
> +    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
> +    correctable_error:
> +        devsta |= PCI_EXP_DEVSTA_CED;
> +        if (is_unsupported_request) {
> +            devsta |= PCI_EXP_DEVSTA_URD;
> +        }
> +        pci_set_word(exp_cap + PCI_EXP_DEVSTA, devsta);
> +
> +        if (aer_cap) {
> +            pci_set_bit_long(aer_cap + PCI_ERR_COR_STATUS, status);
> +            mask = pci_get_long(aer_cap + PCI_ERR_COR_MASK);
> +            if (mask & status) {
> +                return;
> +            }
> +            if (is_advisory_nonfatal) {
> +                uint32_t uncor_mask =
> +                    pci_get_long(aer_cap + PCI_ERR_UNCOR_MASK);
> +                if (!(uncor_mask & uncor_status)) {
> +                    is_header_log_overflowed = pcie_aer_record_error(dev, err);
> +                }
> +                pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, uncor_status);
> +            }
> +        }
> +
> +        if (is_unsupported_request && !(devctl & PCI_EXP_DEVCTL_URRE)) {
> +            return;
> +        }
> +        if (!(devctl & PCI_EXP_DEVCTL_CERE)) {
> +            return;
> +        }
> +        msg.severity = AER_ERR_COR;
> +    } else {
> +        bool is_fatal =
> +            (pcie_aer_uncor_default_severity(status) == AER_ERR_FATAL);
> +        uint16_t cmd;
> +
> +        if (aer_cap) {
> +            is_fatal = status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
> +        }
> +        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
> +            is_advisory_nonfatal = true;
> +            uncor_status = status;
> +            status = PCI_ERR_COR_ADV_NONFATAL;
> +            goto correctable_error;
> +        }
> +        if (is_fatal) {
> +            devsta |= PCI_EXP_DEVSTA_FED;
> +        } else {
> +            devsta |= PCI_EXP_DEVSTA_NFED;
> +        }
> +        if (is_unsupported_request) {
> +            devsta |= PCI_EXP_DEVSTA_URD;
> +        }
> +        pci_set_long(exp_cap + PCI_EXP_DEVSTA, devsta);
> +
> +        if (aer_cap) {
> +            mask = pci_get_long(aer_cap + PCI_ERR_UNCOR_MASK);
> +            if (mask & status) {
> +                pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, status);
> +                return;
> +            }
> +
> +            is_header_log_overflowed = pcie_aer_record_error(dev, err);
> +            pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, status);
> +        }
> +
> +        cmd = pci_get_word(dev->config + PCI_COMMAND);
> +        if (is_unsupported_request &&
> +            !(devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
> +            return;
> +        }
> +        if (is_fatal) {
> +            if (!((cmd & PCI_COMMAND_SERR) ||
> +                  (devctl & PCI_EXP_DEVCTL_FERE))) {
> +                return;
> +            }
> +            msg.severity = AER_ERR_FATAL;
> +        } else {
> +            if (!((cmd & PCI_COMMAND_SERR) ||
> +                  (devctl & PCI_EXP_DEVCTL_NFERE))) {
> +                return;
> +            }
> +            msg.severity = AER_ERR_NONFATAL;
> +        }
> +    }
> +
> +    /* send up error message */
> +    msg.source_id = err->source_id;
> +    pcie_aer_msg(dev, &msg);
> +
> +    if (is_header_log_overflowed) {
> +        PCIEAERErr header_log_overflow = {
> +            .status = PCI_ERR_COR_HL_OVERFLOW,
> +            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
> +            .header = {0, 0, 0, 0},
> +            .prefix = {0, 0, 0, 0},
> +        };
> +        pcie_aer_inject_error(dev, &header_log_overflow);
> +    }
> +}
> +
> +void pcie_aer_root_set_vector(PCIDevice *dev, uint8_t vector)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    assert(vector < PCI_ERR_ROOT_IRQ_MAX);
> +    pci_clear_set_bit_long(aer_cap + PCI_ERR_ROOT_STATUS,
> +                           PCI_ERR_ROOT_IRQ,
> +                           ((uint32_t)vector) << PCI_ERR_ROOT_IRQ_SHIFT);

So this is an example of why clear_set is not a good API.
Does this clear PCI_ERR_ROOT_IRQ and set vector,
or clear vector and set PCI_ERR_ROOT_IRQ?
One cannot tell.
Using set and clear separately would make it obvious.


> +}
> +
> +static uint8_t pcie_aer_root_get_vector(PCIDevice *dev)
> +{
> +    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
> +    return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
> +}
> +
> +void pcie_aer_root_init(PCIDevice *dev)
> +{
> +    uint16_t pos = dev->exp.aer_cap;
> +
> +    pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
> +                 PCI_ERR_ROOT_CMD_EN_MASK);
> +    pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
> +                 PCI_ERR_ROOT_STATUS_REPORT_MASK);
> +}
> +
> +void pcie_aer_root_reset(PCIDevice *dev)
> +{
> +    uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
> +
> +    pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
> +
> +    /*
> +     * Advanced Error Interrupt Message Number in Root Error Status Register
> +     * must be updated by chip dependent code because it's chip dependent
> +     * which number is used.
> +     */
> +}
> +
> +static bool pcie_aer_root_does_trigger(uint32_t cmd, uint32_t status)
> +{
> +    return
> +        ((cmd & PCI_ERR_ROOT_CMD_COR_EN) && (status & PCI_ERR_ROOT_COR_RCV)) ||
> +        ((cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
> +         (status & PCI_ERR_ROOT_NONFATAL_RCV)) ||
> +        ((cmd & PCI_ERR_ROOT_CMD_FATAL_EN) &&
> +         (status & PCI_ERR_ROOT_FATAL_RCV));
> +}
> +
> +void pcie_aer_root_write_config(PCIDevice *dev,
> +                                uint32_t addr, uint32_t val, int len,
> +                                uint32_t root_cmd_prev)
> +{
> +    uint16_t pos = dev->exp.aer_cap;
> +    uint8_t *aer_cap = dev->config + pos;
> +
> +    /* root command register */
> +    uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
> +    if (root_cmd & PCI_ERR_ROOT_CMD_EN_MASK) {
> +        /* 6.2.4.1.2 Interrupt Generation */
> +
> +        /* 0 -> 1 */
> +        uint32_t root_cmd_set = (root_cmd_prev ^ root_cmd) & root_cmd;
> +        uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
> +
> +        bool msi_trigger = pcie_aer_root_does_trigger(root_cmd_set,
> +                                                      root_status);
> +        int int_level = pcie_aer_root_does_trigger(root_cmd, root_status);
> +
> +        pcie_aer_root_port_notify(dev, msi_trigger, int_level);
> +    }
> +}
> +
> +static const VMStateDescription vmstate_pcie_aer_err = {
> +    .name = "PCIE_AER_ERROR",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .fields     = (VMStateField[]) {
> +        VMSTATE_UINT32(status, PCIEAERErr),
> +        VMSTATE_UINT16(source_id, PCIEAERErr),
> +        VMSTATE_UINT16(flags, PCIEAERErr),
> +        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
> +        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +#define VMSTATE_PCIE_AER_ERRS(_field, _state, _field_num, _vmsd, _type) { \
> +    .name       = (stringify(_field)),                                    \
> +    .version_id = 0,                                                      \
> +    .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),     \
> +    .size       = sizeof(_type),                                          \
> +    .vmsd       = &(_vmsd),                                               \
> +    .flags      = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT,           \
> +    .offset     = vmstate_offset_pointer(_state, _field, _type),          \
> +}
> +
> +const VMStateDescription vmstate_pcie_aer_log = {
> +    .name = "PCIE_AER_ERROR_LOG",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .minimum_version_id_old = 1,
> +    .fields     = (VMStateField[]) {
> +        VMSTATE_UINT32(producer, PCIEAERLog),
> +        VMSTATE_UINT32(consumer, PCIEAERLog),
> +        VMSTATE_UINT16(log_max, PCIEAERLog),
> +        VMSTATE_PCIE_AER_ERRS(log, PCIEAERLog, log_max,
> +                              vmstate_pcie_aer_err, PCIEAERErr),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> diff --git a/hw/pcie_aer.h b/hw/pcie_aer.h
> new file mode 100644
> index 0000000..7d1540b
> --- /dev/null
> +++ b/hw/pcie_aer.h
> @@ -0,0 +1,105 @@
> +/*
> + * pcie_aer.h
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef QEMU_PCIE_AER_H
> +#define QEMU_PCIE_AER_H
> +
> +#include "hw.h"
> +
> +/* definitions which PCIExpressDevice uses */
> +typedef enum {
> +    AER_MSG_MASKED,
> +    AER_MSG_SENT,
> +} AERMsgResult;
> +
> +/* AER log */
> +struct PCIEAERLog {
> +    /* This structure is saved/loaded.
> +       So explicitly size them instead of unsigned int */
> +    uint32_t producer;
> +    uint32_t consumer;
> +
> +#define PCIE_AER_LOG_MAX_DEFAULT        8
> +#define PCIE_AER_LOG_MAX_MAX            128 /* what is appropriate? */
> +#define PCIE_AER_LOG_MAX_UNSET          0xffff
> +    uint16_t log_max;
> +
> +    PCIEAERErr *log;    /* ringed buffer */
> +};
> +
> +/* aer error severity */
> +typedef enum {
> +    /* those value are same as
> +     * Root error command register in aer extended cap and
> +     * root control register in pci express cap.
> +     */
> +    AER_ERR_COR         = 0x1,
> +    AER_ERR_NONFATAL    = 0x2,
> +    AER_ERR_FATAL       = 0x4,
> +} PCIEAERSeverity;
> +
> +/* aer error message: error signaling message has only error sevirity and
> +   source id. See 2.2.8.3 error signaling messages */
> +struct PCIEAERMsg {
> +    PCIEAERSeverity severity;
> +    uint16_t source_id; /* bdf */
> +};
> +
> +static inline bool
> +pcie_aer_msg_is_uncor(const PCIEAERMsg *msg)
> +{
> +    return msg->severity == AER_ERR_NONFATAL || msg->severity == AER_ERR_FATAL;
> +}
> +
> +/* error */
> +struct PCIEAERErr {
> +    uint32_t status;    /* error status bits */
> +    uint16_t source_id; /* bdf */
> +
> +#define PCIE_AER_ERR_IS_CORRECTABLE     0x1     /* correctable/uncorrectable */
> +#define PCIE_AER_ERR_MAYBE_ADVISORY     0x2     /* maybe advisory non-fatal */
> +#define PCIE_AER_ERR_HEADER_VALID       0x4     /* TLP header is logged */
> +#define PCIE_AER_ERR_TLP_PRESENT        0x8     /* TLP Prefix is logged */
> +    uint16_t flags;
> +
> +    uint32_t header[4]; /* TLP header */
> +    uint32_t prefix[4]; /* TLP header prefix */
> +};
> +
> +extern const VMStateDescription vmstate_pcie_aer_log;
> +
> +void pcie_aer_init(PCIDevice *dev, uint16_t offset);
> +void pcie_aer_exit(PCIDevice *dev);
> +void pcie_aer_write_config(PCIDevice *dev,
> +                           uint32_t addr, uint32_t val, int len,
> +                           uint32_t uncorsta_prev);
> +
> +/* aer root port */
> +void pcie_aer_root_set_vector(PCIDevice *dev, uint8_t vector);
> +void pcie_aer_root_init(PCIDevice *dev);
> +void pcie_aer_root_reset(PCIDevice *dev);
> +void pcie_aer_root_write_config(PCIDevice *dev,
> +                                uint32_t addr, uint32_t val, int len,
> +                                uint32_t root_cmd_prev);
> +
> +/* error injection */
> +void pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
> +
> +#endif /* QEMU_PCIE_AER_H */
> diff --git a/qemu-common.h b/qemu-common.h
> index 6d9ee26..94b2c11 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -221,6 +221,9 @@ typedef struct PCIBus PCIBus;
>  typedef struct PCIDevice PCIDevice;
>  typedef struct PCIExpressDevice PCIExpressDevice;
>  typedef struct PCIBridge PCIBridge;
> +typedef struct PCIEAERMsg PCIEAERMsg;
> +typedef struct PCIEAERLog PCIEAERLog;
> +typedef struct PCIEAERErr PCIEAERErr;
>  typedef struct SerialState SerialState;
>  typedef struct IRQState *qemu_irq;
>  typedef struct PCMCIACardState PCMCIACardState;
> -- 
> 1.7.1.1

Patch

diff --git a/Makefile.objs b/Makefile.objs
index eeb5134..68bcc48 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -186,7 +186,7 @@  hw-obj-$(CONFIG_PIIX4) += piix4.o
 # PCI watchdog devices
 hw-obj-y += wdt_i6300esb.o
 
-hw-obj-y += pcie.o
+hw-obj-y += pcie.o pcie_aer.o
 hw-obj-y += msix.o msi.o
 
 # PCI network cards
diff --git a/hw/pcie.h b/hw/pcie.h
index 603bb8b..891e369 100644
--- a/hw/pcie.h
+++ b/hw/pcie.h
@@ -24,6 +24,7 @@ 
 #include "hw.h"
 #include "pci_regs.h"
 #include "pcie_regs.h"
+#include "pcie_aer.h"
 
 typedef enum {
     /* for attention and power indicator */
@@ -66,6 +67,10 @@  struct PCIExpressDevice {
 
     /* FLR */
     pcie_flr_fn flr;
+
+    /* AER */
+    uint16_t aer_cap;
+    PCIEAERLog aer_log;
 };
 
 /* PCI express capability helper functions */
diff --git a/hw/pcie_aer.c b/hw/pcie_aer.c
new file mode 100644
index 0000000..c86f2b9
--- /dev/null
+++ b/hw/pcie_aer.c
@@ -0,0 +1,789 @@ 
+/*
+ * pcie_aer.c
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "sysemu.h"
+#include "pci_bridge.h"
+#include "pcie.h"
+#include "msix.h"
+#include "msi.h"
+#include "pci_internals.h"
+#include "pcie_regs.h"
+
+/*
+ * Debug tracing helpers.
+ * PCIE_DPRINTF() writes to stderr, prefixed with the calling function name
+ * and line number, when DEBUG_PCIE is defined (uncomment the line below);
+ * otherwise it expands to an empty statement.
+ * PCIE_DEV_PRINTF() additionally prefixes the device name and devfn.
+ */
+//#define DEBUG_PCIE
+#ifdef DEBUG_PCIE
+# define PCIE_DPRINTF(fmt, ...)                                         \
+    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
+#else
+# define PCIE_DPRINTF(fmt, ...) do {} while (0)
+#endif
+#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
+    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
+
+/* Forward declarations of static helpers that are used before they are
+ * defined further down in this file. */
+static void pcie_aer_clear_error(PCIDevice *dev);
+static uint8_t pcie_aer_root_get_vector(PCIDevice *dev);
+static AERMsgResult
+pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg);
+static AERMsgResult
+pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg);
+static AERMsgResult
+pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg);
+
+/*
+ * Default severity of a single uncorrectable error status bit.
+ * From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4
+ *
+ * status must be exactly one of the PCI_ERR_UNC_* values listed below.
+ * Any other value (including several bits set at once) reaches abort().
+ */
+static PCIEAERSeverity pcie_aer_uncor_default_severity(uint32_t status)
+{
+    switch (status) {
+    case PCI_ERR_UNC_INTN:
+    case PCI_ERR_UNC_DLP:
+    case PCI_ERR_UNC_SDN:
+    case PCI_ERR_UNC_RX_OVER:
+    case PCI_ERR_UNC_FCP:
+    case PCI_ERR_UNC_MALF_TLP:
+        return AER_ERR_FATAL;
+    case PCI_ERR_UNC_POISON_TLP:
+    case PCI_ERR_UNC_ECRC:
+    case PCI_ERR_UNC_UNSUP:
+    case PCI_ERR_UNC_COMP_TIME:
+    case PCI_ERR_UNC_COMP_ABORT:
+    case PCI_ERR_UNC_UNX_COMP:
+    case PCI_ERR_UNC_ACSV:
+    case PCI_ERR_UNC_MCBTLP:
+    case PCI_ERR_UNC_ATOP_EBLOCKED:
+    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
+        return AER_ERR_NONFATAL;
+    default:
+        break;
+    }
+    abort();
+    /* placed after abort(); presumably only there to give the function a
+     * return statement on every path */
+    return AER_ERR_FATAL;
+}
+
+/*
+ * Return the ring index that follows i in a ring of max slots (wraps to 0).
+ * max must be non-zero: it is used as the modulo divisor.
+ */
+static uint32_t aer_log_next(uint32_t i, uint32_t max)
+{
+    return (i + 1) % max;
+}
+
+/* A ring described by these two indices is empty when they are equal. */
+static bool aer_log_empty_index(uint32_t producer, uint32_t consumer)
+{
+    return producer == consumer;
+}
+
+/* True when no error is queued in aer_log. */
+static bool aer_log_empty(PCIEAERLog *aer_log)
+{
+    return aer_log_empty_index(aer_log->producer, aer_log->consumer);
+}
+
+/*
+ * True when no further error can be queued: advancing the producer would
+ * make it equal to the consumer. One slot is therefore always left unused,
+ * i.e. the ring holds at most log_max - 1 entries.
+ */
+static bool aer_log_full(PCIEAERLog *aer_log)
+{
+    return aer_log_next(aer_log->producer, aer_log->log_max) ==
+        aer_log->consumer;
+}
+
+/*
+ * Claim the slot at the producer index: returns that index and advances the
+ * producer. Does not check for a full ring; see aer_log_add_err().
+ */
+static uint32_t aer_log_add(PCIEAERLog *aer_log)
+{
+    uint32_t i = aer_log->producer;
+    aer_log->producer = aer_log_next(aer_log->producer, aer_log->log_max);
+    return i;
+}
+
+/*
+ * Release the slot at the consumer index: returns that index and advances
+ * the consumer. Does not check for an empty ring; see aer_log_del_err().
+ */
+static uint32_t aer_log_del(PCIEAERLog *aer_log)
+{
+    uint32_t i = aer_log->consumer;
+    aer_log->consumer = aer_log_next(aer_log->consumer, aer_log->log_max);
+    return i;
+}
+
+/*
+ * Queue a copy of *err in the ring.
+ * Returns 0 on success, -1 if the ring is full (nothing is stored).
+ */
+static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
+{
+    uint32_t i;
+    if (aer_log_full(aer_log)) {
+        return -1;
+    }
+    i = aer_log_add(aer_log);
+    memcpy(&aer_log->log[i], err, sizeof(*err));
+    return 0;
+}
+
+/*
+ * Dequeue the oldest queued error. The ring must not be empty (asserted).
+ * The returned pointer refers to the slot inside aer_log->log itself, not to
+ * a copy.
+ */
+static const PCIEAERErr* aer_log_del_err(PCIEAERLog *aer_log)
+{
+    uint32_t i;
+    assert(!aer_log_empty(aer_log));
+    i = aer_log_del(aer_log);
+    return &aer_log->log[i];
+}
+
+/* Drop all queued errors by resetting both ring indices; the slots
+ * themselves are not cleared. */
+static void aer_log_clear_all_err(PCIEAERLog *aer_log)
+{
+    aer_log->producer = 0;
+    aer_log->consumer = 0;
+}
+
+/*
+ * Add the AER extended capability at config space offset `offset` and set
+ * up the initial values, write masks (wmask) and write-1-to-clear masks
+ * (w1cmask) of its registers. Also allocates the error log ring, which is
+ * released by pcie_aer_exit().
+ *
+ * dev->exp.aer_log.log_max is read here: PCIE_AER_LOG_MAX_UNSET selects
+ * PCIE_AER_LOG_MAX_DEFAULT, values above PCIE_AER_LOG_MAX_MAX are clamped.
+ */
+void pcie_aer_init(PCIDevice *dev, uint16_t offset)
+{
+    PCIExpressDevice *exp;
+
+    /* SERR# Enable becomes writable, Signaled System Error is W1C */
+    pci_set_bit_word(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
+    pci_set_bit_word(dev->w1cmask + PCI_STATUS, PCI_STATUS_SIG_SYSTEM_ERROR);
+
+    pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
+                        offset, PCI_ERR_SIZEOF);
+    exp = &dev->exp;
+    exp->aer_cap = offset;
+    if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
+        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    }
+    if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_MAX) {
+        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_MAX;
+    }
+    dev->exp.aer_log.log = qemu_mallocz(sizeof(dev->exp.aer_log.log[0]) *
+                                        dev->exp.aer_log.log_max);
+
+    /* uncorrectable error status/mask/severity */
+    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
+                 PCI_ERR_UNC_SUPPORTED);
+
+    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
+                 PCI_ERR_UNC_SUPPORTED);
+
+    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
+                 PCI_ERR_UNC_SEVERITY_DEFAULT);
+    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
+                 PCI_ERR_UNC_SUPPORTED);
+
+    /*
+     * correctable error status/mask.
+     * The W1C mask must be the set of supported status bits; the register
+     * offset PCI_ERR_COR_STATUS is not a bit mask.
+     */
+    pci_set_bit_long(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
+                     PCI_ERR_COR_SUPPORTED);
+
+    pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
+                 PCI_ERR_COR_MASK_DEFAULT);
+    pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
+                 PCI_ERR_COR_SUPPORTED);
+
+    /*
+     * capabilities and control. multiple header logging is supported
+     * when a log ring exists.
+     * On reset PCI_ERR_CAP_MHRE is disabled
+     * PCI_ERR_CAP_MHRE is RWS so that reset doesn't affect related
+     * registers
+     */
+    if (dev->exp.aer_log.log_max > 0) {
+        pci_set_long(dev->config + offset + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
+                     PCI_ERR_CAP_MHRC);
+        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
+                     PCI_ERR_CAP_MHRE);
+    } else {
+        pci_set_long(dev->config + offset + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
+        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
+                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+    }
+
+    switch (pcie_cap_get_type(dev)) {
+    case PCI_EXP_TYPE_ROOT_PORT:
+        /* this case will be set by pcie_aer_root_init() */
+        /* fallthrough */
+    case PCI_EXP_TYPE_DOWNSTREAM:
+    case PCI_EXP_TYPE_UPSTREAM:
+        pci_set_bit_word(dev->wmask + PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_SERR);
+        /*
+         * Received System Error is a bit of the 16 bit Secondary Status
+         * register (the register pcie_aer_msg_vbridge() sets it in), so
+         * that is where it has to be made W1C.
+         */
+        pci_set_bit_word(dev->w1cmask + PCI_SEC_STATUS,
+                         PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
+        break;
+    default:
+        /* nothing */
+        break;
+    }
+}
+
+/* Free the error log ring of dev (dev->exp.aer_log.log). */
+void pcie_aer_exit(PCIDevice *dev)
+{
+    qemu_free(dev->exp.aer_log.log);
+}
+
+/*
+ * AER specific post-processing of a config space write.
+ *
+ * addr/len describe the written range; val is not used by this function.
+ * uncorsta_old is tested against the First Error Pointer bit together with
+ * the bits just written, so it is expected to be the Uncorrectable Error
+ * Status value from before the write.
+ */
+void pcie_aer_write_config(PCIDevice *dev,
+                           uint32_t addr, uint32_t val, int len,
+                           uint32_t uncorsta_old)
+{
+    uint32_t pos = dev->exp.aer_cap;
+
+    /* uncorrectable */
+    uint32_t written =
+        pci_get_long(dev->written + pos + PCI_ERR_UNCOR_STATUS) &
+        PCI_ERR_UNC_SUPPORTED;
+        /* if uncorrectable status register isn't touched, written == 0*/
+    uint32_t errcap = pci_get_long(dev->config + pos + PCI_ERR_CAP);
+    uint32_t first_error = (1U << PCI_ERR_CAP_FEP(errcap));
+    /* the status bit the First Error Pointer refers to was set before and
+     * has been written now: move on to the next recorded error */
+    if ((uncorsta_old & first_error) && (written & first_error)) {
+        pcie_aer_clear_error(dev);
+    }
+
+    /* capability & control */
+    if (ranges_overlap(addr, len, pos + PCI_ERR_CAP, 4)) {
+        uint32_t err_cap = pci_get_long(dev->config + pos + PCI_ERR_CAP);
+        /* multiple header recording is (now) disabled: drop queued errors */
+        if (!(err_cap & PCI_ERR_CAP_MHRE)) {
+            aer_log_clear_all_err(&dev->exp.aer_log);
+        }
+    }
+}
+
+/*
+ * Process error message msg at dev, which must be a PCI Express device
+ * (asserted).
+ * Root, upstream and downstream ports first apply pcie_aer_msg_vbridge();
+ * unless that returns AER_MSG_SENT the message stops here. Then
+ * pcie_aer_msg_alldev() is applied, and for a root port that returned
+ * AER_MSG_SENT the root port handling runs as well.
+ */
+static inline void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
+{
+    uint8_t type;
+    AERMsgResult result;
+
+    assert(pci_is_express(dev));
+
+    type = pcie_cap_get_type(dev);
+    if (type == PCI_EXP_TYPE_ROOT_PORT ||
+        type == PCI_EXP_TYPE_UPSTREAM ||
+        type == PCI_EXP_TYPE_DOWNSTREAM) {
+        result = pcie_aer_msg_vbridge(dev, msg);
+        if (result != AER_MSG_SENT) {
+            return;
+        }
+    }
+    result = pcie_aer_msg_alldev(dev, msg);
+    if (type == PCI_EXP_TYPE_ROOT_PORT && result == AER_MSG_SENT) {
+        pcie_aer_msg_root_port(dev, msg);
+    }
+}
+
+/*
+ * Device level handling of an error message, applied to every device type.
+ *
+ * The message is passed on when
+ *  - it is uncorrectable and PCI_COMMAND_SERR is set (in that case
+ *    PCI_STATUS_SIG_SYSTEM_ERROR is set as well), or
+ *  - msg->severity ANDed with the Device Control register is non-zero
+ *    (this relies on the AER_ERR_* values lining up with the Device Control
+ *    reporting enable bits -- TODO confirm against pcie_regs.h).
+ *
+ * Returns AER_MSG_MASKED if neither condition holds or if the parent bridge
+ * device is not PCI Express. Otherwise the message is handed to the parent
+ * port, if there is one, and AER_MSG_SENT is returned.
+ */
+static AERMsgResult
+pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
+{
+    uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND);
+    bool transmit1 =
+        pcie_aer_msg_is_uncor(msg) && (cmd & PCI_COMMAND_SERR);
+    uint32_t devctl = pci_get_word(dev->config +
+                                   dev->exp.exp_cap + PCI_EXP_DEVCTL);
+    bool transmit2 = msg->severity & devctl;
+    PCIDevice *parent_port;
+
+    if (transmit1) {
+        /* transmit1 already implies an uncorrectable message, so this
+         * inner test is always true here */
+        if (pcie_aer_msg_is_uncor(msg)) {
+            /* Signaled System Error */
+            pci_set_bit_word(dev->config + PCI_STATUS,
+                             PCI_STATUS_SIG_SYSTEM_ERROR);
+        }
+    }
+
+    if (!(transmit1 || transmit2)) {
+        return AER_MSG_MASKED;
+    }
+
+    /* send up error message */
+    if (pci_is_express(dev) &&
+        pcie_cap_get_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
+        /* A root port notifies the system itself,
+           or sends the error message to a root complex event collector. */
+        /*
+         * if root port is associated to event collector, set
+         * parent_port = root complex event collector
+         * For now root complex event collector isn't supported.
+         */
+        parent_port = NULL;
+    } else {
+        parent_port = pci_bridge_get_device(dev->bus);
+    }
+    if (parent_port) {
+        if (!pci_is_express(parent_port)) {
+            /* What to do? */
+            return AER_MSG_MASKED;
+        }
+        pcie_aer_msg(parent_port, msg);
+    }
+    return AER_MSG_SENT;
+}
+
+/*
+ * Bridge level handling of an error message (used for root, upstream and
+ * downstream ports).
+ * An uncorrectable message always sets PCI_SEC_STATUS_RCV_SYSTEM_ERROR in
+ * the Secondary Status register. The message is let through
+ * (AER_MSG_SENT) only if PCI_BRIDGE_CTL_SERR is set in Bridge Control,
+ * otherwise AER_MSG_MASKED is returned.
+ */
+static AERMsgResult
+pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
+{
+    uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
+
+    if (pcie_aer_msg_is_uncor(msg)) {
+        /* Received System Error */
+        pci_set_bit_word(dev->config + PCI_SEC_STATUS,
+                         PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
+    }
+
+    if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
+        return AER_MSG_MASKED;
+    }
+    return AER_MSG_SENT;
+}
+
+/*
+ * Signal an AER interrupt of root port dev.
+ * MSI-X is used if enabled, else MSI if enabled, else the INTx line.
+ * With MSI-X/MSI a message is sent only when msi_trigger is true and
+ * int_level is ignored; with INTx the line is set to int_level and
+ * msi_trigger is ignored.
+ */
+static void pcie_aer_root_port_notify(PCIDevice *dev,
+                                      bool msi_trigger, int int_level)
+{
+    unsigned int msi_vector = pcie_aer_root_get_vector(dev);
+    unsigned int int_irq = 0; /* INTA# is chosen for aer notification */
+
+    if (msix_enabled(dev)) {
+        if (msi_trigger) {
+            msix_notify(dev, msi_vector);
+        }
+    } else if (msi_enabled(dev)) {
+        if (msi_trigger) {
+            msi_notify(dev, msi_vector);
+        }
+    } else {
+        qemu_set_irq(dev->irq[int_irq], int_level);
+    }
+}
+
+/*
+ * Root port handling of a received error message: update the Root Error
+ * Status register, record msg->source_id for the first error of its class,
+ * and notify the interrupt when msg->severity is enabled in the Root Error
+ * Command register.
+ *
+ * Returns AER_MSG_SENT if pcie_aer_root_port_notify() was called, i.e. the
+ * severity is enabled in Root Error Command, otherwise AER_MSG_MASKED.
+ */
+static AERMsgResult
+pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
+{
+    AERMsgResult ret;
+    uint16_t cmd;
+    uint8_t *aer_cap;
+    uint32_t root_cmd;
+    uint32_t root_sta;
+    bool msi_trigger;
+
+    ret = AER_MSG_MASKED;
+    cmd = pci_get_word(dev->config + PCI_COMMAND);
+    aer_cap = dev->config + dev->exp.aer_cap;
+    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
+    root_sta = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
+    msi_trigger = false;
+
+    if (cmd & PCI_COMMAND_SERR) {
+        /* System Error. Platform Specific */
+        /* ret = AER_MSG_SENT; */
+    }
+
+    /* Error Message Received: Root Error Status register */
+    switch (msg->severity) {
+    case AER_ERR_COR:
+        if (root_sta & PCI_ERR_ROOT_COR_RCV) {
+            /* a correctable error is already pending: only flag "multiple" */
+            root_sta |= PCI_ERR_ROOT_MULTI_COR_RCV;
+        } else {
+            /* first correctable error: trigger MSI if enabled and latch
+             * the source id */
+            if (root_cmd & PCI_ERR_ROOT_CMD_COR_EN) {
+                msi_trigger = true;
+            }
+            pci_set_word(aer_cap + PCI_ERR_ROOT_COR_SRC, msg->source_id);
+        }
+        root_sta |= PCI_ERR_ROOT_COR_RCV;
+        break;
+    case AER_ERR_NONFATAL:
+        /* trigger MSI only on the 0 -> 1 transition of the received bit */
+        if (!(root_sta & PCI_ERR_ROOT_NONFATAL_RCV) &&
+            root_cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) {
+            msi_trigger = true;
+        }
+        root_sta |= PCI_ERR_ROOT_NONFATAL_RCV;
+        break;
+    case AER_ERR_FATAL:
+        /* trigger MSI only on the 0 -> 1 transition of the received bit */
+        if (!(root_sta & PCI_ERR_ROOT_FATAL_RCV) &&
+            root_cmd & PCI_ERR_ROOT_CMD_FATAL_EN) {
+            msi_trigger = true;
+        }
+        /* no uncorrectable error pending yet: the first one is fatal */
+        if (!(root_sta & PCI_ERR_ROOT_UNCOR_RCV)) {
+            root_sta |= PCI_ERR_ROOT_FIRST_FATAL;
+        }
+        root_sta |= PCI_ERR_ROOT_FATAL_RCV;
+        break;
+    }
+    if (pcie_aer_msg_is_uncor(msg)) {
+        if (root_sta & PCI_ERR_ROOT_UNCOR_RCV) {
+            root_sta |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
+        } else {
+            /* first uncorrectable error: latch the source id */
+            pci_set_word(aer_cap + PCI_ERR_ROOT_SRC, msg->source_id);
+        }
+        root_sta |= PCI_ERR_ROOT_UNCOR_RCV;
+    }
+    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_sta);
+
+    /* the AER_ERR_* values are used directly as Root Error Command bits */
+    if (root_cmd & msg->severity) {
+        /* 6.2.4.1.2 Interrupt Generation */
+        pcie_aer_root_port_notify(dev, msi_trigger, 1);
+        ret = AER_MSG_SENT;
+    }
+    return ret;
+}
+
+/*
+ * Make err the error that is visible in the AER capability registers:
+ *  - the First Error Pointer in the Advanced Error Capabilities and Control
+ *    register is set to the lowest set bit of err->status,
+ *  - the Header Log registers receive err->header if
+ *    PCIE_AER_ERR_HEADER_VALID is set, otherwise they are zeroed,
+ *  - the TLP Prefix Log registers receive err->prefix (and
+ *    PCI_ERR_CAP_TLP is set) if PCIE_AER_ERR_TLP_PRESENT is set and the
+ *    device supports End-End TLP Prefixes, otherwise they are zeroed.
+ *
+ * PCIE_AER_ERR_TLP_PRESENT without PCIE_AER_ERR_HEADER_VALID is rejected
+ * by an assertion.
+ */
+static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
+{
+    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+    uint8_t first_bit = ffsl(err->status) - 1;
+    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
+    int i;
+    uint32_t dw;
+
+    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
+    errcap |= PCI_ERR_CAP_FEP(first_bit);
+
+    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
+        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
+            /* 7.10.8 Header Log Register: each dword is stored big endian */
+            cpu_to_be32wu(&dw, err->header[i]);
+            memcpy(aer_cap + PCI_ERR_HEADER_LOG + sizeof(err->header[0]) * i,
+                   &dw, sizeof(dw));
+        }
+    } else {
+        assert(!(err->flags & PCIE_AER_ERR_TLP_PRESENT));
+        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, sizeof(err->header));
+    }
+
+    /*
+     * End-End TLP Prefix Supported is a capability bit, so it has to be
+     * tested in Device Capabilities 2, not in Device Control 2.
+     */
+    if ((err->flags & PCIE_AER_ERR_TLP_PRESENT) &&
+        (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
+         PCI_EXP_DEVCAP2_EETLPP)) {
+        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
+            /* 7.10.12 tlp prefix log register */
+            cpu_to_be32wu(&dw, err->prefix[i]);
+            memcpy(aer_cap + PCI_ERR_TLP_PREFIX_LOG +
+                   sizeof(err->prefix[0]) * i, &dw, sizeof(dw));
+        }
+        errcap |= PCI_ERR_CAP_TLP;
+    } else {
+        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, sizeof(err->prefix));
+    }
+    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
+}
+
+/*
+ * Clear the logged error of dev: the First Error Pointer and TLP Prefix Log
+ * Present bits of the capabilities and control register plus the Header Log
+ * and TLP Prefix Log registers.
+ */
+static void pcie_aer_clear_log(PCIDevice *dev)
+{
+    PCIEAERErr *err;    /* never assigned; only used as a sizeof operand */
+    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+
+    pci_clear_bit_long(aer_cap + PCI_ERR_CAP,
+                       PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
+
+    memset(aer_cap + PCI_ERR_HEADER_LOG, 0, sizeof(err->header));
+    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, sizeof(err->prefix));
+}
+
+/*
+ * Record err for dev.
+ * If multiple header recording is enabled and the status bit the First
+ * Error Pointer refers to is still set, err is queued in the log ring;
+ * otherwise err becomes the error shown in the AER registers.
+ *
+ * Returns 0 on success, -1 if err had to be queued but the ring was full.
+ */
+static int pcie_aer_record_error(PCIDevice *dev,
+                                 const PCIEAERErr *err)
+{
+    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
+    int fep = PCI_ERR_CAP_FEP(errcap);
+
+    if (errcap & PCI_ERR_CAP_MHRE &&
+        (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1ULL << fep))) {
+        /*  Not first error. queue error */
+        if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
+            /* overflow */
+            return -1;
+        }
+        return 0;
+    }
+
+    pcie_aer_update_log(dev, err);
+    return 0;
+}
+
+/*
+ * Retire the error the First Error Pointer currently refers to.
+ * Without multiple header recording, or when no further error is queued,
+ * the log registers and that error's status bit are cleared. Otherwise the
+ * status bits of all queued errors are set again and the oldest queued
+ * error is dequeued and loaded into the AER registers.
+ */
+static void pcie_aer_clear_error(PCIDevice *dev)
+{
+    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
+    uint32_t old_err = (1U << PCI_ERR_CAP_FEP(errcap));
+    PCIEAERLog *aer_log = &dev->exp.aer_log;
+    const PCIEAERErr *err;
+    uint32_t consumer;
+
+    if (!(errcap & PCI_ERR_CAP_MHRE) || aer_log_empty(aer_log)) {
+        pcie_aer_clear_log(dev);
+        pci_clear_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, old_err);
+        return;
+    }
+
+    /*
+     * If more errors are queued, set corresponding bits in uncorrectable
+     * error status.
+     * We emulate the uncorrectable error status register as W1CS.
+     * So set bit in uncorrectable error status here again for multiple
+     * error recording support.
+     *
+     * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
+     */
+    /* walks the ring with a local index; the stored consumer index is only
+     * advanced by aer_log_del_err() below */
+    for (consumer = dev->exp.aer_log.consumer;
+         !aer_log_empty_index(dev->exp.aer_log.producer, consumer);
+         consumer = aer_log_next(consumer, dev->exp.aer_log.log_max)) {
+        pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS,
+                         dev->exp.aer_log.log[consumer].status);
+    }
+
+    err = aer_log_del_err(aer_log);
+    pcie_aer_update_log(dev, err);
+}
+
+/*
+ * Inject error err into dev and signal it as the PCI Express error
+ * signaling and logging rules require.
+ *
+ * err->status must have exactly one supported bit set (correctable or
+ * uncorrectable according to PCIE_AER_ERR_IS_CORRECTABLE); otherwise the
+ * call does nothing. Nothing is done for a device that is not PCI Express
+ * either.
+ *
+ * A non-function-specific error must be recorded in all functions.
+ * That is the responsibility of the caller of this function.
+ * It is also the caller's responsibility to determine which function should
+ * report the error.
+ *
+ * 6.2.4 Error Logging
+ * 6.2.5 Sequence of Device Error Signaling and Logging Operations
+ * figure 6-2: Flowchart Showing Sequence of Device Error Signaling and
+ *             Logging Operations
+ *
+ * Although this implementation can be shortened/optimized, this is kept
+ * parallel to figure 6-2.
+ */
+void pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
+{
+    uint8_t *exp_cap;
+    uint8_t *aer_cap = NULL;
+    uint16_t devctl;
+    uint16_t devsta;
+    uint32_t status = err->status;
+    uint32_t mask;
+    bool is_unsupported_request =
+        (!(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
+         err->status == PCI_ERR_UNC_UNSUP);
+    bool is_advisory_nonfatal = false;  /* for advisory non-fatal error */
+    uint32_t uncor_status = 0;          /* for advisory non-fatal error */
+    PCIEAERMsg msg;
+    int is_header_log_overflowed = 0;
+
+    if (!pci_is_express(dev)) {
+        /* What to do? */
+        return;
+    }
+
+    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
+        status &= PCI_ERR_COR_SUPPORTED;
+    } else {
+        status &= PCI_ERR_UNC_SUPPORTED;
+    }
+    if (!status || status & (status - 1)) {
+        /* invalid status bit. one and only one bit must be set */
+        return;
+    }
+
+    exp_cap = dev->config + dev->exp.exp_cap;
+    /*
+     * Device Control and Device Status are 16 bit registers of the PCI
+     * Express capability. They are read whether or not the device has an AER
+     * capability: Device Status is read-modify-written below and the
+     * reporting enables in Device Control apply without AER as well.
+     */
+    devctl = pci_get_word(exp_cap + PCI_EXP_DEVCTL);
+    devsta = pci_get_word(exp_cap + PCI_EXP_DEVSTA);
+    if (dev->exp.aer_cap) {
+        aer_cap = dev->config + dev->exp.aer_cap;
+    }
+    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
+    correctable_error:
+        /* also entered from the uncorrectable branch for an advisory
+         * non-fatal error */
+        devsta |= PCI_EXP_DEVSTA_CED;
+        if (is_unsupported_request) {
+            devsta |= PCI_EXP_DEVSTA_URD;
+        }
+        pci_set_word(exp_cap + PCI_EXP_DEVSTA, devsta);
+
+        if (aer_cap) {
+            pci_set_bit_long(aer_cap + PCI_ERR_COR_STATUS, status);
+            mask = pci_get_long(aer_cap + PCI_ERR_COR_MASK);
+            if (mask & status) {
+                return;
+            }
+            if (is_advisory_nonfatal) {
+                uint32_t uncor_mask =
+                    pci_get_long(aer_cap + PCI_ERR_UNCOR_MASK);
+                /* the header is logged only if the underlying
+                 * uncorrectable error is not masked */
+                if (!(uncor_mask & uncor_status)) {
+                    is_header_log_overflowed = pcie_aer_record_error(dev, err);
+                }
+                pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, uncor_status);
+            }
+        }
+
+        if (is_unsupported_request && !(devctl & PCI_EXP_DEVCTL_URRE)) {
+            return;
+        }
+        if (!(devctl & PCI_EXP_DEVCTL_CERE)) {
+            return;
+        }
+        msg.severity = AER_ERR_COR;
+    } else {
+        bool is_fatal =
+            (pcie_aer_uncor_default_severity(status) == AER_ERR_FATAL);
+        uint16_t cmd;
+
+        if (aer_cap) {
+            /* with AER the severity register overrides the default */
+            is_fatal = status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+        }
+        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
+            is_advisory_nonfatal = true;
+            uncor_status = status;
+            status = PCI_ERR_COR_ADV_NONFATAL;
+            goto correctable_error;
+        }
+        if (is_fatal) {
+            devsta |= PCI_EXP_DEVSTA_FED;
+        } else {
+            devsta |= PCI_EXP_DEVSTA_NFED;
+        }
+        if (is_unsupported_request) {
+            devsta |= PCI_EXP_DEVSTA_URD;
+        }
+        /* 16 bit register: a 32 bit store would also overwrite the lower
+         * half of the register that follows Device Status */
+        pci_set_word(exp_cap + PCI_EXP_DEVSTA, devsta);
+
+        if (aer_cap) {
+            mask = pci_get_long(aer_cap + PCI_ERR_UNCOR_MASK);
+            if (mask & status) {
+                /* masked: only the status bit is set, nothing is logged
+                 * or signaled */
+                pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, status);
+                return;
+            }
+
+            /* record before setting the status bit: pcie_aer_record_error()
+             * tests the current status to decide whether to queue */
+            is_header_log_overflowed = pcie_aer_record_error(dev, err);
+            pci_set_bit_long(aer_cap + PCI_ERR_UNCOR_STATUS, status);
+        }
+
+        cmd = pci_get_word(dev->config + PCI_COMMAND);
+        if (is_unsupported_request &&
+            !(devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
+            return;
+        }
+        if (is_fatal) {
+            if (!((cmd & PCI_COMMAND_SERR) ||
+                  (devctl & PCI_EXP_DEVCTL_FERE))) {
+                return;
+            }
+            msg.severity = AER_ERR_FATAL;
+        } else {
+            if (!((cmd & PCI_COMMAND_SERR) ||
+                  (devctl & PCI_EXP_DEVCTL_NFERE))) {
+                return;
+            }
+            msg.severity = AER_ERR_NONFATAL;
+        }
+    }
+
+    /* send up error message */
+    msg.source_id = err->source_id;
+    pcie_aer_msg(dev, &msg);
+
+    if (is_header_log_overflowed) {
+        /* the log ring was full: report that as a correctable
+         * Header Log Overflow error of its own */
+        PCIEAERErr header_log_overflow = {
+            .status = PCI_ERR_COR_HL_OVERFLOW,
+            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
+            .header = {0, 0, 0, 0},
+            .prefix = {0, 0, 0, 0},
+        };
+        pcie_aer_inject_error(dev, &header_log_overflow);
+    }
+}
+
+/*
+ * Store vector in the PCI_ERR_ROOT_IRQ field of the Root Error Status
+ * register. vector must be below PCI_ERR_ROOT_IRQ_MAX (asserted).
+ */
+void pcie_aer_root_set_vector(PCIDevice *dev, uint8_t vector)
+{
+    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+    assert(vector < PCI_ERR_ROOT_IRQ_MAX);
+    pci_clear_set_bit_long(aer_cap + PCI_ERR_ROOT_STATUS,
+                           PCI_ERR_ROOT_IRQ,
+                           ((uint32_t)vector) << PCI_ERR_ROOT_IRQ_SHIFT);
+}
+
+/* Return the PCI_ERR_ROOT_IRQ field of the Root Error Status register, i.e.
+ * the value stored by pcie_aer_root_set_vector(). */
+static uint8_t pcie_aer_root_get_vector(PCIDevice *dev)
+{
+    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
+    return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
+}
+
+/*
+ * Root port specific part of AER setup: make the enable bits of the Root
+ * Error Command register writable and the report bits of the Root Error
+ * Status register write-1-to-clear. Uses dev->exp.aer_cap, so the AER
+ * capability offset must already be set.
+ */
+void pcie_aer_root_init(PCIDevice *dev)
+{
+    uint16_t pos = dev->exp.aer_cap;
+
+    pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
+                 PCI_ERR_ROOT_CMD_EN_MASK);
+    pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
+                 PCI_ERR_ROOT_STATUS_REPORT_MASK);
+}
+
+/* Reset the root port specific AER state: clears the Root Error Command
+ * register. Root Error Status is not touched here. */
+void pcie_aer_root_reset(PCIDevice *dev)
+{
+    uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
+
+    pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
+
+    /*
+     * Advanced Error Interrupt Message Number in Root Error Status Register
+     * must be updated by chip dependent code because it's chip dependent
+     * which number is used.
+     */
+}
+
+/*
+ * True if, for at least one error class (correctable, non-fatal, fatal),
+ * the enable bit is set in cmd (Root Error Command value) and the matching
+ * "received" bit is set in status (Root Error Status value).
+ */
+static bool pcie_aer_root_does_trigger(uint32_t cmd, uint32_t status)
+{
+    return
+        ((cmd & PCI_ERR_ROOT_CMD_COR_EN) && (status & PCI_ERR_ROOT_COR_RCV)) ||
+        ((cmd & PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
+         (status & PCI_ERR_ROOT_NONFATAL_RCV)) ||
+        ((cmd & PCI_ERR_ROOT_CMD_FATAL_EN) &&
+         (status & PCI_ERR_ROOT_FATAL_RCV));
+}
+
+/*
+ * Root port specific post-processing of a config space write.
+ * root_cmd_prev is the Root Error Command value from before the write; the
+ * current value is read from config space. addr, val and len are not used
+ * by this function.
+ *
+ * NOTE(review): when no enable bit is set in the new Root Error Command
+ * value nothing is done, so an INTx level raised earlier is not lowered
+ * here -- confirm that this is intended.
+ */
+void pcie_aer_root_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint32_t root_cmd_prev)
+{
+    uint16_t pos = dev->exp.aer_cap;
+    uint8_t *aer_cap = dev->config + pos;
+
+    /* root command register */
+    uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
+    if (root_cmd & PCI_ERR_ROOT_CMD_EN_MASK) {
+        /* 6.2.4.1.2 Interrupt Generation */
+
+        /* 0 -> 1 */
+        uint32_t root_cmd_set = (root_cmd_prev ^ root_cmd) & root_cmd;
+        uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
+
+        /* MSI/MSI-X: only newly enabled classes with a pending error fire;
+         * INTx: level follows all enabled classes with a pending error */
+        bool msi_trigger = pcie_aer_root_does_trigger(root_cmd_set,
+                                                      root_status);
+        int int_level = pcie_aer_root_does_trigger(root_cmd, root_status);
+
+        pcie_aer_root_port_notify(dev, msi_trigger, int_level);
+    }
+}
+
+/* Save/load description of one PCIEAERErr: every field of the structure is
+ * listed, in declaration order. */
+static const VMStateDescription vmstate_pcie_aer_err = {
+    .name = "PCIE_AER_ERROR",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields     = (VMStateField[]) {
+        VMSTATE_UINT32(status, PCIEAERErr),
+        VMSTATE_UINT16(source_id, PCIEAERErr),
+        VMSTATE_UINT16(flags, PCIEAERErr),
+        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
+        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * VMStateField initializer for a pointer member _field of _state that
+ * points to an array of _type structures (each described by _vmsd) whose
+ * element count is held in the uint16_t member _field_num of _state.
+ */
+#define VMSTATE_PCIE_AER_ERRS(_field, _state, _field_num, _vmsd, _type) { \
+    .name       = (stringify(_field)),                                    \
+    .version_id = 0,                                                      \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),     \
+    .size       = sizeof(_type),                                          \
+    .vmsd       = &(_vmsd),                                               \
+    .flags      = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT,           \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),          \
+}
+
+/*
+ * Save/load description of a PCIEAERLog: the ring indices, log_max and the
+ * log array itself, whose element count is taken from log_max.
+ */
+const VMStateDescription vmstate_pcie_aer_log = {
+    .name = "PCIE_AER_ERROR_LOG",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields     = (VMStateField[]) {
+        VMSTATE_UINT32(producer, PCIEAERLog),
+        VMSTATE_UINT32(consumer, PCIEAERLog),
+        VMSTATE_UINT16(log_max, PCIEAERLog),
+        VMSTATE_PCIE_AER_ERRS(log, PCIEAERLog, log_max,
+                              vmstate_pcie_aer_err, PCIEAERErr),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
diff --git a/hw/pcie_aer.h b/hw/pcie_aer.h
new file mode 100644
index 0000000..7d1540b
--- /dev/null
+++ b/hw/pcie_aer.h
@@ -0,0 +1,105 @@ 
+/*
+ * pcie_aer.h
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_PCIE_AER_H
+#define QEMU_PCIE_AER_H
+
+#include "hw.h"
+
+/* definitions used by PCIExpressDevice */
+typedef enum {
+    AER_MSG_MASKED,     /* presumably: message was masked -- TODO confirm */
+    AER_MSG_SENT,       /* presumably: message was sent -- TODO confirm */
+} AERMsgResult;
+
+/* AER log */
+struct PCIEAERLog {
+    /* This structure is saved/loaded (see vmstate_pcie_aer_log), so its
+       members are explicitly sized instead of being plain unsigned int. */
+    uint32_t producer;
+    uint32_t consumer;
+
+#define PCIE_AER_LOG_MAX_DEFAULT        8
+#define PCIE_AER_LOG_MAX_MAX            128 /* what is appropriate? */
+#define PCIE_AER_LOG_MAX_UNSET          0xffff
+    uint16_t log_max;   /* element count of log[] in the saved state */
+
+    PCIEAERErr *log;    /* ring buffer */
+};
+
+/* aer error severity */
+typedef enum {
+    /* These values are the same as those used by the
+     * root error command register in the aer extended cap and
+     * the root control register in the pci express cap.
+     */
+    AER_ERR_COR         = 0x1,
+    AER_ERR_NONFATAL    = 0x2,
+    AER_ERR_FATAL       = 0x4,
+} PCIEAERSeverity;
+
+/* aer error message: an error signaling message carries only the error
+   severity and the source id. See 2.2.8.3 error signaling messages */
+struct PCIEAERMsg {
+    PCIEAERSeverity severity;
+    uint16_t source_id; /* bdf */
+};
+
+static inline bool      /* true iff msg severity is non-fatal or fatal */
+pcie_aer_msg_is_uncor(const PCIEAERMsg *msg)
+{
+    return msg->severity == AER_ERR_NONFATAL || msg->severity == AER_ERR_FATAL;
+}
+
+/* an AER error: the element type of the PCIEAERLog ring buffer */
+struct PCIEAERErr {
+    uint32_t status;    /* error status bits */
+    uint16_t source_id; /* bdf */
+
+#define PCIE_AER_ERR_IS_CORRECTABLE     0x1     /* correctable/uncorrectable */
+#define PCIE_AER_ERR_MAYBE_ADVISORY     0x2     /* maybe advisory non-fatal */
+#define PCIE_AER_ERR_HEADER_VALID       0x4     /* TLP header is logged */
+#define PCIE_AER_ERR_TLP_PRESENT        0x8     /* TLP Prefix is logged */
+    uint16_t flags;     /* bitwise OR of the PCIE_AER_ERR_xxx values above */
+
+    uint32_t header[4]; /* TLP header */
+    uint32_t prefix[4]; /* TLP header prefix */
+};
+
+extern const VMStateDescription vmstate_pcie_aer_log; /* for PCIEAERLog */
+
+void pcie_aer_init(PCIDevice *dev, uint16_t offset);
+void pcie_aer_exit(PCIDevice *dev);
+void pcie_aer_write_config(PCIDevice *dev,
+                           uint32_t addr, uint32_t val, int len,
+                           uint32_t uncorsta_prev); /* value before write? */
+
+/* aer root port */
+void pcie_aer_root_set_vector(PCIDevice *dev, uint8_t vector);
+void pcie_aer_root_init(PCIDevice *dev);
+void pcie_aer_root_reset(PCIDevice *dev);
+void pcie_aer_root_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint32_t root_cmd_prev); /* before write? */
+
+/* error injection */
+void pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
+
+#endif /* QEMU_PCIE_AER_H */
diff --git a/qemu-common.h b/qemu-common.h
index 6d9ee26..94b2c11 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -221,6 +221,9 @@  typedef struct PCIBus PCIBus;
 typedef struct PCIDevice PCIDevice;
 typedef struct PCIExpressDevice PCIExpressDevice;
 typedef struct PCIBridge PCIBridge;
+typedef struct PCIEAERMsg PCIEAERMsg;
+typedef struct PCIEAERLog PCIEAERLog;
+typedef struct PCIEAERErr PCIEAERErr;
 typedef struct SerialState SerialState;
 typedef struct IRQState *qemu_irq;
 typedef struct PCMCIACardState PCMCIACardState;