Patchwork [v4,08/15] pcie: helper functions for pcie capability and extended capability

login
register
mail settings
Submitter Isaku Yamahata
Date Oct. 18, 2010, 3:17 a.m.
Message ID <e69aefe1308b235abe9447d8f6d79c9ba783227a.1287371107.git.yamahata@valinux.co.jp>
Download mbox | patch
Permalink /patch/68114/
State New
Headers show

Comments

Isaku Yamahata - Oct. 18, 2010, 3:17 a.m.
This patch implements helper functions for pci express capability
and pci express extended capability allocation.
NOTE: presence detection depends on pci_qdev_init() change.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
Changes v3 -> v4:
- various clean up
- dropped pcie_notify(), pcie_del_capability()
- use pci_{clear_set, clear}_bit_xxx() helper functions.
- dropped pci_exp_cap()
---
 Makefile.objs |    1 +
 hw/pci.h      |    5 +
 hw/pcie.c     |  529 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pcie.h     |  107 ++++++++++++
 qemu-common.h |    1 +
 5 files changed, 643 insertions(+), 0 deletions(-)
 create mode 100644 hw/pcie.c
 create mode 100644 hw/pcie.h
Michael S. Tsirkin - Oct. 18, 2010, 5:38 a.m.
On Mon, Oct 18, 2010 at 12:17:49PM +0900, Isaku Yamahata wrote:
> This patch implements helper functions for pci express capability
> and pci express extended capability allocation.
> NOTE: presence detection depends on pci_qdev_init() change.
> 
> Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

OK, this is getting good.

> ---
> Changes v3 -> v4:
> - various clean up
> - dropped pcie_notify(), pcie_del_capability()
> - use pci_{clear_set, clear}_bit_xxx() helper functions.
> - dropped pci_exp_cap()
> ---
>  Makefile.objs |    1 +
>  hw/pci.h      |    5 +
>  hw/pcie.c     |  529 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/pcie.h     |  107 ++++++++++++
>  qemu-common.h |    1 +
>  5 files changed, 643 insertions(+), 0 deletions(-)
>  create mode 100644 hw/pcie.c
>  create mode 100644 hw/pcie.h
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 5f5a4c5..eeb5134 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -186,6 +186,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o
>  # PCI watchdog devices
>  hw-obj-y += wdt_i6300esb.o
>  
> +hw-obj-y += pcie.o
>  hw-obj-y += msix.o msi.o
>  
>  # PCI network cards
> diff --git a/hw/pci.h b/hw/pci.h
> index 3dc2099..3c9c228 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -9,6 +9,8 @@
>  /* PCI includes legacy ISA access.  */
>  #include "isa.h"
>  
> +#include "pcie.h"
> +
>  /* PCI bus */
>  
>  #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
> @@ -178,6 +180,9 @@ struct PCIDevice {
>      /* Offset of MSI capability in config space */
>      uint8_t msi_cap;
>  
> +    /* PCI Express */
> +    PCIExpressDevice exp;
> +
>      /* Location of option rom */
>      char *romfile;
>      ram_addr_t rom_offset;
> diff --git a/hw/pcie.c b/hw/pcie.c
> new file mode 100644
> index 0000000..0f2bf50
> --- /dev/null
> +++ b/hw/pcie.c
> @@ -0,0 +1,529 @@
> +/*
> + * pcie.c
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "sysemu.h"
> +#include "pci_bridge.h"
> +#include "pcie.h"
> +#include "msix.h"
> +#include "msi.h"
> +#include "pci_internals.h"
> +#include "pcie_regs.h"
> +
> +//#define DEBUG_PCIE
> +#ifdef DEBUG_PCIE
> +# define PCIE_DPRINTF(fmt, ...)                                         \
> +    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
> +#else
> +# define PCIE_DPRINTF(fmt, ...) do {} while (0)
> +#endif
> +#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
> +    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
> +
> +
> +/***************************************************************************
> + * pci express capability helper functions
> + */
> +int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port)
> +{
> +    int pos;
> +    uint8_t *exp_cap;
> +
> +    assert(pci_is_express(dev));
> +
> +    pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset,
> +                                 PCI_EXP_VER2_SIZEOF);
> +    if (pos < 0) {
> +        return pos;
> +    }
> +    dev->exp.exp_cap = pos;
> +    exp_cap = dev->config + pos;
> +
> +    /* capability register
> +       interrupt message number defaults to 0 */
> +    pci_set_word(exp_cap + PCI_EXP_FLAGS,
> +                 ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) |
> +                 PCI_EXP_FLAGS_VER2);
> +
> +    /* device capability register
> +     * table 7-12:
> +     * roll based error reporting bit must be set by all
> +     * Functions conforming to the ECN, PCI Express Base
> +     * Specification, Revision 1.1., or subsequent PCI Express Base
> +     * Specification revisions.
> +     */
> +    pci_set_long(exp_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER);
> +
> +    pci_set_long(exp_cap + PCI_EXP_LNKCAP,
> +                 (port << PCI_EXP_LNKCAP_PN_SHIFT) |
> +                 PCI_EXP_LNKCAP_ASPMS_0S |
> +                 PCI_EXP_LNK_MLW_1 |
> +                 PCI_EXP_LNK_LS_25);
> +
> +    pci_set_word(exp_cap + PCI_EXP_LNKSTA,
> +                 PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
> +
> +    pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
> +                 PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
> +
> +    pci_set_word(dev->wmask + pos, PCI_EXP_DEVCTL2_EETLPPB);
> +    return pos;
> +}
> +
> +void pcie_cap_exit(PCIDevice *dev)
> +{
> +    pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF);
> +}
> +
> +uint8_t pcie_cap_get_type(const PCIDevice *dev)
> +{
> +    uint32_t pos = dev->exp.exp_cap;
> +    assert(pos > 0);
> +    return (pci_get_word(dev->config + pos + PCI_EXP_FLAGS) &
> +            PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT;
> +}
> +
> +/* MSI/MSI-X */
> +/* pci express interrupt message number */
> +/* 7.8.2 PCI Express Capabilities Register: Interrupt Message Number */
> +void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector)
> +{
> +    assert(vector < 32);
> +    pci_clear_set_bit_word(dev->config + dev->exp.exp_cap + PCI_EXP_FLAGS,
> +                           PCI_EXP_FLAGS_IRQ,
> +                           vector << PCI_EXP_FLAGS_IRQ_SHIFT);
> +}
> +
> +uint8_t pcie_cap_flags_get_vector(PCIDevice *dev)
> +{
> +    return (pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_FLAGS) &
> +            PCI_EXP_FLAGS_IRQ) >> PCI_EXP_FLAGS_IRQ_SHIFT;
> +}
> +
> +void pcie_cap_deverr_init(PCIDevice *dev)
> +{
> +    uint32_t pos = dev->exp.exp_cap;
> +    pci_set_bit_long(dev->config + pos + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER);
> +    pci_set_bit_long(dev->wmask + pos + PCI_EXP_DEVCTL,
> +                     PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
> +                     PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
> +    pci_set_bit_long(dev->w1cmask + pos + PCI_EXP_DEVSTA,
> +                     PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
> +                     PCI_EXP_DEVSTA_URD | PCI_EXP_DEVSTA_URD);
> +}
> +
> +void pcie_cap_deverr_reset(PCIDevice *dev)
> +{
> +    pci_clear_bit_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL,
> +                       PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
> +                       PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
> +}
> +
> +static void pcie_cap_slot_notify(PCIDevice *dev,
> +                                 bool msi_trigger, int int_level)

IMO this is not a good API because only one of the msi/int level arguments
is ever used.
Can this get slot control/event and figure out what to do itself?


> +{
> +    unsigned int msi_vector = pcie_cap_flags_get_vector(dev);
> +    unsigned int int_irq = 0; /* INTA# is chosen for slot event notification */

For the specific devices we implement, right?
I think this needs a TODO, other devices might
need to use another interrupt.

> +
> +    if (msix_enabled(dev)) {
> +        if (msi_trigger) {
> +            msix_notify(dev, msi_vector);
> +        }
> +    } else if (msi_enabled(dev)) {
> +        if (msi_trigger) {
> +            msi_notify(dev, msi_vector);
> +        }
> +    } else {
> +        qemu_set_irq(dev->irq[int_irq], int_level);
> +    }
> +}
> +
> +/*
> + * A PCI Express Hot-Plug Event has occured, so update slot status register
> + * and notify OS of the event if necessary.
> + *
> + * 6.7.3 PCI Express Hot-Plug Events
> + * 6.7.3.4 Software Notification of Hot-Plug Events
> + */
> +static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event)
> +{
> +    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
> +    uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
> +    uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> +    bool msi_trigger;
> +    int int_level;
> +
> +    PCIE_DEV_PRINTF(dev,
> +                    "sltctl: 0x%02"PRIx16" sltsta: 0x%02"PRIx16" event: %x\n",
> +                    sltctl, sltsta, event);
> +
> +    msi_trigger = (sltctl & PCI_EXP_SLTCTL_HPIE) && (sltctl & event) &&
> +        ((sltsta ^ event) & event); /* 0 -> 1 */
> +
> +    sltsta |= event;
> +    pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta);
> +    PCIE_DEV_PRINTF(dev, "sltsta -> %02"PRIx16"\n", sltsta);
> +
> +    int_level = ((sltctl & PCI_EXP_SLTCTL_HPIE) &&
> +                 (sltsta & PCI_EXP_HP_EV_SUPPORTED));

int_level ignores whether event is masked in sltctl?


> +
> +    pcie_cap_slot_notify(dev, msi_trigger, int_level);

Both places must test HPIE.
I suggest simply moving this test into pcie_cap_slot_notify:
	if (!(sltctl & PCI_EXP_SLTCTL_HPIE)) {
		return;
	}
> +}
> +
> +static int pcie_cap_slot_hotplug(DeviceState *qdev,
> +                                 PCIDevice *pci_dev, int state)
> +{
> +    PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev);
> +    uint8_t *exp_cap = d->config + d->exp.exp_cap;
> +    uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> +
> +    if (!pci_dev->qdev.hotplugged) {
> +        assert(state); /* this case only happens at machine creation. */
> +        pci_set_bit_word(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
> +        return 0;
> +    }
> +
> +    PCIE_DEV_PRINTF(pci_dev, "hotplug state: %d\n", state);
> +    if (sltsta & PCI_EXP_SLTSTA_EIS) {
> +        /* the slot is electromechanically locked.
> +         * This error is propagated up to qdev and then to HMP/QMP.
> +         */
> +        return -EBUSY;
> +    }
> +
> +    /* TODO: multifunction hot-plug.
> +     * Right now, only a device of function = 0 is allowed to be
> +     * hot plugged/unplugged.
> +     */
> +    assert(PCI_FUNC(pci_dev->devfn) == 0);
> +
> +    if (state) {
> +        pci_set_bit_word(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
> +        pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC);
> +    } else {
> +        qdev_free(&pci_dev->qdev);
> +        pci_clear_bit_word(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
> +        pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC);
> +    }
> +    return 0;
> +}
> +
> +/* pci express slot for pci express root/downstream port
> +   PCI express capability slot registers */
> +void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot)
> +{
> +    uint32_t pos = dev->exp.exp_cap;
> +
> +    pci_set_bit_word(dev->config + pos + PCI_EXP_FLAGS, PCI_EXP_FLAGS_SLOT);
> +
> +    pci_clear_set_bit_long(dev->config + pos + PCI_EXP_SLTCAP,
> +                           ~PCI_EXP_SLTCAP_PSN,
> +                           (slot << PCI_EXP_SLTCAP_PSN_SHIFT) |
> +                           PCI_EXP_SLTCAP_EIP |
> +                           PCI_EXP_SLTCAP_HPS |
> +                           PCI_EXP_SLTCAP_HPC |
> +                           PCI_EXP_SLTCAP_PIP |
> +                           PCI_EXP_SLTCAP_AIP |
> +                           PCI_EXP_SLTCAP_ABP);
> +
> +    pci_clear_set_bit_word(dev->config + pos + PCI_EXP_SLTCTL,
> +                           PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC,
> +                           PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF);
> +    pci_set_bit_word(dev->wmask + pos + PCI_EXP_SLTCTL,
> +                     PCI_EXP_SLTCTL_PIC |
> +                     PCI_EXP_SLTCTL_AIC |
> +                     PCI_EXP_SLTCTL_HPIE |
> +                     PCI_EXP_SLTCTL_CCIE |
> +                     PCI_EXP_SLTCTL_PDCE |
> +                     PCI_EXP_SLTCTL_ABPE);
> +
> +    pci_set_bit_word(dev->w1cmask + pos + PCI_EXP_SLTSTA,
> +                     PCI_EXP_HP_EV_SUPPORTED);
> +
> +    pci_bus_hotplug(pci_bridge_get_sec_bus(DO_UPCAST(PCIBridge, dev, dev)),
> +                    pcie_cap_slot_hotplug, &dev->qdev);
> +}
> +
> +void pcie_cap_slot_reset(PCIDevice *dev)
> +{
> +    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
> +
> +    PCIE_DEV_PRINTF(dev, "reset\n");
> +
> +    pci_clear_set_bit_word(exp_cap + PCI_EXP_SLTCTL,
> +                           PCI_EXP_SLTCTL_EIC |
> +                           PCI_EXP_SLTCTL_PIC |
> +                           PCI_EXP_SLTCTL_AIC |
> +                           PCI_EXP_SLTCTL_HPIE |
> +                           PCI_EXP_SLTCTL_CCIE |
> +                           PCI_EXP_SLTCTL_PDCE |
> +                           PCI_EXP_SLTCTL_ABPE,
> +                           PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF);
> +
> +    pci_clear_bit_word(exp_cap + PCI_EXP_SLTSTA,
> +                       PCI_EXP_SLTSTA_EIS |/* by reset, the lock is released */
> +                       PCI_EXP_SLTSTA_CC |
> +                       PCI_EXP_SLTSTA_PDC |
> +                       PCI_EXP_SLTSTA_ABP);
> +}
> +
> +void pcie_cap_slot_write_config(PCIDevice *dev,
> +                                uint32_t addr, uint32_t val, int len,
> +                                uint16_t sltctl_prev)
> +{
> +    uint32_t pos = dev->exp.exp_cap;
> +    uint8_t *exp_cap = dev->config + pos;
> +    uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
> +    uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> +
> +    PCIE_DEV_PRINTF(dev,
> +                    "addr: 0x%"PRIx32" val: 0x%"PRIx32" len: %d\n"
> +                    "\tsltctl_prev: 0x%02"PRIx16" sltctl: 0x%02"PRIx16
> +                    " sltsta: 0x%02"PRIx16"\n",
> +                    addr, val, len, sltctl_prev, sltctl, sltsta);
> +    /* SLTSTA: process SLTSTA before SLTCTL to avoid spurious interrupt */
> +    if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
> +        sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> +
> +        /* write to stlsta results in clearing bits,
> +           so new interrupts won't be generated. */
> +        PCIE_DEV_PRINTF(dev, "sltsta -> 0x%02"PRIx16"\n", sltsta);
> +    }
> +

above does not do anything.

> +    /* SLTCTL */
> +    if (ranges_overlap(addr, len, pos + PCI_EXP_SLTCTL, 2)) {
> +        PCIE_DEV_PRINTF(dev, "sltctl: 0x%02"PRIx16" -> 0x%02"PRIx16"\n",
> +                        sltctl_prev, sltctl);
> +        if (pci_get_word(dev->written + pos + PCI_EXP_SLTCTL) &
> +            PCI_EXP_SLTCTL_EIC) {
> +            sltsta ^= PCI_EXP_SLTSTA_EIS; /* toggle PCI_EXP_SLTSTA_EIS bit */
> +            pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta);
> +            PCIE_DEV_PRINTF(dev, "PCI_EXP_SLTCTL_EIC: "
> +                            "sltsta -> 0x%02"PRIx16"\n",
> +                            sltsta);
> +        }

Make EIC writeable; then you will not need ranges_overlap
and will not need the written register:
	w = pci_get_word(dev->config + pos + PCI_EXP_SLTCTL);
	if (w & PCI_EXP_SLTCTL_EIC) {
		pci_set_word(dev->config + pos + PCI_EXP_SLTCTL,
			w & ~PCI_EXP_SLTCTL_EIC);
	}

> +
> +        /*
> +         * The events might be enabled or disabled,
> +         * Check if the software notificastion condition is satisfied
> +         * or disatisfied.
> +         *
> +         * 6.7.3.4 Software Notification of Hot-plug events
> +         */
> +        if (sltctl & PCI_EXP_SLTCTL_HPIE) {
> +            bool msi_trigger =
> +                (sltctl_prev ^ sltctl) & sltctl & /* 0 -> 1 */
> +                sltsta & PCI_EXP_HP_EV_SUPPORTED;
> +            int int_level = sltctl & sltsta & PCI_EXP_HP_EV_SUPPORTED;
> +
> +            pcie_cap_slot_notify(dev, msi_trigger, int_level);
> +        }

What happens if the level is 1 (asserted) and the user clears
PCI_EXP_SLTCTL_HPIE?

Why can't this function use pcie_cap_slot_event?
There seems to be duplicated logic here.

> +
> +        if (!((sltctl_prev ^ sltctl) & PCI_EXP_SLTCTL_SUPPORTED)) {
> +            PCIE_DEV_PRINTF(dev,
> +                            "sprious command completion slctl "
> +                            "0x%"PRIx16" -> 0x%"PRIx16"\n",
> +                            sltctl_prev, sltctl);
> +        }
> +
> +        /* command completion.
> +         * Real hardware might take a while to complete
> +         * requested command because physical movement would be involved
> +         * like locking the electromechanical lock.
> +         * However in our case, command is completed instantaneously above,
> +         * so send a command completion event right now.
> +         *
> +         * 6.7.3.2 Command Completed Events
> +         */
> +        /* set command completed bit */
> +        pcie_cap_slot_event(dev, PCI_EXP_HP_EV_CCI);
> +    }
> +}
> +
> +void pcie_cap_slot_push_attention_button(PCIDevice *dev)
> +{
> +    pcie_cap_slot_event(dev, PCI_EXP_HP_EV_ABP);
> +}
> +
> +/* root control/capabilities/status. PME isn't emulated for now */
> +void pcie_cap_root_init(PCIDevice *dev)
> +{
> +    pci_set_word(dev->wmask + dev->exp.exp_cap + PCI_EXP_RTCTL,
> +                 PCI_EXP_RTCTL_SECEE | PCI_EXP_RTCTL_SENFEE |
> +                 PCI_EXP_RTCTL_SEFEE);
> +}
> +
> +void pcie_cap_root_reset(PCIDevice *dev)
> +{
> +    pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_RTCTL, 0);
> +}
> +
> +/* function level reset(FLR) */
> +void pcie_cap_flr_init(PCIDevice *dev, pcie_flr_fn flr)
> +{
> +    pci_set_bit_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP,
> +                     PCI_EXP_DEVCAP_FLR);
> +    dev->exp.flr = flr;
> +}
> +
> +void pcie_cap_flr_write_config(PCIDevice *dev,
> +                               uint32_t addr, uint32_t val, int len)
> +{
> +    /* if ranges_overlap(addr, len, pos + PCI_EXP_DEVCTL, 2) is false,
> +       written == 0 */
> +    uint16_t written = pci_get_word(dev->written +
> +                                    dev->exp.exp_cap + PCI_EXP_DEVCTL);
> +    if ((written & PCI_EXP_DEVCTL_BCR_FLR) && dev->exp.flr) {
> +        dev->exp.flr(dev);
> +    }
> +}

So if flr is NULL you don't do anything on FLR?  This is obviously
wrong, isn't it?  But we will only see when this is made to really work.
For now, just comment this all out as you are waiting for infrastructure
to be implemented, and make the bit simply RO 0.

Long term, make the flr bit writeable and
	uint16_t written = pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL);
	if ((written & PCI_EXP_DEVCTL_BCR_FLR)) {
		/* TODO: reset device */
		pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL,
				written & ~PCI_EXP_DEVCTL_BCR_FLR);
	}
> +
> +/* Alternative Routing-ID Interpretation (ARI) */
> +/* ari forwarding support for down stream port */
> +void pcie_cap_ari_init(PCIDevice *dev)
> +{
> +    uint32_t pos = dev->exp.exp_cap;
> +    pci_set_bit_long(dev->config + pos + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_ARI);
> +    pci_set_bit_long(dev->wmask + pos + PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_ARI);
> +}
> +
> +void pcie_cap_ari_reset(PCIDevice *dev)
> +{
> +    pci_clear_bit_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2,
> +                       PCI_EXP_DEVCTL2_ARI);
> +}
> +
> +bool pcie_cap_is_ari_enabled(const PCIDevice *dev)
> +{
> +    if (!pci_is_express(dev)) {
> +        return false;
> +    }
> +    if (!dev->exp.exp_cap) {
> +        return false;
> +    }
> +
> +    return pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2) &
> +        PCI_EXP_DEVCTL2_ARI;
> +}
> +
> +/**************************************************************************
> + * pci express extended capability allocation functions
> + * uint16_t ext_cap_id (16 bit)
> + * uint8_t cap_ver (4 bit)
> + * uint16_t cap_offset (12 bit)
> + * uint16_t ext_cap_size
> + */
> +
> +static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id,
> +                                          uint16_t *prev_p)
> +{
> +    uint16_t prev = 0;
> +    uint16_t next;
> +    uint32_t header = pci_get_long(dev->config + PCI_CONFIG_SPACE_SIZE);
> +
> +    if (!header) {
> +        /* no extended capability */
> +        next = 0;
> +        goto out;
> +    }
> +    for (next = PCI_CONFIG_SPACE_SIZE; next;
> +         prev = next, next = PCI_EXT_CAP_NEXT(header)) {
> +
> +        assert(next >= PCI_CONFIG_SPACE_SIZE);
> +        assert(next <= PCIE_CONFIG_SPACE_SIZE - 8);
> +
> +        header = pci_get_long(dev->config + next);
> +        if (PCI_EXT_CAP_ID(header) == cap_id) {
> +            break;
> +        }
> +    }
> +
> +out:
> +    if (prev_p) {
> +        *prev_p = prev;
> +    }
> +    return next;
> +}
> +
> +uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id)
> +{
> +    return pcie_find_capability_list(dev, cap_id, NULL);
> +}
> +
> +static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next)
> +{
> +    uint16_t header = pci_get_long(dev->config + pos);
> +    assert(!(next & (PCI_EXT_CAP_ALIGN - 1)));
> +    header = (header & ~PCI_EXT_CAP_NEXT_MASK) |
> +        ((next << PCI_EXT_CAP_NEXT_SHIFT) & PCI_EXT_CAP_NEXT_MASK);
> +    pci_set_long(dev->config + pos, header);
> +}
> +
> +/*
> + * caller must supply valid (offset, size) * such that the range shouldn't
> + * overlap with other capability or other registers.
> + * This function doesn't check it.
> + */
> +void pcie_add_capability(PCIDevice *dev,
> +                         uint16_t cap_id, uint8_t cap_ver,
> +                         uint16_t offset, uint16_t size)
> +{
> +    uint32_t header;
> +    uint16_t next;
> +
> +    assert(offset >= PCI_CONFIG_SPACE_SIZE);
> +    assert(offset < offset + size);
> +    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
> +    assert(size >= 8);
> +    assert(pci_is_express(dev));
> +
> +    if (offset == PCI_CONFIG_SPACE_SIZE) {
> +        header = pci_get_long(dev->config + offset);
> +        next = PCI_EXT_CAP_NEXT(header);
> +    } else {
> +        uint16_t prev;
> +
> +        /* 0 is reserved cap id. use internally to find the last capability
> +           in the linked list */
> +        next = pcie_find_capability_list(dev, 0, &prev);
> +
> +        assert(prev >= PCI_CONFIG_SPACE_SIZE);
> +        assert(next == 0);
> +        pcie_ext_cap_set_next(dev, prev, offset);
> +    }
> +    pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next));
> +
> +    /* Make capability read-only by default */
> +    memset(dev->wmask + offset, 0, size);
> +    memset(dev->w1cmask + offset, 0, size);
> +    /* Check capability by default */
> +    memset(dev->cmask + offset, 0xFF, size);
> +}
> +
> +/**************************************************************************
> + * pci express extended capability helper functions
> + */
> +
> +/* ARI */
> +void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
> +{
> +    pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
> +                        offset, PCI_ARI_SIZEOF);
> +    pci_set_long(dev->config + offset + PCI_ARI_CAP, PCI_ARI_CAP_NFN(nextfn));
> +}
> diff --git a/hw/pcie.h b/hw/pcie.h
> new file mode 100644
> index 0000000..603bb8b
> --- /dev/null
> +++ b/hw/pcie.h
> @@ -0,0 +1,107 @@
> +/*
> + * pcie.h
> + *
> + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
> + *                    VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef QEMU_PCIE_H
> +#define QEMU_PCIE_H
> +
> +#include "hw.h"
> +#include "pci_regs.h"
> +#include "pcie_regs.h"
> +
> +typedef enum {
> +    /* for attention and power indicator */
> +    PCI_EXP_HP_IND_RESERVED     = PCI_EXP_SLTCTL_IND_RESERVED,
> +    PCI_EXP_HP_IND_ON           = PCI_EXP_SLTCTL_IND_ON,
> +    PCI_EXP_HP_IND_BLINK        = PCI_EXP_SLTCTL_IND_BLINK,
> +    PCI_EXP_HP_IND_OFF          = PCI_EXP_SLTCTL_IND_OFF,
> +} PCIExpressIndicator;
> +
> +typedef enum {
> +    /* these bits must match the bits in Slot Control/Status registers.
> +     * PCI_EXP_HP_EV_xxx = PCI_EXP_SLTCTL_xxxE = PCI_EXP_SLTSTA_xxx
> +     *
> +     * Not all the bits of slot control register match with the ones of
> +     * slot status. Not some bits of slot status register is used to
> +     * show status, not to report event occurence.
> +     * So such bits must be masked out when checking the software
> +     * notification condition.
> +     */
> +    PCI_EXP_HP_EV_ABP           = PCI_EXP_SLTCTL_ABPE,
> +                                        /* attention button pressed */
> +    PCI_EXP_HP_EV_PDC           = PCI_EXP_SLTCTL_PDCE,
> +                                        /* presence detect changed */
> +    PCI_EXP_HP_EV_CCI           = PCI_EXP_SLTCTL_CCIE,
> +                                        /* command completed */
> +
> +    PCI_EXP_HP_EV_SUPPORTED     = PCI_EXP_HP_EV_ABP |
> +                                  PCI_EXP_HP_EV_PDC |
> +                                  PCI_EXP_HP_EV_CCI,
> +                                                /* supported event mask  */
> +
> +    /* events not listed aren't supported */
> +} PCIExpressHotPlugEvent;
> +
> +typedef void (*pcie_flr_fn)(PCIDevice *dev);
> +
> +struct PCIExpressDevice {
> +    /* Offset of express capability in config space */
> +    uint8_t exp_cap;
> +
> +    /* FLR */
> +    pcie_flr_fn flr;
> +};

From what I saw, sticky registers are mostly standard so
even when implemented they will be common, not per device.
As suggested previously, let's just invoke reset callback,
devices can test FLR bit if they like to do things
differently.

But it's hard to say, as currently this is just dead code,
all commented out.
Let's not have any callback for now, and add it later when
infrastructure is in place and we see there is in fact
a lot of device specific code.

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 5f5a4c5..eeb5134 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -186,6 +186,7 @@  hw-obj-$(CONFIG_PIIX4) += piix4.o
 # PCI watchdog devices
 hw-obj-y += wdt_i6300esb.o
 
+hw-obj-y += pcie.o
 hw-obj-y += msix.o msi.o
 
 # PCI network cards
diff --git a/hw/pci.h b/hw/pci.h
index 3dc2099..3c9c228 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -9,6 +9,8 @@ 
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
 
+#include "pcie.h"
+
 /* PCI bus */
 
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
@@ -178,6 +180,9 @@  struct PCIDevice {
     /* Offset of MSI capability in config space */
     uint8_t msi_cap;
 
+    /* PCI Express */
+    PCIExpressDevice exp;
+
     /* Location of option rom */
     char *romfile;
     ram_addr_t rom_offset;
diff --git a/hw/pcie.c b/hw/pcie.c
new file mode 100644
index 0000000..0f2bf50
--- /dev/null
+++ b/hw/pcie.c
@@ -0,0 +1,529 @@ 
+/*
+ * pcie.c
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "sysemu.h"
+#include "pci_bridge.h"
+#include "pcie.h"
+#include "msix.h"
+#include "msi.h"
+#include "pci_internals.h"
+#include "pcie_regs.h"
+
+/*
+ * Debug tracing.  Uncomment the DEBUG_PCIE define below to make
+ * PCIE_DPRINTF() print to stderr, prefixed with the calling function name
+ * and line number; otherwise it expands to an empty statement.
+ */
+//#define DEBUG_PCIE
+#ifdef DEBUG_PCIE
+# define PCIE_DPRINTF(fmt, ...)                                         \
+    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
+#else
+# define PCIE_DPRINTF(fmt, ...) do {} while (0)
+#endif
+/* Like PCIE_DPRINTF(), additionally prefixed with the device name and devfn */
+#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
+    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
+
+
+/***************************************************************************
+ * pci express capability helper functions
+ */
+/*
+ * Add a version 2 PCI Express capability to @dev and fill in its defaults.
+ *
+ * @dev:    device, which must be a PCI Express device (asserted)
+ * @offset: config space offset passed on to pci_add_capability()
+ * @type:   value for the Device/Port Type field of the capabilities register
+ * @port:   value for the Port Number field of the link capabilities register
+ *
+ * On success the capability offset is recorded in dev->exp.exp_cap and
+ * returned.  If pci_add_capability() returns a negative value, that value
+ * is returned unchanged and nothing else is touched.
+ */
+int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port)
+{
+    int pos;
+    uint8_t *exp_cap;
+
+    assert(pci_is_express(dev));
+
+    pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset,
+                                 PCI_EXP_VER2_SIZEOF);
+    if (pos < 0) {
+        return pos;
+    }
+    dev->exp.exp_cap = pos;
+    exp_cap = dev->config + pos;
+
+    /* capability register
+       interrupt message number defaults to 0 */
+    pci_set_word(exp_cap + PCI_EXP_FLAGS,
+                 ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) |
+                 PCI_EXP_FLAGS_VER2);
+
+    /* device capability register
+     * table 7-12:
+     * roll based error reporting bit must be set by all
+     * Functions conforming to the ECN, PCI Express Base
+     * Specification, Revision 1.1., or subsequent PCI Express Base
+     * Specification revisions.
+     */
+    pci_set_long(exp_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER);
+
+    pci_set_long(exp_cap + PCI_EXP_LNKCAP,
+                 (port << PCI_EXP_LNKCAP_PN_SHIFT) |
+                 PCI_EXP_LNKCAP_ASPMS_0S |
+                 PCI_EXP_LNK_MLW_1 |
+                 PCI_EXP_LNK_LS_25);
+
+    pci_set_word(exp_cap + PCI_EXP_LNKSTA,
+                 PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
+
+    pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
+                 PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
+
+    /* The writable bit lives in Device Control 2, so the write mask must be
+     * set at that register's offset and not at the start of the capability
+     * (which would make the capability header writable instead). */
+    pci_set_word(dev->wmask + pos + PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_EETLPPB);
+    return pos;
+}
+
+/* Remove the PCI Express capability (version 2 sized) from @dev. */
+void pcie_cap_exit(PCIDevice *dev)
+{
+    pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF);
+}
+
+/*
+ * Return the Device/Port Type field read from the PCI Express capabilities
+ * register of @dev.  The express capability must already be present
+ * (dev->exp.exp_cap non-zero, asserted).
+ */
+uint8_t pcie_cap_get_type(const PCIDevice *dev)
+{
+    uint32_t cap_off = dev->exp.exp_cap;
+    uint16_t flags;
+
+    assert(cap_off > 0);
+    flags = pci_get_word(dev->config + cap_off + PCI_EXP_FLAGS);
+    return (flags & PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT;
+}
+
+/*
+ * MSI/MSI-X: PCI Express interrupt message number.
+ * See 7.8.2 PCI Express Capabilities Register: Interrupt Message Number.
+ *
+ * Store @vector (must be below 32, asserted) in the interrupt message
+ * number field of the capabilities register of @dev.
+ */
+void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector)
+{
+    uint8_t *flags_reg = dev->config + dev->exp.exp_cap + PCI_EXP_FLAGS;
+
+    assert(vector < 32);
+    pci_clear_set_bit_word(flags_reg, PCI_EXP_FLAGS_IRQ,
+                           vector << PCI_EXP_FLAGS_IRQ_SHIFT);
+}
+
+/*
+ * Return the interrupt message number field of the PCI Express
+ * capabilities register of @dev.
+ */
+uint8_t pcie_cap_flags_get_vector(PCIDevice *dev)
+{
+    uint8_t *flags_reg = dev->config + dev->exp.exp_cap + PCI_EXP_FLAGS;
+    uint16_t flags = pci_get_word(flags_reg);
+
+    return (flags & PCI_EXP_FLAGS_IRQ) >> PCI_EXP_FLAGS_IRQ_SHIFT;
+}
+
+/*
+ * Set up device error reporting in the PCI Express capability of @dev:
+ *  - advertise role-based error reporting in Device Capabilities,
+ *  - make the four error reporting enable bits of Device Control writable,
+ *  - make the four "error detected" bits of Device Status write-1-to-clear.
+ */
+void pcie_cap_deverr_init(PCIDevice *dev)
+{
+    uint32_t pos = dev->exp.exp_cap;
+    pci_set_bit_long(dev->config + pos + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER);
+    pci_set_bit_long(dev->wmask + pos + PCI_EXP_DEVCTL,
+                     PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
+                     PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
+    /* One status bit per enable bit above: correctable, non-fatal, fatal
+     * and unsupported request.  The fatal bit (FED) must be listed too,
+     * otherwise software could never clear it. */
+    pci_set_bit_long(dev->w1cmask + pos + PCI_EXP_DEVSTA,
+                     PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
+                     PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD);
+}
+
+void pcie_cap_deverr_reset(PCIDevice *dev)
+{
+    pci_clear_bit_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL,
+                       PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
+                       PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
+}
+
+/*
+ * Deliver a slot event notification for @dev.
+ *
+ * When MSI-X is enabled, or else when MSI is enabled, a message is sent on
+ * the vector stored in the capabilities register, but only if @msi_trigger
+ * is true.  When neither is enabled, the INTx line is set to @int_level.
+ */
+static void pcie_cap_slot_notify(PCIDevice *dev,
+                                 bool msi_trigger, int int_level)
+{
+    unsigned int vector = pcie_cap_flags_get_vector(dev);
+
+    if (msix_enabled(dev)) {
+        if (msi_trigger) {
+            msix_notify(dev, vector);
+        }
+        return;
+    }
+
+    if (msi_enabled(dev)) {
+        if (msi_trigger) {
+            msi_notify(dev, vector);
+        }
+        return;
+    }
+
+    /* INTA# (index 0) is chosen for slot event notification */
+    qemu_set_irq(dev->irq[0], int_level);
+}
+
+/*
+ * A PCI Express Hot-Plug Event has occurred, so update slot status register
+ * and notify OS of the event if necessary.
+ *
+ * 6.7.3 PCI Express Hot-Plug Events
+ * 6.7.3.4 Software Notification of Hot-Plug Events
+ *
+ * @event is used as a bit mask against both Slot Control (enable bit) and
+ * Slot Status (status bit).
+ */
+static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event)
+{
+    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+    uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
+    uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+    bool msi_trigger;
+    int int_level;
+
+    PCIE_DEV_PRINTF(dev,
+                    "sltctl: 0x%02"PRIx16" sltsta: 0x%02"PRIx16" event: %x\n",
+                    sltctl, sltsta, event);
+
+    /* A message is triggered only if hot-plug interrupts are enabled, this
+     * event is enabled, and its status bit was clear before this call.
+     * This must be computed before the status bit is set below. */
+    msi_trigger = (sltctl & PCI_EXP_SLTCTL_HPIE) && (sltctl & event) &&
+        ((sltsta ^ event) & event); /* 0 -> 1 */
+
+    sltsta |= event;
+    pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta);
+    PCIE_DEV_PRINTF(dev, "sltsta -> %02"PRIx16"\n", sltsta);
+
+    /* INTx level: asserted while hot-plug interrupts are enabled and any
+     * supported event status bit is set. */
+    int_level = ((sltctl & PCI_EXP_SLTCTL_HPIE) &&
+                 (sltsta & PCI_EXP_HP_EV_SUPPORTED));
+
+    pcie_cap_slot_notify(dev, msi_trigger, int_level);
+}
+
+/*
+ * Hot-plug callback registered by pcie_cap_slot_init() via pci_bus_hotplug().
+ *
+ * @qdev:    the device owning the slot (upcast to PCIDevice below)
+ * @pci_dev: the device being plugged into or unplugged from the slot
+ * @state:   non-zero for plug, zero for unplug
+ *
+ * Returns 0 on success, or -EBUSY when the Slot Status register reports the
+ * electromechanical interlock as engaged.
+ */
+static int pcie_cap_slot_hotplug(DeviceState *qdev,
+                                 PCIDevice *pci_dev, int state)
+{
+    PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev);
+    uint8_t *exp_cap = d->config + d->exp.exp_cap;
+    uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+
+    /* Not hot-plugged: only record presence, no event is raised. */
+    if (!pci_dev->qdev.hotplugged) {
+        assert(state); /* this case only happens at machine creation. */
+        pci_set_bit_word(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
+        return 0;
+    }
+
+    PCIE_DEV_PRINTF(pci_dev, "hotplug state: %d\n", state);
+    if (sltsta & PCI_EXP_SLTSTA_EIS) {
+        /* the slot is electromechanically locked.
+         * This error is propagated up to qdev and then to HMP/QMP.
+         */
+        return -EBUSY;
+    }
+
+    /* TODO: multifunction hot-plug.
+     * Right now, only a device of function = 0 is allowed to be
+     * hot plugged/unplugged.
+     */
+    assert(PCI_FUNC(pci_dev->devfn) == 0);
+
+    /* Update presence detect state, then raise a presence detect changed
+     * event in both directions. */
+    if (state) {
+        pci_set_bit_word(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
+        pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC);
+    } else {
+        /* pci_dev must not be used after this call */
+        qdev_free(&pci_dev->qdev);
+        pci_clear_bit_word(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
+        pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC);
+    }
+    return 0;
+}
+
+/*
+ * PCI Express slot for a PCI Express root/downstream port:
+ * initialize the slot registers of the PCI Express capability of @dev.
+ *
+ * @slot is stored in the physical slot number field of Slot Capabilities.
+ * The function also marks the slot as implemented in the capabilities
+ * register, sets both indicators to "off", makes the indicator controls and
+ * event enable bits of Slot Control writable, makes the supported event
+ * bits of Slot Status write-1-to-clear, and registers
+ * pcie_cap_slot_hotplug() on the secondary bus.  @dev is upcast to a
+ * PCIBridge, so it must be embedded in one.
+ */
+void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot)
+{
+    uint32_t pos = dev->exp.exp_cap;
+
+    pci_set_bit_word(dev->config + pos + PCI_EXP_FLAGS, PCI_EXP_FLAGS_SLOT);
+
+    /* NOTE(review): the mask passed here is the complement of the slot
+     * number field; confirm against pci_clear_set_bit_long() semantics. */
+    pci_clear_set_bit_long(dev->config + pos + PCI_EXP_SLTCAP,
+                           ~PCI_EXP_SLTCAP_PSN,
+                           (slot << PCI_EXP_SLTCAP_PSN_SHIFT) |
+                           PCI_EXP_SLTCAP_EIP |
+                           PCI_EXP_SLTCAP_HPS |
+                           PCI_EXP_SLTCAP_HPC |
+                           PCI_EXP_SLTCAP_PIP |
+                           PCI_EXP_SLTCAP_AIP |
+                           PCI_EXP_SLTCAP_ABP);
+
+    pci_clear_set_bit_word(dev->config + pos + PCI_EXP_SLTCTL,
+                           PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC,
+                           PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF);
+    pci_set_bit_word(dev->wmask + pos + PCI_EXP_SLTCTL,
+                     PCI_EXP_SLTCTL_PIC |
+                     PCI_EXP_SLTCTL_AIC |
+                     PCI_EXP_SLTCTL_HPIE |
+                     PCI_EXP_SLTCTL_CCIE |
+                     PCI_EXP_SLTCTL_PDCE |
+                     PCI_EXP_SLTCTL_ABPE);
+
+    pci_set_bit_word(dev->w1cmask + pos + PCI_EXP_SLTSTA,
+                     PCI_EXP_HP_EV_SUPPORTED);
+
+    pci_bus_hotplug(pci_bridge_get_sec_bus(DO_UPCAST(PCIBridge, dev, dev)),
+                    pcie_cap_slot_hotplug, &dev->qdev);
+}
+
+/*
+ * Reset the slot registers of @dev: clear the interlock control, indicator
+ * controls and event enable bits of Slot Control (leaving both indicators
+ * "off"), and clear the interlock status and event bits of Slot Status.
+ * The presence detect state bit is not touched.
+ */
+void pcie_cap_slot_reset(PCIDevice *dev)
+{
+    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+    PCIE_DEV_PRINTF(dev, "reset\n");
+
+    pci_clear_set_bit_word(exp_cap + PCI_EXP_SLTCTL,
+                           PCI_EXP_SLTCTL_EIC |
+                           PCI_EXP_SLTCTL_PIC |
+                           PCI_EXP_SLTCTL_AIC |
+                           PCI_EXP_SLTCTL_HPIE |
+                           PCI_EXP_SLTCTL_CCIE |
+                           PCI_EXP_SLTCTL_PDCE |
+                           PCI_EXP_SLTCTL_ABPE,
+                           PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF);
+
+    pci_clear_bit_word(exp_cap + PCI_EXP_SLTSTA,
+                       PCI_EXP_SLTSTA_EIS |/* by reset, the lock is released */
+                       PCI_EXP_SLTSTA_CC |
+                       PCI_EXP_SLTSTA_PDC |
+                       PCI_EXP_SLTSTA_ABP);
+}
+
+/*
+ * Handle the side effects of a config space write on the slot registers.
+ *
+ * @addr, @val, @len: the config write (offset, value, length in bytes)
+ * @sltctl_prev:      value of Slot Control before the write
+ *
+ * The current Slot Control/Status values are read from dev->config, so the
+ * write is presumably already applied there by the caller - TODO confirm.
+ * Nothing but debug output happens unless the write overlaps Slot Control.
+ */
+void pcie_cap_slot_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint16_t sltctl_prev)
+{
+    uint32_t pos = dev->exp.exp_cap;
+    uint8_t *exp_cap = dev->config + pos;
+    uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
+    uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+
+    PCIE_DEV_PRINTF(dev,
+                    "addr: 0x%"PRIx32" val: 0x%"PRIx32" len: %d\n"
+                    "\tsltctl_prev: 0x%02"PRIx16" sltctl: 0x%02"PRIx16
+                    " sltsta: 0x%02"PRIx16"\n",
+                    addr, val, len, sltctl_prev, sltctl, sltsta);
+    /* SLTSTA: process SLTSTA before SLTCTL to avoid spurious interrupt */
+    if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
+        sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+
+        /* a write to sltsta results in clearing bits,
+           so new interrupts won't be generated. */
+        PCIE_DEV_PRINTF(dev, "sltsta -> 0x%02"PRIx16"\n", sltsta);
+    }
+
+    /* SLTCTL */
+    if (ranges_overlap(addr, len, pos + PCI_EXP_SLTCTL, 2)) {
+        PCIE_DEV_PRINTF(dev, "sltctl: 0x%02"PRIx16" -> 0x%02"PRIx16"\n",
+                        sltctl_prev, sltctl);
+        /* The interlock control bit is taken from dev->written rather than
+         * from dev->config; presumably it holds the raw value software
+         * wrote - TODO confirm. */
+        if (pci_get_word(dev->written + pos + PCI_EXP_SLTCTL) &
+            PCI_EXP_SLTCTL_EIC) {
+            sltsta ^= PCI_EXP_SLTSTA_EIS; /* toggle PCI_EXP_SLTSTA_EIS bit */
+            pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta);
+            PCIE_DEV_PRINTF(dev, "PCI_EXP_SLTCTL_EIC: "
+                            "sltsta -> 0x%02"PRIx16"\n",
+                            sltsta);
+        }
+
+        /*
+         * The events might have been enabled or disabled.
+         * Check whether the software notification condition is now
+         * satisfied or no longer satisfied.
+         *
+         * 6.7.3.4 Software Notification of Hot-plug events
+         */
+        if (sltctl & PCI_EXP_SLTCTL_HPIE) {
+            /* message only for enable bits that just went 0 -> 1 while the
+             * matching status bit is already set */
+            bool msi_trigger =
+                (sltctl_prev ^ sltctl) & sltctl & /* 0 -> 1 */
+                sltsta & PCI_EXP_HP_EV_SUPPORTED;
+            int int_level = sltctl & sltsta & PCI_EXP_HP_EV_SUPPORTED;
+
+            pcie_cap_slot_notify(dev, msi_trigger, int_level);
+        }
+
+        /* debug note only: none of the supported control bits changed */
+        if (!((sltctl_prev ^ sltctl) & PCI_EXP_SLTCTL_SUPPORTED)) {
+            PCIE_DEV_PRINTF(dev,
+                            "sprious command completion slctl "
+                            "0x%"PRIx16" -> 0x%"PRIx16"\n",
+                            sltctl_prev, sltctl);
+        }
+
+        /* command completion.
+         * Real hardware might take a while to complete
+         * requested command because physical movement would be involved
+         * like locking the electromechanical lock.
+         * However in our case, command is completed instantaneously above,
+         * so send a command completion event right now.
+         *
+         * 6.7.3.2 Command Completed Events
+         */
+        /* set command completed bit */
+        pcie_cap_slot_event(dev, PCI_EXP_HP_EV_CCI);
+    }
+}
+
+/* Raise an "attention button pressed" hot-plug event on the slot of @dev. */
+void pcie_cap_slot_push_attention_button(PCIDevice *dev)
+{
+    pcie_cap_slot_event(dev, PCI_EXP_HP_EV_ABP);
+}
+
+/* root control/capabilities/status. PME isn't emulated for now */
+/*
+ * Make the three "system error on ... error" enable bits of the Root
+ * Control register of @dev writable.
+ */
+void pcie_cap_root_init(PCIDevice *dev)
+{
+    pci_set_word(dev->wmask + dev->exp.exp_cap + PCI_EXP_RTCTL,
+                 PCI_EXP_RTCTL_SECEE | PCI_EXP_RTCTL_SENFEE |
+                 PCI_EXP_RTCTL_SEFEE);
+}
+
+/* Reset the Root Control register of @dev to 0. */
+void pcie_cap_root_reset(PCIDevice *dev)
+{
+    pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_RTCTL, 0);
+}
+
+/* function level reset(FLR) */
+/*
+ * Advertise FLR support in Device Capabilities of @dev and remember @flr as
+ * the callback invoked by pcie_cap_flr_write_config().  @flr may be NULL, in
+ * which case no callback is invoked.
+ */
+void pcie_cap_flr_init(PCIDevice *dev, pcie_flr_fn flr)
+{
+    pci_set_bit_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP,
+                     PCI_EXP_DEVCAP_FLR);
+    dev->exp.flr = flr;
+}
+
+/*
+ * Config write hook for function level reset: invoke the FLR callback of
+ * @dev, if one was registered, when the FLR bit of Device Control is set in
+ * dev->written.  @addr, @val and @len are not used here.
+ */
+void pcie_cap_flr_write_config(PCIDevice *dev,
+                               uint32_t addr, uint32_t val, int len)
+{
+    /* if ranges_overlap(addr, len, pos + PCI_EXP_DEVCTL, 2) is false,
+       written == 0 */
+    uint8_t *devctl_written = dev->written + dev->exp.exp_cap + PCI_EXP_DEVCTL;
+    uint16_t written = pci_get_word(devctl_written);
+
+    if (!(written & PCI_EXP_DEVCTL_BCR_FLR)) {
+        return;
+    }
+    if (dev->exp.flr) {
+        dev->exp.flr(dev);
+    }
+}
+
+/* Alternative Routing-ID Interpretation (ARI) */
+/* ari forwarding support for down stream port */
+/*
+ * Advertise ARI forwarding in Device Capabilities 2 of @dev and make the
+ * ARI forwarding enable bit of Device Control 2 writable.
+ */
+void pcie_cap_ari_init(PCIDevice *dev)
+{
+    uint32_t pos = dev->exp.exp_cap;
+    pci_set_bit_long(dev->config + pos + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_ARI);
+    pci_set_bit_long(dev->wmask + pos + PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_ARI);
+}
+
+/* Clear the ARI forwarding enable bit in Device Control 2 of @dev. */
+void pcie_cap_ari_reset(PCIDevice *dev)
+{
+    pci_clear_bit_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2,
+                       PCI_EXP_DEVCTL2_ARI);
+}
+
+/*
+ * Return true if @dev is a PCI Express device with an express capability
+ * whose Device Control 2 register has the ARI forwarding enable bit set;
+ * false otherwise.
+ */
+bool pcie_cap_is_ari_enabled(const PCIDevice *dev)
+{
+    uint32_t devctl2;
+
+    if (!pci_is_express(dev) || !dev->exp.exp_cap) {
+        return false;
+    }
+
+    devctl2 = pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL2);
+    return devctl2 & PCI_EXP_DEVCTL2_ARI;
+}
+
+/**************************************************************************
+ * pci express extended capability allocation functions
+ * uint16_t ext_cap_id (16 bit)
+ * uint8_t cap_ver (4 bit)
+ * uint16_t cap_offset (12 bit)
+ * uint16_t ext_cap_size
+ */
+
+/*
+ * Walk the extended capability list of @dev looking for @cap_id.
+ *
+ * Returns the config offset of the first capability whose id matches, or 0
+ * if the end of the list is reached or the list is empty (the header at
+ * PCI_CONFIG_SPACE_SIZE reads as 0).
+ *
+ * If @prev_p is not NULL, *prev_p receives the offset of the capability
+ * visited just before the returned one: 0 when there is none, and the last
+ * capability in the list when nothing matched.
+ */
+static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id,
+                                          uint16_t *prev_p)
+{
+    uint16_t prev = 0;
+    uint16_t next;
+    uint32_t header = pci_get_long(dev->config + PCI_CONFIG_SPACE_SIZE);
+
+    if (!header) {
+        /* no extended capability */
+        next = 0;
+        goto out;
+    }
+    /* the step expression uses the header read in the previous iteration */
+    for (next = PCI_CONFIG_SPACE_SIZE; next;
+         prev = next, next = PCI_EXT_CAP_NEXT(header)) {
+
+        /* every link must stay inside extended config space */
+        assert(next >= PCI_CONFIG_SPACE_SIZE);
+        assert(next <= PCIE_CONFIG_SPACE_SIZE - 8);
+
+        header = pci_get_long(dev->config + next);
+        if (PCI_EXT_CAP_ID(header) == cap_id) {
+            break;
+        }
+    }
+
+out:
+    if (prev_p) {
+        *prev_p = prev;
+    }
+    return next;
+}
+
+/*
+ * Return the config offset of the extended capability @cap_id of @dev,
+ * or 0 if it is not present.
+ */
+uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id)
+{
+    return pcie_find_capability_list(dev, cap_id, NULL);
+}
+
+/*
+ * Rewrite the "next capability" field of the extended capability header at
+ * config offset @pos of @dev so that it points at @next, leaving the other
+ * header bits untouched.  @next must be PCI_EXT_CAP_ALIGN aligned (asserted).
+ */
+static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next)
+{
+    /* The header is read and written as a 32-bit value, so it must be held
+     * in a 32-bit local: a narrower one would silently drop the upper bits
+     * and the write-back below would zero them. */
+    uint32_t header = pci_get_long(dev->config + pos);
+
+    assert(!(next & (PCI_EXT_CAP_ALIGN - 1)));
+    /* shift in unsigned 32-bit arithmetic, not in promoted signed int */
+    header = (header & ~PCI_EXT_CAP_NEXT_MASK) |
+        (((uint32_t)next << PCI_EXT_CAP_NEXT_SHIFT) & PCI_EXT_CAP_NEXT_MASK);
+    pci_set_long(dev->config + pos, header);
+}
+
+/*
+ * Add an extended capability header (@cap_id, @cap_ver) at config offset
+ * @offset of @dev and link it into the extended capability list.
+ *
+ * The caller must supply a valid (offset, size) such that the range doesn't
+ * overlap with any other capability or other registers.
+ * This function doesn't check it.
+ *
+ * A capability placed at PCI_CONFIG_SPACE_SIZE becomes the list head and
+ * keeps the next pointer already stored there; any other capability is
+ * appended after the current last one, so the list must not be empty in
+ * that case (asserted).  The whole range is made read-only.
+ */
+void pcie_add_capability(PCIDevice *dev,
+                         uint16_t cap_id, uint8_t cap_ver,
+                         uint16_t offset, uint16_t size)
+{
+    uint32_t header;
+    uint16_t next;
+
+    assert(offset >= PCI_CONFIG_SPACE_SIZE);
+    assert(offset < offset + size);
+    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
+    assert(size >= 8);
+    assert(pci_is_express(dev));
+
+    if (offset == PCI_CONFIG_SPACE_SIZE) {
+        header = pci_get_long(dev->config + offset);
+        next = PCI_EXT_CAP_NEXT(header);
+    } else {
+        uint16_t prev;
+
+        /* 0 is reserved cap id. use internally to find the last capability
+           in the linked list */
+        next = pcie_find_capability_list(dev, 0, &prev);
+
+        assert(prev >= PCI_CONFIG_SPACE_SIZE);
+        assert(next == 0);
+        pcie_ext_cap_set_next(dev, prev, offset);
+    }
+    pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next));
+
+    /* Make capability read-only by default */
+    memset(dev->wmask + offset, 0, size);
+    memset(dev->w1cmask + offset, 0, size);
+    /* Check capability by default */
+    memset(dev->cmask + offset, 0xFF, size);
+}
+
+/**************************************************************************
+ * pci express extended capability helper functions
+ */
+
+/* ARI */
+/*
+ * Add an ARI extended capability at config offset @offset of @dev and store
+ * @nextfn in the next function number field of its capability register.
+ */
+void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
+{
+    pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
+                        offset, PCI_ARI_SIZEOF);
+    pci_set_long(dev->config + offset + PCI_ARI_CAP, PCI_ARI_CAP_NFN(nextfn));
+}
diff --git a/hw/pcie.h b/hw/pcie.h
new file mode 100644
index 0000000..603bb8b
--- /dev/null
+++ b/hw/pcie.h
@@ -0,0 +1,107 @@ 
+/*
+ * pcie.h
+ *
+ * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_PCIE_H
+#define QEMU_PCIE_H
+
+#include "hw.h"
+#include "pci_regs.h"
+#include "pcie_regs.h"
+
+/* Indicator states, aliasing the PCI_EXP_SLTCTL_IND_xxx register values. */
+typedef enum {
+    /* for attention and power indicator */
+    PCI_EXP_HP_IND_RESERVED     = PCI_EXP_SLTCTL_IND_RESERVED,
+    PCI_EXP_HP_IND_ON           = PCI_EXP_SLTCTL_IND_ON,
+    PCI_EXP_HP_IND_BLINK        = PCI_EXP_SLTCTL_IND_BLINK,
+    PCI_EXP_HP_IND_OFF          = PCI_EXP_SLTCTL_IND_OFF,
+} PCIExpressIndicator;
+
+/* Hot-plug events, usable as bit masks on both Slot Control and Status. */
+typedef enum {
+    /* these bits must match the bits in Slot Control/Status registers.
+     * PCI_EXP_HP_EV_xxx = PCI_EXP_SLTCTL_xxxE = PCI_EXP_SLTSTA_xxx
+     *
+     * Not all the bits of the slot control register match the ones of
+     * the slot status register. Some bits of the slot status register are
+     * used to show status, not to report event occurrence.
+     * So such bits must be masked out when checking the software
+     * notification condition.
+     */
+    PCI_EXP_HP_EV_ABP           = PCI_EXP_SLTCTL_ABPE,
+                                        /* attention button pressed */
+    PCI_EXP_HP_EV_PDC           = PCI_EXP_SLTCTL_PDCE,
+                                        /* presence detect changed */
+    PCI_EXP_HP_EV_CCI           = PCI_EXP_SLTCTL_CCIE,
+                                        /* command completed */
+
+    PCI_EXP_HP_EV_SUPPORTED     = PCI_EXP_HP_EV_ABP |
+                                  PCI_EXP_HP_EV_PDC |
+                                  PCI_EXP_HP_EV_CCI,
+                                                /* supported event mask  */
+
+    /* events not listed aren't supported */
+} PCIExpressHotPlugEvent;
+
+/* Callback invoked when a function level reset is requested for @dev. */
+typedef void (*pcie_flr_fn)(PCIDevice *dev);
+
+/* PCI Express specific state embedded in each PCIDevice. */
+struct PCIExpressDevice {
+    /* Offset of express capability in config space */
+    uint8_t exp_cap;
+
+    /* FLR */
+    pcie_flr_fn flr;
+};
+
+/* PCI express capability helper functions */
+int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port);
+void pcie_cap_exit(PCIDevice *dev);
+uint8_t pcie_cap_get_type(const PCIDevice *dev);
+void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector);
+uint8_t pcie_cap_flags_get_vector(PCIDevice *dev);
+
+void pcie_cap_deverr_init(PCIDevice *dev);
+void pcie_cap_deverr_reset(PCIDevice *dev);
+
+void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot);
+void pcie_cap_slot_reset(PCIDevice *dev);
+void pcie_cap_slot_write_config(PCIDevice *dev,
+                                uint32_t addr, uint32_t val, int len,
+                                uint16_t sltctl_prev);
+void pcie_cap_slot_push_attention_button(PCIDevice *dev);
+
+void pcie_cap_root_init(PCIDevice *dev);
+void pcie_cap_root_reset(PCIDevice *dev);
+
+void pcie_cap_flr_init(PCIDevice *dev, pcie_flr_fn flr);
+void pcie_cap_flr_write_config(PCIDevice *dev,
+                           uint32_t addr, uint32_t val, int len);
+
+void pcie_cap_ari_init(PCIDevice *dev);
+void pcie_cap_ari_reset(PCIDevice *dev);
+bool pcie_cap_is_ari_enabled(const PCIDevice *dev);
+
+/* PCI express extended capability helper functions */
+uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id);
+void pcie_add_capability(PCIDevice *dev,
+                         uint16_t cap_id, uint8_t cap_ver,
+                         uint16_t offset, uint16_t size);
+
+void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
+
+#endif /* QEMU_PCIE_H */
diff --git a/qemu-common.h b/qemu-common.h
index d735235..6d9ee26 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -219,6 +219,7 @@  typedef struct PCIHostState PCIHostState;
 typedef struct PCIExpressHost PCIExpressHost;
 typedef struct PCIBus PCIBus;
 typedef struct PCIDevice PCIDevice;
+typedef struct PCIExpressDevice PCIExpressDevice;
 typedef struct PCIBridge PCIBridge;
 typedef struct SerialState SerialState;
 typedef struct IRQState *qemu_irq;