diff mbox series

[v3,01/35] ppc/xive: introduce a XIVE interrupt source model

Message ID 20180419124331.3915-2-clg@kaod.org
State New
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater April 19, 2018, 12:42 p.m. UTC
Each XIVE interrupt source is associated with a two bit state machine
called an Event State Buffer (ESB) : the first bit "P" means that an
interrupt is "pending" and waiting for an EOI and the bit "Q" (queued)
means a new interrupt was triggered while another was still pending.

When an event is triggered, the associated interrupt state bits are
fetched and modified and forwarded to the virtualization engine of the
controller doing the routing. These can also be controlled by MMIO, to
trigger events or turn off the sources for instance. See code for more
details on the states and transitions.

On a sPAPR machine, the OS will obtain the address of the MMIO page of
the ESB entry associated with a source and its characteristic using
the H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is
used.

The xive_source_notify() routine is in charge forwarding the source
event notification to the routing engine. It will be filled later on.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 Changes since v2:

 - added support for Store EOI
 - added support for two page MMIO setting like on KVM

 default-configs/ppc64-softmmu.mak |   1 +
 hw/intc/Makefile.objs             |   1 +
 hw/intc/xive.c                    | 335 ++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/xive.h             | 130 +++++++++++++++
 4 files changed, 467 insertions(+)
 create mode 100644 hw/intc/xive.c
 create mode 100644 include/hw/ppc/xive.h

Comments

David Gibson April 20, 2018, 7:10 a.m. UTC | #1
On Thu, Apr 19, 2018 at 02:42:57PM +0200, Cédric Le Goater wrote:
> Each XIVE interrupt source is associated with a two bit state machine
> called an Event State Buffer (ESB) : the first bit "P" means that an
> interrupt is "pending" and waiting for an EOI and the bit "Q" (queued)
> means a new interrupt was triggered while another was still pending.
> 
> When an event is triggered, the associated interrupt state bits are
> fetched and modified and forwarded to the virtualization engine of the
> controller doing the routing. These can also be controlled by MMIO, to
> trigger events or turn off the sources for instance. See code for more
> details on the states and transitions.
> 
> On a sPAPR machine, the OS will obtain the address of the MMIO page of
> the ESB entry associated with a source and its characteristic using
> the H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is
> used.
> 
> The xive_source_notify() routine is in charge forwarding the source
> event notification to the routing engine. It will be filled later on.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  Changes since v2:
> 
>  - added support for Store EOI
>  - added support for two page MMIO setting like on KVM

Looks generally sane to me, though I have a few queries.

> 
>  default-configs/ppc64-softmmu.mak |   1 +
>  hw/intc/Makefile.objs             |   1 +
>  hw/intc/xive.c                    | 335 ++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/xive.h             | 130 +++++++++++++++
>  4 files changed, 467 insertions(+)
>  create mode 100644 hw/intc/xive.c
>  create mode 100644 include/hw/ppc/xive.h
> 
> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> index b94af6c7c62a..c6d13e757977 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -16,4 +16,5 @@ CONFIG_VIRTIO_VGA=y
>  CONFIG_XICS=$(CONFIG_PSERIES)
>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
> +CONFIG_XIVE=$(CONFIG_PSERIES)
>  CONFIG_MEM_HOTPLUG=y
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index 0e9963f5eecc..72a46ed91c31 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
>  obj-$(CONFIG_XICS) += xics.o
>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> +obj-$(CONFIG_XIVE) += xive.o
>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> new file mode 100644
> index 000000000000..c70578759d02
> --- /dev/null
> +++ b/hw/intc/xive.c
> @@ -0,0 +1,335 @@
> +/*
> + * QEMU PowerPC XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "qapi/error.h"
> +#include "target/ppc/cpu.h"
> +#include "sysemu/cpus.h"
> +#include "sysemu/dma.h"
> +#include "monitor/monitor.h"
> +#include "hw/ppc/xive.h"
> +
> +/*
> + * XIVE Interrupt Source
> + */
> +
> +uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno)
> +{
> +    uint32_t byte = srcno / 4;
> +    uint32_t bit  = (srcno % 4) * 2;
> +
> +    assert(byte < xsrc->sbe_size);
> +
> +    return (xsrc->sbe[byte] >> bit) & 0x3;
> +}
> +
> +uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
> +{
> +    uint32_t byte = srcno / 4;
> +    uint32_t bit  = (srcno % 4) * 2;
> +    uint8_t old, new;
> +
> +    assert(byte < xsrc->sbe_size);
> +
> +    old = xsrc->sbe[byte];
> +
> +    new = xsrc->sbe[byte] & ~(0x3 << bit);
> +    new |= (pq & 0x3) << bit;
> +
> +    xsrc->sbe[byte] = new;
> +
> +    return (old >> bit) & 0x3;
> +}
> +
> +static bool xive_source_pq_eoi(XiveSource *xsrc, uint32_t srcno)
> +{
> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
> +
> +    switch (old_pq) {
> +    case XIVE_ESB_RESET:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
> +        return false;
> +    case XIVE_ESB_PENDING:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
> +        return false;
> +    case XIVE_ESB_QUEUED:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
> +        return true;
> +    case XIVE_ESB_OFF:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
> +        return false;
> +    default:
> +         g_assert_not_reached();
> +    }
> +}
> +
> +/*
> + * Returns whether the event notification should be forwarded.
> + */
> +static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
> +{
> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
> +
> +    switch (old_pq) {
> +    case XIVE_ESB_RESET:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
> +        return true;
> +    case XIVE_ESB_PENDING:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
> +        return false;
> +    case XIVE_ESB_QUEUED:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
> +        return false;
> +    case XIVE_ESB_OFF:
> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
> +        return false;
> +    default:
> +         g_assert_not_reached();
> +    }
> +}
> +
> +/*
> + * Forward the source event notification to the associated XiveFabric,
> + * the device owning the sources.
> + */
> +static void xive_source_notify(XiveSource *xsrc, int srcno)
> +{
> +
> +}
> +
> +/* In a two pages ESB MMIO setting, even page is the trigger page, odd
> + * page is for management */

Can I understand from this that the result from this function is only
meaningful in 2-pages mode?

> +static inline bool xive_source_is_trigger_page(hwaddr addr)
> +{
> +    return !((addr >> 16) & 1);

Later on you seem to have both 4k and 64k variants list, but here you
hardcode 64k.  Is that a problem?

> +}
> +
> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> +    uint32_t offset = addr & 0xF00;

You ignore the low bits of the address entirely, so effective you have
a 256 byte range that's all aliases of the same register.  Is that
intentional?

> +    uint32_t srcno = addr >> xsrc->esb_shift;
> +    uint64_t ret = -1;
> +
> +    if (xive_source_esb_2page(xsrc) && xive_source_is_trigger_page(addr)) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "XIVE: invalid load on IRQ %d trigger page at "
> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
> +        return -1;
> +    }
> +
> +    switch (offset) {
> +    case XIVE_ESB_LOAD_EOI:
> +        /*
> +         * Load EOI is not the default source setting under QEMU, but
> +         * this is what HW uses currently.
> +         */
> +        ret = xive_source_pq_eoi(xsrc, srcno);

You're implicitly casting a bool return value into a u64 here, is that
intentional?

> +
> +        break;
> +
> +    case XIVE_ESB_GET:
> +        ret = xive_source_pq_get(xsrc, srcno);
> +        break;
> +
> +    case XIVE_ESB_SET_PQ_00:
> +    case XIVE_ESB_SET_PQ_01:
> +    case XIVE_ESB_SET_PQ_10:
> +    case XIVE_ESB_SET_PQ_11:
> +        ret = xive_source_pq_set(xsrc, srcno, (offset >> 8) & 0x3);
> +        break;
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB addr %d\n", offset);
> +    }
> +
> +    return ret;
> +}
> +
> +static void xive_source_esb_write(void *opaque, hwaddr addr,
> +                                 uint64_t value, unsigned size)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> +    uint32_t offset = addr & 0xF00;
> +    uint32_t srcno = addr >> xsrc->esb_shift;
> +    bool notify = false;
> +
> +    switch (offset) {
> +    case 0:
> +        notify = xive_source_pq_trigger(xsrc, srcno);
> +        break;
> +
> +    case XIVE_ESB_STORE_EOI:
> +        if (xive_source_is_trigger_page(addr)) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "XIVE: invalid store on IRQ %d trigger page at "
> +                          "0x%"HWADDR_PRIx"\n", srcno, addr);
> +            return;
> +        }
> +
> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
> +            return;
> +        }
> +
> +        /* If the Q bit is set, we should forward a new source event
> +         * notification
> +         */
> +        notify = xive_source_pq_eoi(xsrc, srcno);
> +        break;
> +
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %d\n",
> +                      offset);
> +        return;
> +    }
> +
> +    /* Forward the source event notification for routing */
> +    if (notify) {
> +        xive_source_notify(xsrc, srcno);
> +    }

EOI via this path calls notify, but the one via the read path
doesn't.  Is that correct?

> +}
> +
> +static const MemoryRegionOps xive_source_esb_ops = {
> +    .read = xive_source_esb_read,
> +    .write = xive_source_esb_write,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +    .valid = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    },
> +};
> +
> +static void xive_source_set_irq(void *opaque, int srcno, int val)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> +    bool notify = false;
> +
> +    if (val) {
> +        notify = xive_source_pq_trigger(xsrc, srcno);
> +    }
> +
> +    /* Forward the source event notification for routing */
> +    if (notify) {
> +        xive_source_notify(xsrc, srcno);
> +    }
> +}
> +
> +void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon)
> +{
> +    int i;
> +
> +    monitor_printf(mon, "XIVE Source %6x ..%6x\n",
> +                   xsrc->offset, xsrc->offset + xsrc->nr_irqs - 1);
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        uint8_t pq = xive_source_pq_get(xsrc, i);
> +        uint32_t lisn = i  + xsrc->offset;
> +
> +        if (pq == XIVE_ESB_OFF) {
> +            continue;
> +        }
> +
> +        monitor_printf(mon, "  %4x %c%c\n", lisn,
> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
> +    }
> +}
> +
> +static void xive_source_reset(DeviceState *dev)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> +
> +    /* SBEs are initialized to 0b01 which corresponds to "ints off" */
> +    memset(xsrc->sbe, 0x55, xsrc->sbe_size);
> +}
> +
> +static void xive_source_realize(DeviceState *dev, Error **errp)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> +
> +    if (!xsrc->nr_irqs) {
> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
> +        return;
> +    }
> +
> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
> +        xsrc->esb_shift != XIVE_ESB_64K &&
> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
> +        error_setg(errp, "Invalid ESB shift setting");
> +        return;
> +    }
> +
> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
> +                                     xsrc->nr_irqs);
> +
> +    /* Allocate the SBEs (State Bit Entry). 2 bits, so 4 entries per byte */
> +    xsrc->sbe_size = DIV_ROUND_UP(xsrc->nr_irqs, 4);
> +    xsrc->sbe = g_malloc0(xsrc->sbe_size);
> +
> +    /* TODO: H_INT_ESB support, which removing the ESB MMIOs */
> +
> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> +                          &xive_source_esb_ops, xsrc, "xive.esb",
> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
> +}
> +
> +static const VMStateDescription vmstate_xive_source = {
> +    .name = TYPE_XIVE_SOURCE,
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
> +        VMSTATE_VBUFFER_UINT32(sbe, XiveSource, 1, NULL, sbe_size),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +/*
> + * The default XIVE interrupt source setting for ESB MMIO is two 64k
> + * pages without Store EOI. This is in sync with KVM.
> + */
> +static Property xive_source_properties[] = {
> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
> +    DEFINE_PROP_UINT64("bar", XiveSource, esb_base, 0),

Isn't this redundant with however the base address is handled through
the SysBusDevice stuff (I forget the details)?

> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void xive_source_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->realize = xive_source_realize;
> +    dc->reset = xive_source_reset;
> +    dc->props = xive_source_properties;
> +    dc->desc = "XIVE interrupt source";
> +    dc->vmsd = &vmstate_xive_source;
> +}
> +
> +static const TypeInfo xive_source_info = {
> +    .name          = TYPE_XIVE_SOURCE,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(XiveSource),
> +    .class_init    = xive_source_class_init,
> +};
> +
> +static void xive_register_types(void)
> +{
> +    type_register_static(&xive_source_info);
> +}
> +
> +type_init(xive_register_types)
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> new file mode 100644
> index 000000000000..d92a50519edf
> --- /dev/null
> +++ b/include/hw/ppc/xive.h
> @@ -0,0 +1,130 @@
> +/*
> + * QEMU PowerPC XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#ifndef PPC_XIVE_H
> +#define PPC_XIVE_H
> +
> +#include "hw/sysbus.h"
> +
> +/*
> + * XIVE Interrupt Source
> + */
> +
> +#define TYPE_XIVE_SOURCE "xive-source"
> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
> +
> +/*
> + * XIVE Source Interrupt source characteristics, which define how the
> + * ESB are controlled.
> + */
> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
> +#define XIVE_SRC_STORE_EOI     0x4 /* Store EOI supported */
> +
> +typedef struct XiveSource {
> +    SysBusDevice parent;
> +
> +    /* IRQs */
> +    uint32_t     nr_irqs;
> +    uint32_t     offset;
> +    qemu_irq     *qirqs;
> +
> +    /* PQ bits */
> +    uint8_t      *sbe;
> +    uint32_t     sbe_size;
> +
> +    /* ESB memory region */
> +    uint64_t     esb_flags;
> +    hwaddr       esb_base;
> +    uint32_t     esb_shift;
> +    MemoryRegion esb_mmio;
> +} XiveSource;
> +
> +/*
> + * ESB MMIO setting. Can be one page, for both source triggering and
> + * source management, or two different pages. See below for magic
> + * values.
> + */
> +#define XIVE_ESB_4K          12 /* PSI HB */
> +#define XIVE_ESB_4K_2PAGE    17

Should this be 13 instead of 17?

> +#define XIVE_ESB_64K         16
> +#define XIVE_ESB_64K_2PAGE   17

(Also, who the hell comes up with a brand new PIC and decides to have
*4* different interface variants.  But that's not your problem, I
realise).

> +
> +static inline bool xive_source_esb_2page(XiveSource *xsrc)
> +{
> +    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE;
> +}
> +
> +static inline hwaddr xive_source_esb_base(XiveSource *xsrc, uint32_t srcno)
> +{
> +    assert(srcno < xsrc->nr_irqs);
> +    return xsrc->esb_base + (1ull << xsrc->esb_shift) * srcno;
> +}
> +
> +/* The trigger page is always the first/even page */
> +#define xive_source_esb_trigger xive_source_esb_base
> +
> +/* In a two pages ESB MMIO setting, the odd page is for management */
> +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
> +{
> +    hwaddr addr = xive_source_esb_base(xsrc, srcno);
> +
> +    if (xive_source_esb_2page(xsrc)) {
> +        addr += (1 << (xsrc->esb_shift - 1));
> +    }
> +
> +    return addr;
> +}
> +
> +/*
> + * Each interrupt source has a 2-bit state machine called ESB which
> + * can be controlled by MMIO. It's made of 2 bits, P and Q. P
> + * indicates that an interrupt is pending (has been sent to a queue
> + * and is waiting for an EOI). Q indicates that the interrupt has been
> + * triggered while pending.
> + *
> + * This acts as a coalescing mechanism in order to guarantee
> + * that a given interrupt only occurs at most once in a queue.
> + *
> + * When doing an EOI, the Q bit will indicate if the interrupt
> + * needs to be re-triggered.
> + */
> +#define XIVE_ESB_VAL_P        0x2
> +#define XIVE_ESB_VAL_Q        0x1
> +
> +#define XIVE_ESB_RESET        0x0
> +#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
> +#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
> +#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
> +
> +/*
> + * "magic" Event State Buffer (ESB) MMIO offsets.
> + *
> + * The following offsets into the ESB MMIO allow to read or
> + * manipulate the PQ bits. They must be used with an 8-bytes
> + * load instruction. They all return the previous state of the
> + * interrupt (atomically).
> + *
> + * Additionally, some ESB pages support doing an EOI via a
> + * store at 0 and some ESBs support doing a trigger via a
> + * separate trigger page.
> + */
> +#define XIVE_ESB_STORE_EOI      0x400 /* Store */
> +#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
> +#define XIVE_ESB_GET            0x800 /* Load */
> +#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
> +#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
> +#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
> +#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
> +
> +uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno);
> +uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
> +
> +void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon);
> +
> +#endif /* PPC_XIVE_H */
Cédric Le Goater April 20, 2018, 8:27 a.m. UTC | #2
On 04/20/2018 09:10 AM, David Gibson wrote:
> On Thu, Apr 19, 2018 at 02:42:57PM +0200, Cédric Le Goater wrote:
>> Each XIVE interrupt source is associated with a two bit state machine
>> called an Event State Buffer (ESB) : the first bit "P" means that an
>> interrupt is "pending" and waiting for an EOI and the bit "Q" (queued)
>> means a new interrupt was triggered while another was still pending.
>>
>> When an event is triggered, the associated interrupt state bits are
>> fetched and modified and forwarded to the virtualization engine of the
>> controller doing the routing. These can also be controlled by MMIO, to
>> trigger events or turn off the sources for instance. See code for more
>> details on the states and transitions.
>>
>> On a sPAPR machine, the OS will obtain the address of the MMIO page of
>> the ESB entry associated with a source and its characteristic using
>> the H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is
>> used.
>>
>> The xive_source_notify() routine is in charge forwarding the source
>> event notification to the routing engine. It will be filled later on.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  Changes since v2:
>>
>>  - added support for Store EOI
>>  - added support for two page MMIO setting like on KVM
> 
> Looks generally sane to me, though I have a few queries.
> 
>>
>>  default-configs/ppc64-softmmu.mak |   1 +
>>  hw/intc/Makefile.objs             |   1 +
>>  hw/intc/xive.c                    | 335 ++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/xive.h             | 130 +++++++++++++++
>>  4 files changed, 467 insertions(+)
>>  create mode 100644 hw/intc/xive.c
>>  create mode 100644 include/hw/ppc/xive.h
>>
>> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
>> index b94af6c7c62a..c6d13e757977 100644
>> --- a/default-configs/ppc64-softmmu.mak
>> +++ b/default-configs/ppc64-softmmu.mak
>> @@ -16,4 +16,5 @@ CONFIG_VIRTIO_VGA=y
>>  CONFIG_XICS=$(CONFIG_PSERIES)
>>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
>> +CONFIG_XIVE=$(CONFIG_PSERIES)
>>  CONFIG_MEM_HOTPLUG=y
>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>> index 0e9963f5eecc..72a46ed91c31 100644
>> --- a/hw/intc/Makefile.objs
>> +++ b/hw/intc/Makefile.objs
>> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
>>  obj-$(CONFIG_XICS) += xics.o
>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>> +obj-$(CONFIG_XIVE) += xive.o
>>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> new file mode 100644
>> index 000000000000..c70578759d02
>> --- /dev/null
>> +++ b/hw/intc/xive.c
>> @@ -0,0 +1,335 @@
>> +/*
>> + * QEMU PowerPC XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2017-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/log.h"
>> +#include "qapi/error.h"
>> +#include "target/ppc/cpu.h"
>> +#include "sysemu/cpus.h"
>> +#include "sysemu/dma.h"
>> +#include "monitor/monitor.h"
>> +#include "hw/ppc/xive.h"
>> +
>> +/*
>> + * XIVE Interrupt Source
>> + */
>> +
>> +uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    uint32_t byte = srcno / 4;
>> +    uint32_t bit  = (srcno % 4) * 2;
>> +
>> +    assert(byte < xsrc->sbe_size);
>> +
>> +    return (xsrc->sbe[byte] >> bit) & 0x3;
>> +}
>> +
>> +uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
>> +{
>> +    uint32_t byte = srcno / 4;
>> +    uint32_t bit  = (srcno % 4) * 2;
>> +    uint8_t old, new;
>> +
>> +    assert(byte < xsrc->sbe_size);
>> +
>> +    old = xsrc->sbe[byte];
>> +
>> +    new = xsrc->sbe[byte] & ~(0x3 << bit);
>> +    new |= (pq & 0x3) << bit;
>> +
>> +    xsrc->sbe[byte] = new;
>> +
>> +    return (old >> bit) & 0x3;
>> +}
>> +
>> +static bool xive_source_pq_eoi(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
>> +
>> +    switch (old_pq) {
>> +    case XIVE_ESB_RESET:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
>> +        return false;
>> +    case XIVE_ESB_PENDING:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
>> +        return false;
>> +    case XIVE_ESB_QUEUED:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
>> +        return true;
>> +    case XIVE_ESB_OFF:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
>> +        return false;
>> +    default:
>> +         g_assert_not_reached();
>> +    }
>> +}
>> +
>> +/*
>> + * Returns whether the event notification should be forwarded.
>> + */
>> +static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
>> +
>> +    switch (old_pq) {
>> +    case XIVE_ESB_RESET:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
>> +        return true;
>> +    case XIVE_ESB_PENDING:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
>> +        return false;
>> +    case XIVE_ESB_QUEUED:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
>> +        return false;
>> +    case XIVE_ESB_OFF:
>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
>> +        return false;
>> +    default:
>> +         g_assert_not_reached();
>> +    }
>> +}
>> +
>> +/*
>> + * Forward the source event notification to the associated XiveFabric,
>> + * the device owning the sources.
>> + */
>> +static void xive_source_notify(XiveSource *xsrc, int srcno)
>> +{
>> +
>> +}
>> +
>> +/* In a two pages ESB MMIO setting, even page is the trigger page, odd
>> + * page is for management */
> 
> Can I understand from this that the result from this function is only
> meaningful in 2-pages mode?

yes. May be I should rename it to xive_source_is_even_page() ?

>> +static inline bool xive_source_is_trigger_page(hwaddr addr)
>> +{
>> +    return !((addr >> 16) & 1);
> 
> Later on you seem to have both 4k and 64k variants list, but here you
> hardcode 64k.  Is that a problem?
	
oups. This should be  :

	(addr >> (xsrc->esb_shift - 1))

I did the tests with the spapr guest which uses 64k ESB MMIO pages. 
The check is only significant in a 2 pages setting.
 
>> +}
>> +
>> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>> +    uint32_t offset = addr & 0xF00;
> 
> You ignore the low bits of the address entirely, so effective you have
> a 256 byte range that's all aliases of the same register.  Is that
> intentional?

yes but it's not entirely correct. The exact ranges are :

			Load			Store

0x000 .. 0x3FF		EOI and return 0|1	Trigger
0x400 .. 0x7FF		EOI and return 0|1	EOI
0x800 .. 0xBFF  	return PQ		undefined
0xC00 .. 0xCFF		return PQ and PQ=00	PQ=00
0xD00 .. 0xDFF		return PQ and PQ=01	PQ=01
0xE00 .. 0xDFF		return PQ and PQ=10	PQ=10
0xF00 .. 0xDFF		return PQ and PQ=11	PQ=11

There is room for some improvements.

The trigger page in a two pages ESB MMIO settings only triggers on stores.

 
>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>> +    uint64_t ret = -1;
>> +
>> +    if (xive_source_esb_2page(xsrc) && xive_source_is_trigger_page(addr)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "XIVE: invalid load on IRQ %d trigger page at "
>> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
>> +        return -1;
>> +    }
>> +
>> +    switch (offset) {
>> +    case XIVE_ESB_LOAD_EOI:
>> +        /*
>> +         * Load EOI is not the default source setting under QEMU, but
>> +         * this is what HW uses currently.
>> +         */
>> +        ret = xive_source_pq_eoi(xsrc, srcno);
> 
> You're implicitly casting a bool return value into a u64 here, is that
> intentional?

yes. is that bad ? This is what the load is supposed to return in the transition 
algo. 

>> +
>> +        break;
>> +
>> +    case XIVE_ESB_GET:
>> +        ret = xive_source_pq_get(xsrc, srcno);
>> +        break;
>> +
>> +    case XIVE_ESB_SET_PQ_00:
>> +    case XIVE_ESB_SET_PQ_01:
>> +    case XIVE_ESB_SET_PQ_10:
>> +    case XIVE_ESB_SET_PQ_11:
>> +        ret = xive_source_pq_set(xsrc, srcno, (offset >> 8) & 0x3);
>> +        break;
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB addr %d\n", offset);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void xive_source_esb_write(void *opaque, hwaddr addr,
>> +                                 uint64_t value, unsigned size)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>> +    uint32_t offset = addr & 0xF00;
>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>> +    bool notify = false;
>> +
>> +    switch (offset) {
>> +    case 0:
>> +        notify = xive_source_pq_trigger(xsrc, srcno);
>> +        break;
>> +
>> +    case XIVE_ESB_STORE_EOI:
>> +        if (xive_source_is_trigger_page(addr)) {
>> +            qemu_log_mask(LOG_GUEST_ERROR,
>> +                          "XIVE: invalid store on IRQ %d trigger page at "
>> +                          "0x%"HWADDR_PRIx"\n", srcno, addr);
>> +            return;
>> +        }
>> +
>> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
>> +            qemu_log_mask(LOG_GUEST_ERROR,
>> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
>> +            return;
>> +        }
>> +
>> +        /* If the Q bit is set, we should forward a new source event
>> +         * notification
>> +         */
>> +        notify = xive_source_pq_eoi(xsrc, srcno);
>> +        break;
>> +
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %d\n",
>> +                      offset);
>> +        return;
>> +    }
>> +
>> +    /* Forward the source event notification for routing */
>> +    if (notify) {
>> +        xive_source_notify(xsrc, srcno);
>> +    }
> 
> EOI via this path calls notify, but the one via the read path
> doesn't.  Is that correct?

No. I have given attention to the one page ESB MMIO setting + Store EOI 
in the emulated mode and not enough to the two pages ESB MMIO setting. 
This is a late change to be compatible with KVM. I will fix.

> 
>> +}
>> +
>> +static const MemoryRegionOps xive_source_esb_ops = {
>> +    .read = xive_source_esb_read,
>> +    .write = xive_source_esb_write,
>> +    .endianness = DEVICE_BIG_ENDIAN,
>> +    .valid = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +    .impl = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +};
>> +
>> +static void xive_source_set_irq(void *opaque, int srcno, int val)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>> +    bool notify = false;
>> +
>> +    if (val) {
>> +        notify = xive_source_pq_trigger(xsrc, srcno);
>> +    }
>> +
>> +    /* Forward the source event notification for routing */
>> +    if (notify) {
>> +        xive_source_notify(xsrc, srcno);
>> +    }
>> +}
>> +
>> +void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon)
>> +{
>> +    int i;
>> +
>> +    monitor_printf(mon, "XIVE Source %6x ..%6x\n",
>> +                   xsrc->offset, xsrc->offset + xsrc->nr_irqs - 1);
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        uint8_t pq = xive_source_pq_get(xsrc, i);
>> +        uint32_t lisn = i  + xsrc->offset;
>> +
>> +        if (pq == XIVE_ESB_OFF) {
>> +            continue;
>> +        }
>> +
>> +        monitor_printf(mon, "  %4x %c%c\n", lisn,
>> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
>> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
>> +    }
>> +}
>> +
>> +static void xive_source_reset(DeviceState *dev)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>> +
>> +    /* SBEs are initialized to 0b01 which corresponds to "ints off" */
>> +    memset(xsrc->sbe, 0x55, xsrc->sbe_size);
>> +}
>> +
>> +static void xive_source_realize(DeviceState *dev, Error **errp)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>> +
>> +    if (!xsrc->nr_irqs) {
>> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
>> +        return;
>> +    }
>> +
>> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
>> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
>> +        xsrc->esb_shift != XIVE_ESB_64K &&
>> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
>> +        error_setg(errp, "Invalid ESB shift setting");
>> +        return;
>> +    }
>> +
>> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
>> +                                     xsrc->nr_irqs);
>> +
>> +    /* Allocate the SBEs (State Bit Entry). 2 bits, so 4 entries per byte */
>> +    xsrc->sbe_size = DIV_ROUND_UP(xsrc->nr_irqs, 4);
>> +    xsrc->sbe = g_malloc0(xsrc->sbe_size);
>> +
>> +    /* TODO: H_INT_ESB support, which removing the ESB MMIOs */
>> +
>> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
>> +                          &xive_source_esb_ops, xsrc, "xive.esb",
>> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
>> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
>> +}
>> +
>> +static const VMStateDescription vmstate_xive_source = {
>> +    .name = TYPE_XIVE_SOURCE,
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
>> +        VMSTATE_VBUFFER_UINT32(sbe, XiveSource, 1, NULL, sbe_size),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>> +/*
>> + * The default XIVE interrupt source setting for ESB MMIO is two 64k
>> + * pages without Store EOI. This is in sync with KVM.
>> + */
>> +static Property xive_source_properties[] = {
>> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
>> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
>> +    DEFINE_PROP_UINT64("bar", XiveSource, esb_base, 0),
> 
> Isn't this redundant with however the base address is handled through
> the SysBusDevice stuff (I forget the details)?

Storing the ESB MMIO base address under the XiveSource object is 
convenient later on in the h_int_get_source_info hcall which make
use of the helpers : 

	hwaddr xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
	hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)

But it is only used in that place. So we could just store the ESB 
MMIO base address under the sPAPRXive controller. This makes some
sense in the design, as we have to inform KVM of this address with
a KVM device ioctl. 

>> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void xive_source_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> +    dc->realize = xive_source_realize;
>> +    dc->reset = xive_source_reset;
>> +    dc->props = xive_source_properties;
>> +    dc->desc = "XIVE interrupt source";
>> +    dc->vmsd = &vmstate_xive_source;
>> +}
>> +
>> +static const TypeInfo xive_source_info = {
>> +    .name          = TYPE_XIVE_SOURCE,
>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>> +    .instance_size = sizeof(XiveSource),
>> +    .class_init    = xive_source_class_init,
>> +};
>> +
>> +static void xive_register_types(void)
>> +{
>> +    type_register_static(&xive_source_info);
>> +}
>> +
>> +type_init(xive_register_types)
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> new file mode 100644
>> index 000000000000..d92a50519edf
>> --- /dev/null
>> +++ b/include/hw/ppc/xive.h
>> @@ -0,0 +1,130 @@
>> +/*
>> + * QEMU PowerPC XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2017-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef PPC_XIVE_H
>> +#define PPC_XIVE_H
>> +
>> +#include "hw/sysbus.h"
>> +
>> +/*
>> + * XIVE Interrupt Source
>> + */
>> +
>> +#define TYPE_XIVE_SOURCE "xive-source"
>> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
>> +
>> +/*
>> + * XIVE Source Interrupt source characteristics, which define how the
>> + * ESB are controlled.
>> + */
>> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
>> +#define XIVE_SRC_STORE_EOI     0x4 /* Store EOI supported */
>> +
>> +typedef struct XiveSource {
>> +    SysBusDevice parent;
>> +
>> +    /* IRQs */
>> +    uint32_t     nr_irqs;
>> +    uint32_t     offset;
>> +    qemu_irq     *qirqs;
>> +
>> +    /* PQ bits */
>> +    uint8_t      *sbe;
>> +    uint32_t     sbe_size;
>> +
>> +    /* ESB memory region */
>> +    uint64_t     esb_flags;
>> +    hwaddr       esb_base;
>> +    uint32_t     esb_shift;
>> +    MemoryRegion esb_mmio;
>> +} XiveSource;
>> +
>> +/*
>> + * ESB MMIO setting. Can be one page, for both source triggering and
>> + * source management, or two different pages. See below for magic
>> + * values.
>> + */
>> +#define XIVE_ESB_4K          12 /* PSI HB */
>> +#define XIVE_ESB_4K_2PAGE    17
> 
> Should this be 13 instead of 17?

oups. obviously this is not used.

>> +#define XIVE_ESB_64K         16
>> +#define XIVE_ESB_64K_2PAGE   17
> 
> (Also, who the hell comes up with a brand new PIC and decides to have
> *4* different interface variants.  But that's not your problem, I
> realise).

HW constraints on the different controllers which need to expose
sources : PSI, PHB4. The internal sources of the XIVE interrupt 
controller can be configured to use 4K or 64K but I doubt the 4k
will be ever used.

Thanks,

C.
 
>> +
>> +static inline bool xive_source_esb_2page(XiveSource *xsrc)
>> +{
>> +    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE;
>> +}
>> +
>> +static inline hwaddr xive_source_esb_base(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    assert(srcno < xsrc->nr_irqs);
>> +    return xsrc->esb_base + (1ull << xsrc->esb_shift) * srcno;
>> +}
>> +
>> +/* The trigger page is always the first/even page */
>> +#define xive_source_esb_trigger xive_source_esb_base
>> +
>> +/* In a two pages ESB MMIO setting, the odd page is for management */
>> +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
>> +{
>> +    hwaddr addr = xive_source_esb_base(xsrc, srcno);
>> +
>> +    if (xive_source_esb_2page(xsrc)) {
>> +        addr += (1 << (xsrc->esb_shift - 1));
>> +    }
>> +
>> +    return addr;
>> +}
>> +
>> +/*
>> + * Each interrupt source has a 2-bit state machine called ESB which
>> + * can be controlled by MMIO. It's made of 2 bits, P and Q. P
>> + * indicates that an interrupt is pending (has been sent to a queue
>> + * and is waiting for an EOI). Q indicates that the interrupt has been
>> + * triggered while pending.
>> + *
>> + * This acts as a coalescing mechanism in order to guarantee
>> + * that a given interrupt only occurs at most once in a queue.
>> + *
>> + * When doing an EOI, the Q bit will indicate if the interrupt
>> + * needs to be re-triggered.
>> + */
>> +#define XIVE_ESB_VAL_P        0x2
>> +#define XIVE_ESB_VAL_Q        0x1
>> +
>> +#define XIVE_ESB_RESET        0x0
>> +#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
>> +#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
>> +#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
>> +
>> +/*
>> + * "magic" Event State Buffer (ESB) MMIO offsets.
>> + *
>> + * The following offsets into the ESB MMIO allow to read or
>> + * manipulate the PQ bits. They must be used with an 8-bytes
>> + * load instruction. They all return the previous state of the
>> + * interrupt (atomically).
>> + *
>> + * Additionally, some ESB pages support doing an EOI via a
>> + * store at 0 and some ESBs support doing a trigger via a
>> + * separate trigger page.
>> + */
>> +#define XIVE_ESB_STORE_EOI      0x400 /* Store */
>> +#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
>> +#define XIVE_ESB_GET            0x800 /* Load */
>> +#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
>> +#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
>> +#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
>> +#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
>> +
>> +uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno);
>> +uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
>> +
>> +void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon);
>> +
>> +#endif /* PPC_XIVE_H */
>
David Gibson April 23, 2018, 3:59 a.m. UTC | #3
On Fri, Apr 20, 2018 at 10:27:21AM +0200, Cédric Le Goater wrote:
> On 04/20/2018 09:10 AM, David Gibson wrote:
> > On Thu, Apr 19, 2018 at 02:42:57PM +0200, Cédric Le Goater wrote:
> >> Each XIVE interrupt source is associated with a two bit state machine
> >> called an Event State Buffer (ESB) : the first bit "P" means that an
> >> interrupt is "pending" and waiting for an EOI and the bit "Q" (queued)
> >> means a new interrupt was triggered while another was still pending.
> >>
> >> When an event is triggered, the associated interrupt state bits are
> >> fetched and modified and forwarded to the virtualization engine of the
> >> controller doing the routing. These can also be controlled by MMIO, to
> >> trigger events or turn off the sources for instance. See code for more
> >> details on the states and transitions.
> >>
> >> On a sPAPR machine, the OS will obtain the address of the MMIO page of
> >> the ESB entry associated with a source and its characteristic using
> >> the H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is
> >> used.
> >>
> >> The xive_source_notify() routine is in charge forwarding the source
> >> event notification to the routing engine. It will be filled later on.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  Changes since v2:
> >>
> >>  - added support for Store EOI
> >>  - added support for two page MMIO setting like on KVM
> > 
> > Looks generally sane to me, though I have a few queries.
> > 
> >>
> >>  default-configs/ppc64-softmmu.mak |   1 +
> >>  hw/intc/Makefile.objs             |   1 +
> >>  hw/intc/xive.c                    | 335 ++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/xive.h             | 130 +++++++++++++++
> >>  4 files changed, 467 insertions(+)
> >>  create mode 100644 hw/intc/xive.c
> >>  create mode 100644 include/hw/ppc/xive.h
> >>
> >> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> >> index b94af6c7c62a..c6d13e757977 100644
> >> --- a/default-configs/ppc64-softmmu.mak
> >> +++ b/default-configs/ppc64-softmmu.mak
> >> @@ -16,4 +16,5 @@ CONFIG_VIRTIO_VGA=y
> >>  CONFIG_XICS=$(CONFIG_PSERIES)
> >>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
> >>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
> >> +CONFIG_XIVE=$(CONFIG_PSERIES)
> >>  CONFIG_MEM_HOTPLUG=y
> >> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> >> index 0e9963f5eecc..72a46ed91c31 100644
> >> --- a/hw/intc/Makefile.objs
> >> +++ b/hw/intc/Makefile.objs
> >> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
> >>  obj-$(CONFIG_XICS) += xics.o
> >>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
> >>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> >> +obj-$(CONFIG_XIVE) += xive.o
> >>  obj-$(CONFIG_POWERNV) += xics_pnv.o
> >>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
> >>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> new file mode 100644
> >> index 000000000000..c70578759d02
> >> --- /dev/null
> >> +++ b/hw/intc/xive.c
> >> @@ -0,0 +1,335 @@
> >> +/*
> >> + * QEMU PowerPC XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2017-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#include "qemu/osdep.h"
> >> +#include "qemu/log.h"
> >> +#include "qapi/error.h"
> >> +#include "target/ppc/cpu.h"
> >> +#include "sysemu/cpus.h"
> >> +#include "sysemu/dma.h"
> >> +#include "monitor/monitor.h"
> >> +#include "hw/ppc/xive.h"
> >> +
> >> +/*
> >> + * XIVE Interrupt Source
> >> + */
> >> +
> >> +uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno)
> >> +{
> >> +    uint32_t byte = srcno / 4;
> >> +    uint32_t bit  = (srcno % 4) * 2;
> >> +
> >> +    assert(byte < xsrc->sbe_size);
> >> +
> >> +    return (xsrc->sbe[byte] >> bit) & 0x3;
> >> +}
> >> +
> >> +uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
> >> +{
> >> +    uint32_t byte = srcno / 4;
> >> +    uint32_t bit  = (srcno % 4) * 2;
> >> +    uint8_t old, new;
> >> +
> >> +    assert(byte < xsrc->sbe_size);
> >> +
> >> +    old = xsrc->sbe[byte];
> >> +
> >> +    new = xsrc->sbe[byte] & ~(0x3 << bit);
> >> +    new |= (pq & 0x3) << bit;
> >> +
> >> +    xsrc->sbe[byte] = new;
> >> +
> >> +    return (old >> bit) & 0x3;
> >> +}
> >> +
> >> +static bool xive_source_pq_eoi(XiveSource *xsrc, uint32_t srcno)
> >> +{
> >> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
> >> +
> >> +    switch (old_pq) {
> >> +    case XIVE_ESB_RESET:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
> >> +        return false;
> >> +    case XIVE_ESB_PENDING:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
> >> +        return false;
> >> +    case XIVE_ESB_QUEUED:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
> >> +        return true;
> >> +    case XIVE_ESB_OFF:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
> >> +        return false;
> >> +    default:
> >> +         g_assert_not_reached();
> >> +    }
> >> +}
> >> +
> >> +/*
> >> + * Returns whether the event notification should be forwarded.
> >> + */
> >> +static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
> >> +{
> >> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
> >> +
> >> +    switch (old_pq) {
> >> +    case XIVE_ESB_RESET:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
> >> +        return true;
> >> +    case XIVE_ESB_PENDING:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
> >> +        return false;
> >> +    case XIVE_ESB_QUEUED:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
> >> +        return false;
> >> +    case XIVE_ESB_OFF:
> >> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
> >> +        return false;
> >> +    default:
> >> +         g_assert_not_reached();
> >> +    }
> >> +}
> >> +
> >> +/*
> >> + * Forward the source event notification to the associated XiveFabric,
> >> + * the device owning the sources.
> >> + */
> >> +static void xive_source_notify(XiveSource *xsrc, int srcno)
> >> +{
> >> +
> >> +}
> >> +
> >> +/* In a two pages ESB MMIO setting, even page is the trigger page, odd
> >> + * page is for management */
> > 
> > Can I understand from this that the result from this function is only
> > meaningful in 2-pages mode?
> 
> yes. May be I should rename it to xive_source_is_even_page() ?

Seems very long winded.  Maybe keep the name but have it check both
whether it's an even page and also whether you're in 2pages mode.

> >> +static inline bool xive_source_is_trigger_page(hwaddr addr)
> >> +{
> >> +    return !((addr >> 16) & 1);
> > 
> > Later on you seem to have both 4k and 64k variants list, but here you
> > hardcode 64k.  Is that a problem?
> 	
> oups. This should be  :
> 
> 	(addr >> (xsrc->esb_shift - 1))
> 
> I did the tests with the spapr guest which uses 64k ESB MMIO pages. 
> The check is only significant in a 2 pages setting.

Ok.

> >> +}
> >> +
> >> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> >> +    uint32_t offset = addr & 0xF00;
> > 
> > You ignore the low bits of the address entirely, so effective you have
> > a 256 byte range that's all aliases of the same register.  Is that
> > intentional?
> 
> yes but it's not entirely correct. The exact ranges are :
> 
> 			Load			Store
> 
> 0x000 .. 0x3FF		EOI and return 0|1	Trigger
> 0x400 .. 0x7FF		EOI and return 0|1	EOI
> 0x800 .. 0xBFF  	return PQ		undefined
> 0xC00 .. 0xCFF		return PQ and PQ=00	PQ=00
> 0xD00 .. 0xDFF		return PQ and PQ=01	PQ=01
> 0xE00 .. 0xDFF		return PQ and PQ=10	PQ=10
> 0xF00 .. 0xDFF		return PQ and PQ=11	PQ=11
> 
> There is room for some improvements.
> 
> The trigger page in a two pages ESB MMIO settings only triggers on stores.

Ok.

> >> +    uint32_t srcno = addr >> xsrc->esb_shift;
> >> +    uint64_t ret = -1;
> >> +
> >> +    if (xive_source_esb_2page(xsrc) && xive_source_is_trigger_page(addr)) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR,
> >> +                      "XIVE: invalid load on IRQ %d trigger page at "
> >> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
> >> +        return -1;
> >> +    }
> >> +
> >> +    switch (offset) {
> >> +    case XIVE_ESB_LOAD_EOI:
> >> +        /*
> >> +         * Load EOI is not the default source setting under QEMU, but
> >> +         * this is what HW uses currently.
> >> +         */
> >> +        ret = xive_source_pq_eoi(xsrc, srcno);
> > 
> > You're implicitly casting a bool return value into a u64 here, is that
> > intentional?
> 
> yes. is that bad ? This is what the load is supposed to return in the transition 
> algo. 

No, that's fine, as long as just using the LSB in your return is
correct.  Just making sure I understood.

> >> +
> >> +        break;
> >> +
> >> +    case XIVE_ESB_GET:
> >> +        ret = xive_source_pq_get(xsrc, srcno);
> >> +        break;
> >> +
> >> +    case XIVE_ESB_SET_PQ_00:
> >> +    case XIVE_ESB_SET_PQ_01:
> >> +    case XIVE_ESB_SET_PQ_10:
> >> +    case XIVE_ESB_SET_PQ_11:
> >> +        ret = xive_source_pq_set(xsrc, srcno, (offset >> 8) & 0x3);
> >> +        break;
> >> +    default:
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB addr %d\n", offset);
> >> +    }
> >> +
> >> +    return ret;
> >> +}
> >> +
> >> +static void xive_source_esb_write(void *opaque, hwaddr addr,
> >> +                                 uint64_t value, unsigned size)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> >> +    uint32_t offset = addr & 0xF00;
> >> +    uint32_t srcno = addr >> xsrc->esb_shift;
> >> +    bool notify = false;
> >> +
> >> +    switch (offset) {
> >> +    case 0:
> >> +        notify = xive_source_pq_trigger(xsrc, srcno);
> >> +        break;
> >> +
> >> +    case XIVE_ESB_STORE_EOI:
> >> +        if (xive_source_is_trigger_page(addr)) {
> >> +            qemu_log_mask(LOG_GUEST_ERROR,
> >> +                          "XIVE: invalid store on IRQ %d trigger page at "
> >> +                          "0x%"HWADDR_PRIx"\n", srcno, addr);
> >> +            return;
> >> +        }
> >> +
> >> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
> >> +            qemu_log_mask(LOG_GUEST_ERROR,
> >> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
> >> +            return;
> >> +        }
> >> +
> >> +        /* If the Q bit is set, we should forward a new source event
> >> +         * notification
> >> +         */
> >> +        notify = xive_source_pq_eoi(xsrc, srcno);
> >> +        break;
> >> +
> >> +    default:
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %d\n",
> >> +                      offset);
> >> +        return;
> >> +    }
> >> +
> >> +    /* Forward the source event notification for routing */
> >> +    if (notify) {
> >> +        xive_source_notify(xsrc, srcno);
> >> +    }
> > 
> > EOI via this path calls notify, but the one via the read path
> > doesn't.  Is that correct?
> 
> No. I have given attention to the one page ESB MMIO setting + Store EOI 
> in the emulated mode and not enough to the two pages ESB MMIO setting. 
> This is a late change to be compatible with KVM. I will fix.

Ok.

> > 
> >> +}
> >> +
> >> +static const MemoryRegionOps xive_source_esb_ops = {
> >> +    .read = xive_source_esb_read,
> >> +    .write = xive_source_esb_write,
> >> +    .endianness = DEVICE_BIG_ENDIAN,
> >> +    .valid = {
> >> +        .min_access_size = 8,
> >> +        .max_access_size = 8,
> >> +    },
> >> +    .impl = {
> >> +        .min_access_size = 8,
> >> +        .max_access_size = 8,
> >> +    },
> >> +};
> >> +
> >> +static void xive_source_set_irq(void *opaque, int srcno, int val)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> >> +    bool notify = false;
> >> +
> >> +    if (val) {
> >> +        notify = xive_source_pq_trigger(xsrc, srcno);
> >> +    }
> >> +
> >> +    /* Forward the source event notification for routing */
> >> +    if (notify) {
> >> +        xive_source_notify(xsrc, srcno);
> >> +    }
> >> +}
> >> +
> >> +void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon)
> >> +{
> >> +    int i;
> >> +
> >> +    monitor_printf(mon, "XIVE Source %6x ..%6x\n",
> >> +                   xsrc->offset, xsrc->offset + xsrc->nr_irqs - 1);
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        uint8_t pq = xive_source_pq_get(xsrc, i);
> >> +        uint32_t lisn = i  + xsrc->offset;
> >> +
> >> +        if (pq == XIVE_ESB_OFF) {
> >> +            continue;
> >> +        }
> >> +
> >> +        monitor_printf(mon, "  %4x %c%c\n", lisn,
> >> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
> >> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
> >> +    }
> >> +}
> >> +
> >> +static void xive_source_reset(DeviceState *dev)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> >> +
> >> +    /* SBEs are initialized to 0b01 which corresponds to "ints off" */
> >> +    memset(xsrc->sbe, 0x55, xsrc->sbe_size);
> >> +}
> >> +
> >> +static void xive_source_realize(DeviceState *dev, Error **errp)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> >> +
> >> +    if (!xsrc->nr_irqs) {
> >> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
> >> +        return;
> >> +    }
> >> +
> >> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
> >> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
> >> +        xsrc->esb_shift != XIVE_ESB_64K &&
> >> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
> >> +        error_setg(errp, "Invalid ESB shift setting");
> >> +        return;
> >> +    }
> >> +
> >> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
> >> +                                     xsrc->nr_irqs);
> >> +
> >> +    /* Allocate the SBEs (State Bit Entry). 2 bits, so 4 entries per byte */
> >> +    xsrc->sbe_size = DIV_ROUND_UP(xsrc->nr_irqs, 4);
> >> +    xsrc->sbe = g_malloc0(xsrc->sbe_size);
> >> +
> >> +    /* TODO: H_INT_ESB support, which removing the ESB MMIOs */
> >> +
> >> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> >> +                          &xive_source_esb_ops, xsrc, "xive.esb",
> >> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> >> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
> >> +}
> >> +
> >> +static const VMStateDescription vmstate_xive_source = {
> >> +    .name = TYPE_XIVE_SOURCE,
> >> +    .version_id = 1,
> >> +    .minimum_version_id = 1,
> >> +    .fields = (VMStateField[]) {
> >> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
> >> +        VMSTATE_VBUFFER_UINT32(sbe, XiveSource, 1, NULL, sbe_size),
> >> +        VMSTATE_END_OF_LIST()
> >> +    },
> >> +};
> >> +
> >> +/*
> >> + * The default XIVE interrupt source setting for ESB MMIO is two 64k
> >> + * pages without Store EOI. This is in sync with KVM.
> >> + */
> >> +static Property xive_source_properties[] = {
> >> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
> >> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
> >> +    DEFINE_PROP_UINT64("bar", XiveSource, esb_base, 0),
> > 
> > Isn't this redundant with however the base address is handled through
> > the SysBusDevice stuff (I forget the details)?
> 
> Storing the ESB MMIO base address under the XiveSource object is 
> convenient later on in the h_int_get_source_info hcall which make
> use of the helpers : 
> 
> 	hwaddr xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
> 	hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
> 
> But it is only used in that place. So we could just store the ESB 
> MMIO base address under the sPAPRXive controller. This makes some
> sense in the design, as we have to inform KVM of this address with
> a KVM device ioctl.

Well.. I really dislike the idea that you could change the actual MMIO
mapping address in one place, but other bits of code would still think
it was mapped somewhere else.

> >> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
> >> +    DEFINE_PROP_END_OF_LIST(),
> >> +};
> >> +
> >> +static void xive_source_class_init(ObjectClass *klass, void *data)
> >> +{
> >> +    DeviceClass *dc = DEVICE_CLASS(klass);
> >> +
> >> +    dc->realize = xive_source_realize;
> >> +    dc->reset = xive_source_reset;
> >> +    dc->props = xive_source_properties;
> >> +    dc->desc = "XIVE interrupt source";
> >> +    dc->vmsd = &vmstate_xive_source;
> >> +}
> >> +
> >> +static const TypeInfo xive_source_info = {
> >> +    .name          = TYPE_XIVE_SOURCE,
> >> +    .parent        = TYPE_SYS_BUS_DEVICE,
> >> +    .instance_size = sizeof(XiveSource),
> >> +    .class_init    = xive_source_class_init,
> >> +};
> >> +
> >> +static void xive_register_types(void)
> >> +{
> >> +    type_register_static(&xive_source_info);
> >> +}
> >> +
> >> +type_init(xive_register_types)
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> new file mode 100644
> >> index 000000000000..d92a50519edf
> >> --- /dev/null
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -0,0 +1,130 @@
> >> +/*
> >> + * QEMU PowerPC XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2017-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#ifndef PPC_XIVE_H
> >> +#define PPC_XIVE_H
> >> +
> >> +#include "hw/sysbus.h"
> >> +
> >> +/*
> >> + * XIVE Interrupt Source
> >> + */
> >> +
> >> +#define TYPE_XIVE_SOURCE "xive-source"
> >> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
> >> +
> >> +/*
> >> + * XIVE Source Interrupt source characteristics, which define how the
> >> + * ESB are controlled.
> >> + */
> >> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
> >> +#define XIVE_SRC_STORE_EOI     0x4 /* Store EOI supported */
> >> +
> >> +typedef struct XiveSource {
> >> +    SysBusDevice parent;
> >> +
> >> +    /* IRQs */
> >> +    uint32_t     nr_irqs;
> >> +    uint32_t     offset;
> >> +    qemu_irq     *qirqs;
> >> +
> >> +    /* PQ bits */
> >> +    uint8_t      *sbe;
> >> +    uint32_t     sbe_size;
> >> +
> >> +    /* ESB memory region */
> >> +    uint64_t     esb_flags;
> >> +    hwaddr       esb_base;
> >> +    uint32_t     esb_shift;
> >> +    MemoryRegion esb_mmio;
> >> +} XiveSource;
> >> +
> >> +/*
> >> + * ESB MMIO setting. Can be one page, for both source triggering and
> >> + * source management, or two different pages. See below for magic
> >> + * values.
> >> + */
> >> +#define XIVE_ESB_4K          12 /* PSI HB */
> >> +#define XIVE_ESB_4K_2PAGE    17
> > 
> > Should this be 13 instead of 17?
> 
> oups. obviously this is not used.
> 
> >> +#define XIVE_ESB_64K         16
> >> +#define XIVE_ESB_64K_2PAGE   17
> > 
> > (Also, who the hell comes up with a brand new PIC and decides to have
> > *4* different interface variants.  But that's not your problem, I
> > realise).
> 
> HW constraints on the different controllers which need to expose
> sources : PSI, PHB4. The internal sources of the XIVE interrupt 
> controller can be configured to use 4K or 64K but I doubt the 4k
> will be ever used.

Sure, the hardware is different, but *why* is it different.  This is a
brand new design, you'd think they could come up with one variant that
works for all the cases.
Cédric Le Goater April 23, 2018, 7:11 a.m. UTC | #4
On 04/23/2018 05:59 AM, David Gibson wrote:
> On Fri, Apr 20, 2018 at 10:27:21AM +0200, Cédric Le Goater wrote:
>> On 04/20/2018 09:10 AM, David Gibson wrote:
>>> On Thu, Apr 19, 2018 at 02:42:57PM +0200, Cédric Le Goater wrote:
>>>> Each XIVE interrupt source is associated with a two bit state machine
>>>> called an Event State Buffer (ESB) : the first bit "P" means that an
>>>> interrupt is "pending" and waiting for an EOI and the bit "Q" (queued)
>>>> means a new interrupt was triggered while another was still pending.
>>>>
>>>> When an event is triggered, the associated interrupt state bits are
>>>> fetched and modified and forwarded to the virtualization engine of the
>>>> controller doing the routing. These can also be controlled by MMIO, to
>>>> trigger events or turn off the sources for instance. See code for more
>>>> details on the states and transitions.
>>>>
>>>> On a sPAPR machine, the OS will obtain the address of the MMIO page of
>>>> the ESB entry associated with a source and its characteristic using
>>>> the H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is
>>>> used.
>>>>
>>>> The xive_source_notify() routine is in charge forwarding the source
>>>> event notification to the routing engine. It will be filled later on.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  Changes since v2:
>>>>
>>>>  - added support for Store EOI
>>>>  - added support for two page MMIO setting like on KVM
>>>
>>> Looks generally sane to me, though I have a few queries.
>>>
>>>>
>>>>  default-configs/ppc64-softmmu.mak |   1 +
>>>>  hw/intc/Makefile.objs             |   1 +
>>>>  hw/intc/xive.c                    | 335 ++++++++++++++++++++++++++++++++++++++
>>>>  include/hw/ppc/xive.h             | 130 +++++++++++++++
>>>>  4 files changed, 467 insertions(+)
>>>>  create mode 100644 hw/intc/xive.c
>>>>  create mode 100644 include/hw/ppc/xive.h
>>>>
>>>> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
>>>> index b94af6c7c62a..c6d13e757977 100644
>>>> --- a/default-configs/ppc64-softmmu.mak
>>>> +++ b/default-configs/ppc64-softmmu.mak
>>>> @@ -16,4 +16,5 @@ CONFIG_VIRTIO_VGA=y
>>>>  CONFIG_XICS=$(CONFIG_PSERIES)
>>>>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>>>>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
>>>> +CONFIG_XIVE=$(CONFIG_PSERIES)
>>>>  CONFIG_MEM_HOTPLUG=y
>>>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>>>> index 0e9963f5eecc..72a46ed91c31 100644
>>>> --- a/hw/intc/Makefile.objs
>>>> +++ b/hw/intc/Makefile.objs
>>>> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
>>>>  obj-$(CONFIG_XICS) += xics.o
>>>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>>>> +obj-$(CONFIG_XIVE) += xive.o
>>>>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>>>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>> new file mode 100644
>>>> index 000000000000..c70578759d02
>>>> --- /dev/null
>>>> +++ b/hw/intc/xive.c
>>>> @@ -0,0 +1,335 @@
>>>> +/*
>>>> + * QEMU PowerPC XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2017-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#include "qemu/osdep.h"
>>>> +#include "qemu/log.h"
>>>> +#include "qapi/error.h"
>>>> +#include "target/ppc/cpu.h"
>>>> +#include "sysemu/cpus.h"
>>>> +#include "sysemu/dma.h"
>>>> +#include "monitor/monitor.h"
>>>> +#include "hw/ppc/xive.h"
>>>> +
>>>> +/*
>>>> + * XIVE Interrupt Source
>>>> + */
>>>> +
>>>> +uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno)
>>>> +{
>>>> +    uint32_t byte = srcno / 4;
>>>> +    uint32_t bit  = (srcno % 4) * 2;
>>>> +
>>>> +    assert(byte < xsrc->sbe_size);
>>>> +
>>>> +    return (xsrc->sbe[byte] >> bit) & 0x3;
>>>> +}
>>>> +
>>>> +uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
>>>> +{
>>>> +    uint32_t byte = srcno / 4;
>>>> +    uint32_t bit  = (srcno % 4) * 2;
>>>> +    uint8_t old, new;
>>>> +
>>>> +    assert(byte < xsrc->sbe_size);
>>>> +
>>>> +    old = xsrc->sbe[byte];
>>>> +
>>>> +    new = xsrc->sbe[byte] & ~(0x3 << bit);
>>>> +    new |= (pq & 0x3) << bit;
>>>> +
>>>> +    xsrc->sbe[byte] = new;
>>>> +
>>>> +    return (old >> bit) & 0x3;
>>>> +}
>>>> +
>>>> +static bool xive_source_pq_eoi(XiveSource *xsrc, uint32_t srcno)
>>>> +{
>>>> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
>>>> +
>>>> +    switch (old_pq) {
>>>> +    case XIVE_ESB_RESET:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
>>>> +        return false;
>>>> +    case XIVE_ESB_PENDING:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
>>>> +        return false;
>>>> +    case XIVE_ESB_QUEUED:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
>>>> +        return true;
>>>> +    case XIVE_ESB_OFF:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
>>>> +        return false;
>>>> +    default:
>>>> +         g_assert_not_reached();
>>>> +    }
>>>> +}
>>>> +
>>>> +/*
>>>> + * Returns whether the event notification should be forwarded.
>>>> + */
>>>> +static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>>>> +{
>>>> +    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
>>>> +
>>>> +    switch (old_pq) {
>>>> +    case XIVE_ESB_RESET:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
>>>> +        return true;
>>>> +    case XIVE_ESB_PENDING:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
>>>> +        return false;
>>>> +    case XIVE_ESB_QUEUED:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
>>>> +        return false;
>>>> +    case XIVE_ESB_OFF:
>>>> +        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
>>>> +        return false;
>>>> +    default:
>>>> +         g_assert_not_reached();
>>>> +    }
>>>> +}
>>>> +
>>>> +/*
>>>> + * Forward the source event notification to the associated XiveFabric,
>>>> + * the device owning the sources.
>>>> + */
>>>> +static void xive_source_notify(XiveSource *xsrc, int srcno)
>>>> +{
>>>> +
>>>> +}
>>>> +
>>>> +/* In a two pages ESB MMIO setting, even page is the trigger page, odd
>>>> + * page is for management */
>>>
>>> Can I understand from this that the result from this function is only
>>> meaningful in 2-pages mode?
>>
>> yes. May be I should rename it to xive_source_is_even_page() ?
> 
> Seems very long winded.  Maybe keep the name but have it check both
> whether it's an even page and also whether you're in 2pages mode.

yes. I had that in mind.

> 
>>>> +static inline bool xive_source_is_trigger_page(hwaddr addr)
>>>> +{
>>>> +    return !((addr >> 16) & 1);
>>>
>>> Later on you seem to have both 4k and 64k variants list, but here you
>>> hardcode 64k.  Is that a problem?
>> 	
>> oups. This should be  :
>>
>> 	(addr >> (xsrc->esb_shift - 1))
>>
>> I did the tests with the spapr guest which uses 64k ESB MMIO pages. 
>> The check is only significant in a 2 pages setting.
> 
> Ok.
> 
>>>> +}
>>>> +
>>>> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>>>> +    uint32_t offset = addr & 0xF00;
>>>
>>> You ignore the low bits of the address entirely, so effective you have
>>> a 256 byte range that's all aliases of the same register.  Is that
>>> intentional?
>>
>> yes but it's not entirely correct. The exact ranges are :
>>
>> 			Load			Store
>>
>> 0x000 .. 0x3FF		EOI and return 0|1	Trigger
>> 0x400 .. 0x7FF		EOI and return 0|1	EOI
>> 0x800 .. 0xBFF  	return PQ		undefined
>> 0xC00 .. 0xCFF		return PQ and PQ=00	PQ=00
>> 0xD00 .. 0xDFF		return PQ and PQ=01	PQ=01
>> 0xE00 .. 0xDFF		return PQ and PQ=10	PQ=10
>> 0xF00 .. 0xDFF		return PQ and PQ=11	PQ=11
>>
>> There is room for some improvements.
>>
>> The trigger page in a two pages ESB MMIO settings only triggers on stores.
> 
> Ok.

I have fixed the ranges and added the above table as a comment in the code.
it helps in understanding what are the MMIOs are.

> 
>>>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>>>> +    uint64_t ret = -1;
>>>> +
>>>> +    if (xive_source_esb_2page(xsrc) && xive_source_is_trigger_page(addr)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>>> +                      "XIVE: invalid load on IRQ %d trigger page at "
>>>> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    switch (offset) {
>>>> +    case XIVE_ESB_LOAD_EOI:
>>>> +        /*
>>>> +         * Load EOI is not the default source setting under QEMU, but
>>>> +         * this is what HW uses currently.
>>>> +         */
>>>> +        ret = xive_source_pq_eoi(xsrc, srcno);
>>>
>>> You're implicitly casting a bool return value into a u64 here, is that
>>> intentional?
>>
>> yes. is that bad ? This is what the load is supposed to return in the transition 
>> algo. 
> 
> No, that's fine, as long as just using the LSB in your return is
> correct.  Just making sure I understood.
> 
>>>> +
>>>> +        break;
>>>> +
>>>> +    case XIVE_ESB_GET:
>>>> +        ret = xive_source_pq_get(xsrc, srcno);
>>>> +        break;
>>>> +
>>>> +    case XIVE_ESB_SET_PQ_00:
>>>> +    case XIVE_ESB_SET_PQ_01:
>>>> +    case XIVE_ESB_SET_PQ_10:
>>>> +    case XIVE_ESB_SET_PQ_11:
>>>> +        ret = xive_source_pq_set(xsrc, srcno, (offset >> 8) & 0x3);
>>>> +        break;
>>>> +    default:
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB addr %d\n", offset);
>>>> +    }
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static void xive_source_esb_write(void *opaque, hwaddr addr,
>>>> +                                 uint64_t value, unsigned size)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>>>> +    uint32_t offset = addr & 0xF00;
>>>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>>>> +    bool notify = false;
>>>> +
>>>> +    switch (offset) {
>>>> +    case 0:
>>>> +        notify = xive_source_pq_trigger(xsrc, srcno);
>>>> +        break;
>>>> +
>>>> +    case XIVE_ESB_STORE_EOI:
>>>> +        if (xive_source_is_trigger_page(addr)) {
>>>> +            qemu_log_mask(LOG_GUEST_ERROR,
>>>> +                          "XIVE: invalid store on IRQ %d trigger page at "
>>>> +                          "0x%"HWADDR_PRIx"\n", srcno, addr);
>>>> +            return;
>>>> +        }
>>>> +
>>>> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
>>>> +            qemu_log_mask(LOG_GUEST_ERROR,
>>>> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
>>>> +            return;
>>>> +        }
>>>> +
>>>> +        /* If the Q bit is set, we should forward a new source event
>>>> +         * notification
>>>> +         */
>>>> +        notify = xive_source_pq_eoi(xsrc, srcno);
>>>> +        break;
>>>> +
>>>> +    default:
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %d\n",
>>>> +                      offset);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* Forward the source event notification for routing */
>>>> +    if (notify) {
>>>> +        xive_source_notify(xsrc, srcno);
>>>> +    }
>>>
>>> EOI via this path calls notify, but the one via the read path
>>> doesn't.  Is that correct?
>>
>> No. I have given attention to the one page ESB MMIO setting + Store EOI 
>> in the emulated mode and not enough to the two pages ESB MMIO setting. 
>> This is a late change to be compatible with KVM. I will fix.
> 
> Ok.
> 
>>>
>>>> +}
>>>> +
>>>> +static const MemoryRegionOps xive_source_esb_ops = {
>>>> +    .read = xive_source_esb_read,
>>>> +    .write = xive_source_esb_write,
>>>> +    .endianness = DEVICE_BIG_ENDIAN,
>>>> +    .valid = {
>>>> +        .min_access_size = 8,
>>>> +        .max_access_size = 8,
>>>> +    },
>>>> +    .impl = {
>>>> +        .min_access_size = 8,
>>>> +        .max_access_size = 8,
>>>> +    },
>>>> +};
>>>> +
>>>> +static void xive_source_set_irq(void *opaque, int srcno, int val)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>>>> +    bool notify = false;
>>>> +
>>>> +    if (val) {
>>>> +        notify = xive_source_pq_trigger(xsrc, srcno);
>>>> +    }
>>>> +
>>>> +    /* Forward the source event notification for routing */
>>>> +    if (notify) {
>>>> +        xive_source_notify(xsrc, srcno);
>>>> +    }
>>>> +}
>>>> +
>>>> +void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    monitor_printf(mon, "XIVE Source %6x ..%6x\n",
>>>> +                   xsrc->offset, xsrc->offset + xsrc->nr_irqs - 1);
>>>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>>>> +        uint8_t pq = xive_source_pq_get(xsrc, i);
>>>> +        uint32_t lisn = i  + xsrc->offset;
>>>> +
>>>> +        if (pq == XIVE_ESB_OFF) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        monitor_printf(mon, "  %4x %c%c\n", lisn,
>>>> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
>>>> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
>>>> +    }
>>>> +}
>>>> +
>>>> +static void xive_source_reset(DeviceState *dev)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>>>> +
>>>> +    /* SBEs are initialized to 0b01 which corresponds to "ints off" */
>>>> +    memset(xsrc->sbe, 0x55, xsrc->sbe_size);
>>>> +}
>>>> +
>>>> +static void xive_source_realize(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>>>> +
>>>> +    if (!xsrc->nr_irqs) {
>>>> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
>>>> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
>>>> +        xsrc->esb_shift != XIVE_ESB_64K &&
>>>> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
>>>> +        error_setg(errp, "Invalid ESB shift setting");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
>>>> +                                     xsrc->nr_irqs);
>>>> +
>>>> +    /* Allocate the SBEs (State Bit Entry). 2 bits, so 4 entries per byte */
>>>> +    xsrc->sbe_size = DIV_ROUND_UP(xsrc->nr_irqs, 4);
>>>> +    xsrc->sbe = g_malloc0(xsrc->sbe_size);
>>>> +
>>>> +    /* TODO: H_INT_ESB support, which removing the ESB MMIOs */
>>>> +
>>>> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
>>>> +                          &xive_source_esb_ops, xsrc, "xive.esb",
>>>> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
>>>> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
>>>> +}
>>>> +
>>>> +static const VMStateDescription vmstate_xive_source = {
>>>> +    .name = TYPE_XIVE_SOURCE,
>>>> +    .version_id = 1,
>>>> +    .minimum_version_id = 1,
>>>> +    .fields = (VMStateField[]) {
>>>> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
>>>> +        VMSTATE_VBUFFER_UINT32(sbe, XiveSource, 1, NULL, sbe_size),
>>>> +        VMSTATE_END_OF_LIST()
>>>> +    },
>>>> +};
>>>> +
>>>> +/*
>>>> + * The default XIVE interrupt source setting for ESB MMIO is two 64k
>>>> + * pages without Store EOI. This is in sync with KVM.
>>>> + */
>>>> +static Property xive_source_properties[] = {
>>>> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
>>>> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
>>>> +    DEFINE_PROP_UINT64("bar", XiveSource, esb_base, 0),
>>>
>>> Isn't this redundant with however the base address is handled through
>>> the SysBusDevice stuff (I forget the details)?
>>
>> Storing the ESB MMIO base address under the XiveSource object is 
>> convenient later on in the h_int_get_source_info hcall which make
>> use of the helpers : 
>>
>> 	hwaddr xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
>> 	hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
>>
>> But it is only used in that place. So we could just store the ESB 
>> MMIO base address under the sPAPRXive controller. This makes some
>> sense in the design, as we have to inform KVM of this address with
>> a KVM device ioctl.
> 
> Well.. I really dislike the idea that you could change the actual MMIO
> mapping address in one place, but other bits of code would still think
> it was mapped somewhere else.

OK. I think that my last proposal of removing the ESB MMIO address 
from the source and letting the owning device, the sPAPR Xive controller 
in this case, but this is the same for PoweNV or PSI HB, handle the
mapping goes in the direction you want to take ?  

It does looks better in the overall XIVE model and the XiveSource
object have no need of this address.
 
> 
>>>> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
>>>> +    DEFINE_PROP_END_OF_LIST(),
>>>> +};
>>>> +
>>>> +static void xive_source_class_init(ObjectClass *klass, void *data)
>>>> +{
>>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>>> +
>>>> +    dc->realize = xive_source_realize;
>>>> +    dc->reset = xive_source_reset;
>>>> +    dc->props = xive_source_properties;
>>>> +    dc->desc = "XIVE interrupt source";
>>>> +    dc->vmsd = &vmstate_xive_source;
>>>> +}
>>>> +
>>>> +static const TypeInfo xive_source_info = {
>>>> +    .name          = TYPE_XIVE_SOURCE,
>>>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>>>> +    .instance_size = sizeof(XiveSource),
>>>> +    .class_init    = xive_source_class_init,
>>>> +};
>>>> +
>>>> +static void xive_register_types(void)
>>>> +{
>>>> +    type_register_static(&xive_source_info);
>>>> +}
>>>> +
>>>> +type_init(xive_register_types)
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> new file mode 100644
>>>> index 000000000000..d92a50519edf
>>>> --- /dev/null
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -0,0 +1,130 @@
>>>> +/*
>>>> + * QEMU PowerPC XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2017-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#ifndef PPC_XIVE_H
>>>> +#define PPC_XIVE_H
>>>> +
>>>> +#include "hw/sysbus.h"
>>>> +
>>>> +/*
>>>> + * XIVE Interrupt Source
>>>> + */
>>>> +
>>>> +#define TYPE_XIVE_SOURCE "xive-source"
>>>> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
>>>> +
>>>> +/*
>>>> + * XIVE Source Interrupt source characteristics, which define how the
>>>> + * ESB are controlled.
>>>> + */
>>>> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
>>>> +#define XIVE_SRC_STORE_EOI     0x4 /* Store EOI supported */
>>>> +
>>>> +typedef struct XiveSource {
>>>> +    SysBusDevice parent;
>>>> +
>>>> +    /* IRQs */
>>>> +    uint32_t     nr_irqs;
>>>> +    uint32_t     offset;
>>>> +    qemu_irq     *qirqs;
>>>> +
>>>> +    /* PQ bits */
>>>> +    uint8_t      *sbe;
>>>> +    uint32_t     sbe_size;
>>>> +
>>>> +    /* ESB memory region */
>>>> +    uint64_t     esb_flags;
>>>> +    hwaddr       esb_base;
>>>> +    uint32_t     esb_shift;
>>>> +    MemoryRegion esb_mmio;
>>>> +} XiveSource;
>>>> +
>>>> +/*
>>>> + * ESB MMIO setting. Can be one page, for both source triggering and
>>>> + * source management, or two different pages. See below for magic
>>>> + * values.
>>>> + */
>>>> +#define XIVE_ESB_4K          12 /* PSI HB */
>>>> +#define XIVE_ESB_4K_2PAGE    17
>>>
>>> Should this be 13 instead of 17?
>>
>> oups. obviously this is not used.
>>
>>>> +#define XIVE_ESB_64K         16
>>>> +#define XIVE_ESB_64K_2PAGE   17
>>>
>>> (Also, who the hell comes up with a brand new PIC and decides to have
>>> *4* different interface variants.  But that's not your problem, I
>>> realise).
>>
>> HW constraints on the different controllers which need to expose
>> sources : PSI, PHB4. The internal sources of the XIVE interrupt 
>> controller can be configured to use 4K or 64K but I doubt the 4k
>> will be ever used.
> 
> Sure, the hardware is different, but *why* is it different.  This is a
> brand new design, you'd think they could come up with one variant that
> works for all the cases.
> 

Yes this is interesting. I will ask the HW team.

Thanks.

C.
David Gibson April 24, 2018, 1:24 a.m. UTC | #5
On Mon, Apr 23, 2018 at 09:11:28AM +0200, Cédric Le Goater wrote:
> On 04/23/2018 05:59 AM, David Gibson wrote:
> > On Fri, Apr 20, 2018 at 10:27:21AM +0200, Cédric Le Goater wrote:
> >> On 04/20/2018 09:10 AM, David Gibson wrote:
> >>> On Thu, Apr 19, 2018 at 02:42:57PM +0200, Cédric Le Goater wrote:
> >>>> Each XIVE interrupt source is associated with a two bit state machine
> >>>> called an Event State Buffer (ESB) : the first bit "P" means that an
> >>>> interrupt is "pending" and waiting for an EOI and the bit "Q" (queued)
> >>>> means a new interrupt was triggered while another was still pending.
> >>>>
> >>>> When an event is triggered, the associated interrupt state bits are
> >>>> fetched and modified and forwarded to the virtualization engine of the
> >>>> controller doing the routing. These can also be controlled by MMIO, to
> >>>> trigger events or turn off the sources for instance. See code for more
> >>>> details on the states and transitions.
> >>>>
> >>>> On a sPAPR machine, the OS will obtain the address of the MMIO page of
> >>>> the ESB entry associated with a source and its characteristic using
> >>>> the H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is
> >>>> used.
> >>>>
> >>>> The xive_source_notify() routine is in charge forwarding the source
> >>>> event notification to the routing engine. It will be filled later on.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> ---
> >>>>  Changes since v2:
> >>>>
> >>>>  - added support for Store EOI
> >>>>  - added support for two page MMIO setting like on KVM
> >>>
> >>> Looks generally sane to me, though I have a few queries.
[snip]
> >>>> + * The default XIVE interrupt source setting for ESB MMIO is two 64k
> >>>> + * pages without Store EOI. This is in sync with KVM.
> >>>> + */
> >>>> +static Property xive_source_properties[] = {
> >>>> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
> >>>> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
> >>>> +    DEFINE_PROP_UINT64("bar", XiveSource, esb_base, 0),
> >>>
> >>> Isn't this redundant with however the base address is handled through
> >>> the SysBusDevice stuff (I forget the details)?
> >>
> >> Storing the ESB MMIO base address under the XiveSource object is 
> >> convenient later on in the h_int_get_source_info hcall which make
> >> use of the helpers : 
> >>
> >> 	hwaddr xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
> >> 	hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
> >>
> >> But it is only used in that place. So we could just store the ESB 
> >> MMIO base address under the sPAPRXive controller. This makes some
> >> sense in the design, as we have to inform KVM of this address with
> >> a KVM device ioctl.
> > 
> > Well.. I really dislike the idea that you could change the actual MMIO
> > mapping address in one place, but other bits of code would still think
> > it was mapped somewhere else.
> 
> OK. I think that my last proposal of removing the ESB MMIO address 
> from the source and letting the owning device, the sPAPR Xive controller 
> in this case, but this is the same for PoweNV or PSI HB, handle the
> mapping goes in the direction you want to take ?  
> 
> It does looks better in the overall XIVE model and the XiveSource
> object have no need of this address.

Yes, I think that's the way to go.
diff mbox series

Patch

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index b94af6c7c62a..c6d13e757977 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -16,4 +16,5 @@  CONFIG_VIRTIO_VGA=y
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
+CONFIG_XIVE=$(CONFIG_PSERIES)
 CONFIG_MEM_HOTPLUG=y
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 0e9963f5eecc..72a46ed91c31 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -37,6 +37,7 @@  obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
+obj-$(CONFIG_XIVE) += xive.o
 obj-$(CONFIG_POWERNV) += xics_pnv.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
new file mode 100644
index 000000000000..c70578759d02
--- /dev/null
+++ b/hw/intc/xive.c
@@ -0,0 +1,335 @@ 
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/dma.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/xive.h"
+
+/*
+ * XIVE Interrupt Source
+ */
+
+uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno)
+{
+    uint32_t byte = srcno / 4;
+    uint32_t bit  = (srcno % 4) * 2;
+
+    assert(byte < xsrc->sbe_size);
+
+    return (xsrc->sbe[byte] >> bit) & 0x3;
+}
+
+uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
+{
+    uint32_t byte = srcno / 4;
+    uint32_t bit  = (srcno % 4) * 2;
+    uint8_t old, new;
+
+    assert(byte < xsrc->sbe_size);
+
+    old = xsrc->sbe[byte];
+
+    new = xsrc->sbe[byte] & ~(0x3 << bit);
+    new |= (pq & 0x3) << bit;
+
+    xsrc->sbe[byte] = new;
+
+    return (old >> bit) & 0x3;
+}
+
+static bool xive_source_pq_eoi(XiveSource *xsrc, uint32_t srcno)
+{
+    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
+
+    switch (old_pq) {
+    case XIVE_ESB_RESET:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
+        return false;
+    case XIVE_ESB_PENDING:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET);
+        return false;
+    case XIVE_ESB_QUEUED:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
+        return true;
+    case XIVE_ESB_OFF:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
+        return false;
+    default:
+         g_assert_not_reached();
+    }
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+    uint8_t old_pq = xive_source_pq_get(xsrc, srcno);
+
+    switch (old_pq) {
+    case XIVE_ESB_RESET:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING);
+        return true;
+    case XIVE_ESB_PENDING:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
+        return false;
+    case XIVE_ESB_QUEUED:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED);
+        return false;
+    case XIVE_ESB_OFF:
+        xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF);
+        return false;
+    default:
+         g_assert_not_reached();
+    }
+}
+
+/*
+ * Forward the source event notification to the associated XiveFabric,
+ * the device owning the sources.
+ */
+static void xive_source_notify(XiveSource *xsrc, int srcno)
+{
+
+}
+
+/* In a two pages ESB MMIO setting, even page is the trigger page, odd
+ * page is for management */
+static inline bool xive_source_is_trigger_page(hwaddr addr)
+{
+    return !((addr >> 16) & 1);
+}
+
+static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    uint32_t offset = addr & 0xF00;
+    uint32_t srcno = addr >> xsrc->esb_shift;
+    uint64_t ret = -1;
+
+    if (xive_source_esb_2page(xsrc) && xive_source_is_trigger_page(addr)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "XIVE: invalid load on IRQ %d trigger page at "
+                      "0x%"HWADDR_PRIx"\n", srcno, addr);
+        return -1;
+    }
+
+    switch (offset) {
+    case XIVE_ESB_LOAD_EOI:
+        /*
+         * Load EOI is not the default source setting under QEMU, but
+         * this is what HW uses currently.
+         */
+        ret = xive_source_pq_eoi(xsrc, srcno);
+
+        break;
+
+    case XIVE_ESB_GET:
+        ret = xive_source_pq_get(xsrc, srcno);
+        break;
+
+    case XIVE_ESB_SET_PQ_00:
+    case XIVE_ESB_SET_PQ_01:
+    case XIVE_ESB_SET_PQ_10:
+    case XIVE_ESB_SET_PQ_11:
+        ret = xive_source_pq_set(xsrc, srcno, (offset >> 8) & 0x3);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB addr %d\n", offset);
+    }
+
+    return ret;
+}
+
+static void xive_source_esb_write(void *opaque, hwaddr addr,
+                                 uint64_t value, unsigned size)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    uint32_t offset = addr & 0xF00;
+    uint32_t srcno = addr >> xsrc->esb_shift;
+    bool notify = false;
+
+    switch (offset) {
+    case 0:
+        notify = xive_source_pq_trigger(xsrc, srcno);
+        break;
+
+    case XIVE_ESB_STORE_EOI:
+        if (xive_source_is_trigger_page(addr)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "XIVE: invalid store on IRQ %d trigger page at "
+                          "0x%"HWADDR_PRIx"\n", srcno, addr);
+            return;
+        }
+
+        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
+            return;
+        }
+
+        /* If the Q bit is set, we should forward a new source event
+         * notification
+         */
+        notify = xive_source_pq_eoi(xsrc, srcno);
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %d\n",
+                      offset);
+        return;
+    }
+
+    /* Forward the source event notification for routing */
+    if (notify) {
+        xive_source_notify(xsrc, srcno);
+    }
+}
+
+static const MemoryRegionOps xive_source_esb_ops = {
+    .read = xive_source_esb_read,
+    .write = xive_source_esb_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+static void xive_source_set_irq(void *opaque, int srcno, int val)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    bool notify = false;
+
+    if (val) {
+        notify = xive_source_pq_trigger(xsrc, srcno);
+    }
+
+    /* Forward the source event notification for routing */
+    if (notify) {
+        xive_source_notify(xsrc, srcno);
+    }
+}
+
+void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon)
+{
+    int i;
+
+    monitor_printf(mon, "XIVE Source %6x ..%6x\n",
+                   xsrc->offset, xsrc->offset + xsrc->nr_irqs - 1);
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        uint8_t pq = xive_source_pq_get(xsrc, i);
+        uint32_t lisn = i  + xsrc->offset;
+
+        if (pq == XIVE_ESB_OFF) {
+            continue;
+        }
+
+        monitor_printf(mon, "  %4x %c%c\n", lisn,
+                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
+                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
+    }
+}
+
+static void xive_source_reset(DeviceState *dev)
+{
+    XiveSource *xsrc = XIVE_SOURCE(dev);
+
+    /* SBEs are initialized to 0b01 which corresponds to "ints off" */
+    memset(xsrc->sbe, 0x55, xsrc->sbe_size);
+}
+
+static void xive_source_realize(DeviceState *dev, Error **errp)
+{
+    XiveSource *xsrc = XIVE_SOURCE(dev);
+
+    if (!xsrc->nr_irqs) {
+        error_setg(errp, "Number of interrupt needs to be greater than 0");
+        return;
+    }
+
+    if (xsrc->esb_shift != XIVE_ESB_4K &&
+        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
+        xsrc->esb_shift != XIVE_ESB_64K &&
+        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
+        error_setg(errp, "Invalid ESB shift setting");
+        return;
+    }
+
+    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
+                                     xsrc->nr_irqs);
+
+    /* Allocate the SBEs (State Bit Entry). 2 bits, so 4 entries per byte */
+    xsrc->sbe_size = DIV_ROUND_UP(xsrc->nr_irqs, 4);
+    xsrc->sbe = g_malloc0(xsrc->sbe_size);
+
+    /* TODO: H_INT_ESB support, which removing the ESB MMIOs */
+
+    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+                          &xive_source_esb_ops, xsrc, "xive.esb",
+                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
+}
+
+static const VMStateDescription vmstate_xive_source = {
+    .name = TYPE_XIVE_SOURCE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
+        VMSTATE_VBUFFER_UINT32(sbe, XiveSource, 1, NULL, sbe_size),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/*
+ * The default XIVE interrupt source setting for ESB MMIO is two 64k
+ * pages without Store EOI. This is in sync with KVM.
+ */
+static Property xive_source_properties[] = {
+    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
+    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
+    DEFINE_PROP_UINT64("bar", XiveSource, esb_base, 0),
+    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_source_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = xive_source_realize;
+    dc->reset = xive_source_reset;
+    dc->props = xive_source_properties;
+    dc->desc = "XIVE interrupt source";
+    dc->vmsd = &vmstate_xive_source;
+}
+
+static const TypeInfo xive_source_info = {
+    .name          = TYPE_XIVE_SOURCE,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XiveSource),
+    .class_init    = xive_source_class_init,
+};
+
+static void xive_register_types(void)
+{
+    type_register_static(&xive_source_info);
+}
+
+type_init(xive_register_types)
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
new file mode 100644
index 000000000000..d92a50519edf
--- /dev/null
+++ b/include/hw/ppc/xive.h
@@ -0,0 +1,130 @@ 
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_XIVE_H
+#define PPC_XIVE_H
+
+#include "hw/sysbus.h"
+
+/*
+ * XIVE Interrupt Source
+ */
+
+#define TYPE_XIVE_SOURCE "xive-source"
+#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
+
+/*
+ * XIVE Source Interrupt source characteristics, which define how the
+ * ESB are controlled.
+ */
+#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
+#define XIVE_SRC_STORE_EOI     0x4 /* Store EOI supported */
+
+typedef struct XiveSource {
+    SysBusDevice parent;
+
+    /* IRQs */
+    uint32_t     nr_irqs;
+    uint32_t     offset;
+    qemu_irq     *qirqs;
+
+    /* PQ bits */
+    uint8_t      *sbe;
+    uint32_t     sbe_size;
+
+    /* ESB memory region */
+    uint64_t     esb_flags;
+    hwaddr       esb_base;
+    uint32_t     esb_shift;
+    MemoryRegion esb_mmio;
+} XiveSource;
+
+/*
+ * ESB MMIO setting. Can be one page, for both source triggering and
+ * source management, or two different pages. See below for magic
+ * values.
+ */
+#define XIVE_ESB_4K          12 /* PSI HB */
+#define XIVE_ESB_4K_2PAGE    17
+#define XIVE_ESB_64K         16
+#define XIVE_ESB_64K_2PAGE   17
+
+static inline bool xive_source_esb_2page(XiveSource *xsrc)
+{
+    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE;
+}
+
+static inline hwaddr xive_source_esb_base(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return xsrc->esb_base + (1ull << xsrc->esb_shift) * srcno;
+}
+
+/* The trigger page is always the first/even page */
+#define xive_source_esb_trigger xive_source_esb_base
+
+/* In a two pages ESB MMIO setting, the odd page is for management */
+static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
+{
+    hwaddr addr = xive_source_esb_base(xsrc, srcno);
+
+    if (xive_source_esb_2page(xsrc)) {
+        addr += (1 << (xsrc->esb_shift - 1));
+    }
+
+    return addr;
+}
+
+/*
+ * Each interrupt source has a 2-bit state machine called ESB which
+ * can be controlled by MMIO. It's made of 2 bits, P and Q. P
+ * indicates that an interrupt is pending (has been sent to a queue
+ * and is waiting for an EOI). Q indicates that the interrupt has been
+ * triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ */
+#define XIVE_ESB_VAL_P        0x2
+#define XIVE_ESB_VAL_Q        0x1
+
+#define XIVE_ESB_RESET        0x0
+#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
+#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
+#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
+
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI      0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
+#define XIVE_ESB_GET            0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
+
+uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno);
+uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
+
+void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon);
+
+#endif /* PPC_XIVE_H */