diff mbox series

[v3,04/35] spapr/xive: introduce a XIVE interrupt controller for sPAPR

Message ID 20180419124331.3915-5-clg@kaod.org
State New
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater April 19, 2018, 12:43 p.m. UTC
sPAPRXive is a model for the XIVE interrupt controller device of the
sPAPR machine. It holds the routing XIVE table, the Interrupt
Virtualization Entry (IVE) table which associates interrupt source
numbers with targets.

Also extend the XiveFabric with an accessor to the IVT. This will be
needed by the routing algorithm.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---

 Maybe we should introduce a XiveRouter model to hold the IVT. To be
 discussed.

 Changes since v2 :

 - introduced the XiveFabric interface

 default-configs/ppc64-softmmu.mak |   1 +
 hw/intc/Makefile.objs             |   1 +
 hw/intc/spapr_xive.c              | 159 ++++++++++++++++++++++++++++++++++++++
 hw/intc/xive.c                    |   7 ++
 include/hw/ppc/spapr_xive.h       |  31 ++++++++
 include/hw/ppc/xive.h             |   5 ++
 include/hw/ppc/xive_regs.h        |  33 ++++++++
 7 files changed, 237 insertions(+)
 create mode 100644 hw/intc/spapr_xive.c
 create mode 100644 include/hw/ppc/spapr_xive.h
 create mode 100644 include/hw/ppc/xive_regs.h

Comments

David Gibson April 24, 2018, 6:51 a.m. UTC | #1
On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
> sPAPRXive is a model for the XIVE interrupt controller device of the
> sPAPR machine. It holds the routing XIVE table, the Interrupt
> Virtualization Entry (IVE) table which associates interrupt source
> numbers with targets.
> 
> Also extend the XiveFabric with an accessor to the IVT. This will be
> needed by the routing algorithm.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
> 
>  May be should introduce a XiveRouter model to hold the IVT. To be
>  discussed.

Yeah, maybe.  Am I correct in thinking that on pnv there could be more
than one XiveRouter?

If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
interface, possibly its methods could just be class methods of
XiveRouter.

> 
>  Changes since v2 :
> 
>  - introduced the XiveFabric interface
> 
>  default-configs/ppc64-softmmu.mak |   1 +
>  hw/intc/Makefile.objs             |   1 +
>  hw/intc/spapr_xive.c              | 159 ++++++++++++++++++++++++++++++++++++++
>  hw/intc/xive.c                    |   7 ++
>  include/hw/ppc/spapr_xive.h       |  31 ++++++++
>  include/hw/ppc/xive.h             |   5 ++
>  include/hw/ppc/xive_regs.h        |  33 ++++++++
>  7 files changed, 237 insertions(+)
>  create mode 100644 hw/intc/spapr_xive.c
>  create mode 100644 include/hw/ppc/spapr_xive.h
>  create mode 100644 include/hw/ppc/xive_regs.h
> 
> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> index c6d13e757977..f8d34722931d 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -17,4 +17,5 @@ CONFIG_XICS=$(CONFIG_PSERIES)
>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
>  CONFIG_XIVE=$(CONFIG_PSERIES)
> +CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES)
>  CONFIG_MEM_HOTPLUG=y
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index 72a46ed91c31..301a8e972d91 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -38,6 +38,7 @@ obj-$(CONFIG_XICS) += xics.o
>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>  obj-$(CONFIG_XIVE) += xive.o
> +obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> new file mode 100644
> index 000000000000..020444e2665a
> --- /dev/null
> +++ b/hw/intc/spapr_xive.c
> @@ -0,0 +1,159 @@
> +/*
> + * QEMU PowerPC sPAPR XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "qapi/error.h"
> +#include "target/ppc/cpu.h"
> +#include "sysemu/cpus.h"
> +#include "monitor/monitor.h"
> +#include "hw/ppc/spapr_xive.h"
> +#include "hw/ppc/xive_regs.h"
> +
> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
> +{
> +    int i;
> +
> +    monitor_printf(mon, "IVE Table\n");
> +    for (i = 0; i < xive->nr_irqs; i++) {
> +        XiveIVE *ive = &xive->ivt[i];
> +
> +        if (!(ive->w & IVE_VALID)) {
> +            continue;
> +        }
> +
> +        monitor_printf(mon, "  %4x %s %08x %08x\n", i,
> +                       ive->w & IVE_MASKED ? "M" : " ",
> +                       (int) GETFIELD(IVE_EQ_INDEX, ive->w),
> +                       (int) GETFIELD(IVE_EQ_DATA, ive->w));
> +    }
> +}
> +
> +static void spapr_xive_reset(DeviceState *dev)
> +{
> +    sPAPRXive *xive = SPAPR_XIVE(dev);
> +    int i;
> +
> +    /* Mask all valid IVEs in the IRQ number space. */
> +    for (i = 0; i < xive->nr_irqs; i++) {
> +        XiveIVE *ive = &xive->ivt[i];
> +        if (ive->w & IVE_VALID) {
> +            ive->w |= IVE_MASKED;
> +        }
> +    }
> +}
> +
> +static void spapr_xive_init(Object *obj)

I'm trying to standardize on instance_init methods being called
*_instance_init().  It helps to make it obvious that this is indeed an
instance_init() method, rather than one of the various other init
calls that exist in various places.

> +{
> +
> +}
> +
> +static void spapr_xive_realize(DeviceState *dev, Error **errp)
> +{
> +    sPAPRXive *xive = SPAPR_XIVE(dev);
> +
> +    if (!xive->nr_irqs) {
> +        error_setg(errp, "Number of interrupt needs to be greater 0");
> +        return;
> +    }
> +
> +    /* Allocate the Interrupt Virtualization Table */
> +    xive->ivt = g_new0(XiveIVE, xive->nr_irqs);
> +}
> +
> +static XiveIVE *spapr_xive_get_ive(XiveFabric *xf, uint32_t lisn)
> +{
> +    sPAPRXive *xive = SPAPR_XIVE(xf);
> +
> +    return lisn < xive->nr_irqs ? &xive->ivt[lisn] : NULL;
> +}
> +
> +static const VMStateDescription vmstate_spapr_xive_ive = {
> +    .name = TYPE_SPAPR_XIVE "/ive",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField []) {
> +        VMSTATE_UINT64(w, XiveIVE),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +static const VMStateDescription vmstate_spapr_xive = {
> +    .name = TYPE_SPAPR_XIVE,
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
> +        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ivt, sPAPRXive, nr_irqs,
> +                                     vmstate_spapr_xive_ive, XiveIVE),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +static Property spapr_xive_properties[] = {
> +    DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void spapr_xive_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
> +
> +    dc->realize = spapr_xive_realize;
> +    dc->reset = spapr_xive_reset;
> +    dc->props = spapr_xive_properties;
> +    dc->desc = "sPAPR XIVE interrupt controller";
> +    dc->vmsd = &vmstate_spapr_xive;
> +
> +    xfc->get_ive = spapr_xive_get_ive;
> +}
> +
> +static const TypeInfo spapr_xive_info = {
> +    .name = TYPE_SPAPR_XIVE,
> +    .parent = TYPE_SYS_BUS_DEVICE,
> +    .instance_init = spapr_xive_init,
> +    .instance_size = sizeof(sPAPRXive),
> +    .class_init = spapr_xive_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +            { TYPE_XIVE_FABRIC },
> +            { },
> +    },
> +};
> +
> +static void spapr_xive_register_types(void)
> +{
> +    type_register_static(&spapr_xive_info);
> +}
> +
> +type_init(spapr_xive_register_types)
> +
> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi)
> +{
> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
> +
> +    if (!ive) {
> +        return false;
> +    }
> +
> +    ive->w |= IVE_VALID;
> +    return true;
> +}
> +
> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn)
> +{
> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
> +
> +    if (!ive) {
> +        return false;
> +    }
> +
> +    ive->w &= ~IVE_VALID;
> +    return true;
> +}
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index b4c3d06c1219..dccad0318834 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -20,6 +20,13 @@
>   * XIVE Fabric
>   */
>  
> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn)
> +{
> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xf);
> +
> +    return xfc->get_ive(xf, lisn);
> +}
> +
>  static void xive_fabric_route(XiveFabric *xf, int lisn)
>  {
>  
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> new file mode 100644
> index 000000000000..1d966b5d3a96
> --- /dev/null
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -0,0 +1,31 @@
> +/*
> + * QEMU PowerPC sPAPR XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#ifndef PPC_SPAPR_XIVE_H
> +#define PPC_SPAPR_XIVE_H
> +
> +#include "hw/sysbus.h"
> +#include "hw/ppc/xive.h"
> +
> +#define TYPE_SPAPR_XIVE "spapr-xive"
> +#define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE)
> +
> +typedef struct sPAPRXive {
> +    SysBusDevice parent;
> +
> +    /* Routing table */
> +    XiveIVE      *ivt;
> +    uint32_t     nr_irqs;
> +} sPAPRXive;
> +
> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> +
> +#endif /* PPC_SPAPR_XIVE_H */
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 4fcae2c763e6..5b145816acdc 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -11,6 +11,7 @@
>  #define PPC_XIVE_H
>  
>  #include "hw/sysbus.h"
> +#include "hw/ppc/xive_regs.h"
>  
>  typedef struct XiveFabric XiveFabric;
>  
> @@ -166,6 +167,10 @@ typedef struct XiveFabric {
>  typedef struct XiveFabricClass {
>      InterfaceClass parent;
>      void (*notify)(XiveFabric *xf, uint32_t lisn);
> +
> +    XiveIVE *(*get_ive)(XiveFabric *xf, uint32_t lisn);
>  } XiveFabricClass;
>  
> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn);
> +
>  #endif /* PPC_XIVE_H */
> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
> new file mode 100644
> index 000000000000..5903f29eb789
> --- /dev/null
> +++ b/include/hw/ppc/xive_regs.h
> @@ -0,0 +1,33 @@
> +/*
> + * QEMU PowerPC XIVE interrupt controller model
> + *
> + * Copyright (c) 2016-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#ifndef _PPC_XIVE_REGS_H
> +#define _PPC_XIVE_REGS_H
> +
> +/* IVE/EAS
> + *
> + * One per interrupt source. Targets that interrupt to a given EQ
> + * and provides the corresponding logical interrupt number (EQ data)
> + *
> + * We also map this structure to the escalation descriptor inside
> + * an EQ, though in that case the valid and masked bits are not used.
> + */
> +typedef struct XiveIVE {
> +        /* Use a single 64-bit definition to make it easier to
> +         * perform atomic updates
> +         */
> +        uint64_t        w;
> +#define IVE_VALID       PPC_BIT(0)
> +#define IVE_EQ_BLOCK    PPC_BITMASK(4, 7)        /* Destination EQ block# */
> +#define IVE_EQ_INDEX    PPC_BITMASK(8, 31)       /* Destination EQ index */
> +#define IVE_MASKED      PPC_BIT(32)              /* Masked */
> +#define IVE_EQ_DATA     PPC_BITMASK(33, 63)      /* Data written to the EQ */
> +} XiveIVE;
> +
> +#endif /* _INTC_XIVE_INTERNAL_H */
Cédric Le Goater April 24, 2018, 9:46 a.m. UTC | #2
On 04/24/2018 08:51 AM, David Gibson wrote:
> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
>> sPAPRXive is a model for the XIVE interrupt controller device of the
>> sPAPR machine. It holds the routing XIVE table, the Interrupt
>> Virtualization Entry (IVE) table which associates interrupt source
>> numbers with targets.
>>
>> Also extend the XiveFabric with an accessor to the IVT. This will be
>> needed by the routing algorithm.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>
>>  May be should introduce a XiveRouter model to hold the IVT. To be
>>  discussed.
> 
> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
> than one XiveRouter?

There is only one, the main IC. 

> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
> interface, possibly its methods could just be class methods of
> XiveRouter.

Yes. We could introduce a XiveRouter to share the ivt table between 
the sPAPRXive and the PnvXIVE models, the interrupt controllers of
the machines. Methods would provide a way to get the ivt/eq/nvt
objects required for routing. I need to add a set_eq() to push the
EQ data.

The XiveRouter would also be a XiveFabric (or some other name) to 
let the internal sources of the interrupt controller forward events.

>>
>>  Changes since v2 :
>>
>>  - introduced the XiveFabric interface
>>
>>  default-configs/ppc64-softmmu.mak |   1 +
>>  hw/intc/Makefile.objs             |   1 +
>>  hw/intc/spapr_xive.c              | 159 ++++++++++++++++++++++++++++++++++++++
>>  hw/intc/xive.c                    |   7 ++
>>  include/hw/ppc/spapr_xive.h       |  31 ++++++++
>>  include/hw/ppc/xive.h             |   5 ++
>>  include/hw/ppc/xive_regs.h        |  33 ++++++++
>>  7 files changed, 237 insertions(+)
>>  create mode 100644 hw/intc/spapr_xive.c
>>  create mode 100644 include/hw/ppc/spapr_xive.h
>>  create mode 100644 include/hw/ppc/xive_regs.h
>>
>> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
>> index c6d13e757977..f8d34722931d 100644
>> --- a/default-configs/ppc64-softmmu.mak
>> +++ b/default-configs/ppc64-softmmu.mak
>> @@ -17,4 +17,5 @@ CONFIG_XICS=$(CONFIG_PSERIES)
>>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
>>  CONFIG_XIVE=$(CONFIG_PSERIES)
>> +CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES)
>>  CONFIG_MEM_HOTPLUG=y
>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>> index 72a46ed91c31..301a8e972d91 100644
>> --- a/hw/intc/Makefile.objs
>> +++ b/hw/intc/Makefile.objs
>> @@ -38,6 +38,7 @@ obj-$(CONFIG_XICS) += xics.o
>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>>  obj-$(CONFIG_XIVE) += xive.o
>> +obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
>>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> new file mode 100644
>> index 000000000000..020444e2665a
>> --- /dev/null
>> +++ b/hw/intc/spapr_xive.c
>> @@ -0,0 +1,159 @@
>> +/*
>> + * QEMU PowerPC sPAPR XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2017-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/log.h"
>> +#include "qapi/error.h"
>> +#include "target/ppc/cpu.h"
>> +#include "sysemu/cpus.h"
>> +#include "monitor/monitor.h"
>> +#include "hw/ppc/spapr_xive.h"
>> +#include "hw/ppc/xive_regs.h"
>> +
>> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>> +{
>> +    int i;
>> +
>> +    monitor_printf(mon, "IVE Table\n");
>> +    for (i = 0; i < xive->nr_irqs; i++) {
>> +        XiveIVE *ive = &xive->ivt[i];
>> +
>> +        if (!(ive->w & IVE_VALID)) {
>> +            continue;
>> +        }
>> +
>> +        monitor_printf(mon, "  %4x %s %08x %08x\n", i,
>> +                       ive->w & IVE_MASKED ? "M" : " ",
>> +                       (int) GETFIELD(IVE_EQ_INDEX, ive->w),
>> +                       (int) GETFIELD(IVE_EQ_DATA, ive->w));
>> +    }
>> +}
>> +
>> +static void spapr_xive_reset(DeviceState *dev)
>> +{
>> +    sPAPRXive *xive = SPAPR_XIVE(dev);
>> +    int i;
>> +
>> +    /* Mask all valid IVEs in the IRQ number space. */
>> +    for (i = 0; i < xive->nr_irqs; i++) {
>> +        XiveIVE *ive = &xive->ivt[i];
>> +        if (ive->w & IVE_VALID) {
>> +            ive->w |= IVE_MASKED;
>> +        }
>> +    }
>> +}
>> +
>> +static void spapr_xive_init(Object *obj)
> 
> I'm trying to standardize on init_instance methods being called
> *_instance_init().  It helps to make it obvious that this is ineed an
> instance_init() method, rather than one of the various other init
> calls that exist in various places.

ok. this is good practice. I will fix.

Thanks,

C.

> 
>> +{
>> +
>> +}
>> +
>> +static void spapr_xive_realize(DeviceState *dev, Error **errp)
>> +{
>> +    sPAPRXive *xive = SPAPR_XIVE(dev);
>> +
>> +    if (!xive->nr_irqs) {
>> +        error_setg(errp, "Number of interrupt needs to be greater 0");
>> +        return;
>> +    }
>> +
>> +    /* Allocate the Interrupt Virtualization Table */
>> +    xive->ivt = g_new0(XiveIVE, xive->nr_irqs);
>> +}
>> +
>> +static XiveIVE *spapr_xive_get_ive(XiveFabric *xf, uint32_t lisn)
>> +{
>> +    sPAPRXive *xive = SPAPR_XIVE(xf);
>> +
>> +    return lisn < xive->nr_irqs ? &xive->ivt[lisn] : NULL;
>> +}
>> +
>> +static const VMStateDescription vmstate_spapr_xive_ive = {
>> +    .name = TYPE_SPAPR_XIVE "/ive",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField []) {
>> +        VMSTATE_UINT64(w, XiveIVE),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>> +static const VMStateDescription vmstate_spapr_xive = {
>> +    .name = TYPE_SPAPR_XIVE,
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
>> +        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ivt, sPAPRXive, nr_irqs,
>> +                                     vmstate_spapr_xive_ive, XiveIVE),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>> +static Property spapr_xive_properties[] = {
>> +    DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void spapr_xive_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
>> +
>> +    dc->realize = spapr_xive_realize;
>> +    dc->reset = spapr_xive_reset;
>> +    dc->props = spapr_xive_properties;
>> +    dc->desc = "sPAPR XIVE interrupt controller";
>> +    dc->vmsd = &vmstate_spapr_xive;
>> +
>> +    xfc->get_ive = spapr_xive_get_ive;
>> +}
>> +
>> +static const TypeInfo spapr_xive_info = {
>> +    .name = TYPE_SPAPR_XIVE,
>> +    .parent = TYPE_SYS_BUS_DEVICE,
>> +    .instance_init = spapr_xive_init,
>> +    .instance_size = sizeof(sPAPRXive),
>> +    .class_init = spapr_xive_class_init,
>> +    .interfaces = (InterfaceInfo[]) {
>> +            { TYPE_XIVE_FABRIC },
>> +            { },
>> +    },
>> +};
>> +
>> +static void spapr_xive_register_types(void)
>> +{
>> +    type_register_static(&spapr_xive_info);
>> +}
>> +
>> +type_init(spapr_xive_register_types)
>> +
>> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi)
>> +{
>> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
>> +
>> +    if (!ive) {
>> +        return false;
>> +    }
>> +
>> +    ive->w |= IVE_VALID;
>> +    return true;
>> +}
>> +
>> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn)
>> +{
>> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
>> +
>> +    if (!ive) {
>> +        return false;
>> +    }
>> +
>> +    ive->w &= ~IVE_VALID;
>> +    return true;
>> +}
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index b4c3d06c1219..dccad0318834 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -20,6 +20,13 @@
>>   * XIVE Fabric
>>   */
>>  
>> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn)
>> +{
>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xf);
>> +
>> +    return xfc->get_ive(xf, lisn);
>> +}
>> +
>>  static void xive_fabric_route(XiveFabric *xf, int lisn)
>>  {
>>  
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> new file mode 100644
>> index 000000000000..1d966b5d3a96
>> --- /dev/null
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -0,0 +1,31 @@
>> +/*
>> + * QEMU PowerPC sPAPR XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2017-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef PPC_SPAPR_XIVE_H
>> +#define PPC_SPAPR_XIVE_H
>> +
>> +#include "hw/sysbus.h"
>> +#include "hw/ppc/xive.h"
>> +
>> +#define TYPE_SPAPR_XIVE "spapr-xive"
>> +#define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE)
>> +
>> +typedef struct sPAPRXive {
>> +    SysBusDevice parent;
>> +
>> +    /* Routing table */
>> +    XiveIVE      *ivt;
>> +    uint32_t     nr_irqs;
>> +} sPAPRXive;
>> +
>> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
>> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
>> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>> +
>> +#endif /* PPC_SPAPR_XIVE_H */
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index 4fcae2c763e6..5b145816acdc 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -11,6 +11,7 @@
>>  #define PPC_XIVE_H
>>  
>>  #include "hw/sysbus.h"
>> +#include "hw/ppc/xive_regs.h"
>>  
>>  typedef struct XiveFabric XiveFabric;
>>  
>> @@ -166,6 +167,10 @@ typedef struct XiveFabric {
>>  typedef struct XiveFabricClass {
>>      InterfaceClass parent;
>>      void (*notify)(XiveFabric *xf, uint32_t lisn);
>> +
>> +    XiveIVE *(*get_ive)(XiveFabric *xf, uint32_t lisn);
>>  } XiveFabricClass;
>>  
>> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn);
>> +
>>  #endif /* PPC_XIVE_H */
>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
>> new file mode 100644
>> index 000000000000..5903f29eb789
>> --- /dev/null
>> +++ b/include/hw/ppc/xive_regs.h
>> @@ -0,0 +1,33 @@
>> +/*
>> + * QEMU PowerPC XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2016-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef _PPC_XIVE_REGS_H
>> +#define _PPC_XIVE_REGS_H
>> +
>> +/* IVE/EAS
>> + *
>> + * One per interrupt source. Targets that interrupt to a given EQ
>> + * and provides the corresponding logical interrupt number (EQ data)
>> + *
>> + * We also map this structure to the escalation descriptor inside
>> + * an EQ, though in that case the valid and masked bits are not used.
>> + */
>> +typedef struct XiveIVE {
>> +        /* Use a single 64-bit definition to make it easier to
>> +         * perform atomic updates
>> +         */
>> +        uint64_t        w;
>> +#define IVE_VALID       PPC_BIT(0)
>> +#define IVE_EQ_BLOCK    PPC_BITMASK(4, 7)        /* Destination EQ block# */
>> +#define IVE_EQ_INDEX    PPC_BITMASK(8, 31)       /* Destination EQ index */
>> +#define IVE_MASKED      PPC_BIT(32)              /* Masked */
>> +#define IVE_EQ_DATA     PPC_BITMASK(33, 63)      /* Data written to the EQ */
>> +} XiveIVE;
>> +
>> +#endif /* _INTC_XIVE_INTERNAL_H */
>
David Gibson April 26, 2018, 4:20 a.m. UTC | #3
On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
> On 04/24/2018 08:51 AM, David Gibson wrote:
> > On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
> >> sPAPRXive is a model for the XIVE interrupt controller device of the
> >> sPAPR machine. It holds the routing XIVE table, the Interrupt
> >> Virtualization Entry (IVE) table which associates interrupt source
> >> numbers with targets.
> >>
> >> Also extend the XiveFabric with an accessor to the IVT. This will be
> >> needed by the routing algorithm.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>
> >>  May be should introduce a XiveRouter model to hold the IVT. To be
> >>  discussed.
> > 
> > Yeah, maybe.  Am I correct in thinking that on pnv there could be more
> > than one XiveRouter?
> 
> There is only one, the main IC. 

Ok, that's what I thought originally.  In that case some of the stuff
in the patches really doesn't make sense to me.

> > If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
> > interface, possibly its methods could just be class methods of
> > XiveRouter.
> 
> Yes. We could introduce a XiveRouter to share the ivt table between 
> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
> the machines. Methods would provide way to get the ivt/eq/nvt
> objects required for routing. I need to add a set_eq() to push the
> EQ data.

Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
object which owns the IVT.  It may or may not do other stuff as well.

Now IIUC, on pnv the IVT lives in main system memory.  Under PAPR is
the IVT in guest memory, or is it outside (updated by
hypercalls/rtas)?
 
> The XiveRouter would also be a XiveFabric (or some other name) to 
> let the internal sources of the interrupt controller forward events.

The further we go here, the less sure I am that XiveFabric even makes
sense as a concept.

> 
> >>
> >>  Changes since v2 :
> >>
> >>  - introduced the XiveFabric interface
> >>
> >>  default-configs/ppc64-softmmu.mak |   1 +
> >>  hw/intc/Makefile.objs             |   1 +
> >>  hw/intc/spapr_xive.c              | 159 ++++++++++++++++++++++++++++++++++++++
> >>  hw/intc/xive.c                    |   7 ++
> >>  include/hw/ppc/spapr_xive.h       |  31 ++++++++
> >>  include/hw/ppc/xive.h             |   5 ++
> >>  include/hw/ppc/xive_regs.h        |  33 ++++++++
> >>  7 files changed, 237 insertions(+)
> >>  create mode 100644 hw/intc/spapr_xive.c
> >>  create mode 100644 include/hw/ppc/spapr_xive.h
> >>  create mode 100644 include/hw/ppc/xive_regs.h
> >>
> >> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> >> index c6d13e757977..f8d34722931d 100644
> >> --- a/default-configs/ppc64-softmmu.mak
> >> +++ b/default-configs/ppc64-softmmu.mak
> >> @@ -17,4 +17,5 @@ CONFIG_XICS=$(CONFIG_PSERIES)
> >>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
> >>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
> >>  CONFIG_XIVE=$(CONFIG_PSERIES)
> >> +CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES)
> >>  CONFIG_MEM_HOTPLUG=y
> >> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> >> index 72a46ed91c31..301a8e972d91 100644
> >> --- a/hw/intc/Makefile.objs
> >> +++ b/hw/intc/Makefile.objs
> >> @@ -38,6 +38,7 @@ obj-$(CONFIG_XICS) += xics.o
> >>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
> >>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> >>  obj-$(CONFIG_XIVE) += xive.o
> >> +obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
> >>  obj-$(CONFIG_POWERNV) += xics_pnv.o
> >>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
> >>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> >> new file mode 100644
> >> index 000000000000..020444e2665a
> >> --- /dev/null
> >> +++ b/hw/intc/spapr_xive.c
> >> @@ -0,0 +1,159 @@
> >> +/*
> >> + * QEMU PowerPC sPAPR XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2017-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#include "qemu/osdep.h"
> >> +#include "qemu/log.h"
> >> +#include "qapi/error.h"
> >> +#include "target/ppc/cpu.h"
> >> +#include "sysemu/cpus.h"
> >> +#include "monitor/monitor.h"
> >> +#include "hw/ppc/spapr_xive.h"
> >> +#include "hw/ppc/xive_regs.h"
> >> +
> >> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
> >> +{
> >> +    int i;
> >> +
> >> +    monitor_printf(mon, "IVE Table\n");
> >> +    for (i = 0; i < xive->nr_irqs; i++) {
> >> +        XiveIVE *ive = &xive->ivt[i];
> >> +
> >> +        if (!(ive->w & IVE_VALID)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        monitor_printf(mon, "  %4x %s %08x %08x\n", i,
> >> +                       ive->w & IVE_MASKED ? "M" : " ",
> >> +                       (int) GETFIELD(IVE_EQ_INDEX, ive->w),
> >> +                       (int) GETFIELD(IVE_EQ_DATA, ive->w));
> >> +    }
> >> +}
> >> +
> >> +static void spapr_xive_reset(DeviceState *dev)
> >> +{
> >> +    sPAPRXive *xive = SPAPR_XIVE(dev);
> >> +    int i;
> >> +
> >> +    /* Mask all valid IVEs in the IRQ number space. */
> >> +    for (i = 0; i < xive->nr_irqs; i++) {
> >> +        XiveIVE *ive = &xive->ivt[i];
> >> +        if (ive->w & IVE_VALID) {
> >> +            ive->w |= IVE_MASKED;
> >> +        }
> >> +    }
> >> +}
> >> +
> >> +static void spapr_xive_init(Object *obj)
> > 
> > I'm trying to standardize on init_instance methods being called
> > *_instance_init().  It helps to make it obvious that this is ineed an
> > instance_init() method, rather than one of the various other init
> > calls that exist in various places.
> 
> ok. this is good practice. I will fix.
> 
> Thanks,
> 
> C.
> 
> > 
> >> +{
> >> +
> >> +}
> >> +
> >> +static void spapr_xive_realize(DeviceState *dev, Error **errp)
> >> +{
> >> +    sPAPRXive *xive = SPAPR_XIVE(dev);
> >> +
> >> +    if (!xive->nr_irqs) {
> >> +        error_setg(errp, "Number of interrupt needs to be greater 0");
> >> +        return;
> >> +    }
> >> +
> >> +    /* Allocate the Interrupt Virtualization Table */
> >> +    xive->ivt = g_new0(XiveIVE, xive->nr_irqs);
> >> +}
> >> +
> >> +static XiveIVE *spapr_xive_get_ive(XiveFabric *xf, uint32_t lisn)
> >> +{
> >> +    sPAPRXive *xive = SPAPR_XIVE(xf);
> >> +
> >> +    return lisn < xive->nr_irqs ? &xive->ivt[lisn] : NULL;
> >> +}
> >> +
> >> +static const VMStateDescription vmstate_spapr_xive_ive = {
> >> +    .name = TYPE_SPAPR_XIVE "/ive",
> >> +    .version_id = 1,
> >> +    .minimum_version_id = 1,
> >> +    .fields = (VMStateField []) {
> >> +        VMSTATE_UINT64(w, XiveIVE),
> >> +        VMSTATE_END_OF_LIST()
> >> +    },
> >> +};
> >> +
> >> +static const VMStateDescription vmstate_spapr_xive = {
> >> +    .name = TYPE_SPAPR_XIVE,
> >> +    .version_id = 1,
> >> +    .minimum_version_id = 1,
> >> +    .fields = (VMStateField[]) {
> >> +        VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
> >> +        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ivt, sPAPRXive, nr_irqs,
> >> +                                     vmstate_spapr_xive_ive, XiveIVE),
> >> +        VMSTATE_END_OF_LIST()
> >> +    },
> >> +};
> >> +
> >> +static Property spapr_xive_properties[] = {
> >> +    DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
> >> +    DEFINE_PROP_END_OF_LIST(),
> >> +};
> >> +
> >> +static void spapr_xive_class_init(ObjectClass *klass, void *data)
> >> +{
> >> +    DeviceClass *dc = DEVICE_CLASS(klass);
> >> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
> >> +
> >> +    dc->realize = spapr_xive_realize;
> >> +    dc->reset = spapr_xive_reset;
> >> +    dc->props = spapr_xive_properties;
> >> +    dc->desc = "sPAPR XIVE interrupt controller";
> >> +    dc->vmsd = &vmstate_spapr_xive;
> >> +
> >> +    xfc->get_ive = spapr_xive_get_ive;
> >> +}
> >> +
> >> +static const TypeInfo spapr_xive_info = {
> >> +    .name = TYPE_SPAPR_XIVE,
> >> +    .parent = TYPE_SYS_BUS_DEVICE,
> >> +    .instance_init = spapr_xive_init,
> >> +    .instance_size = sizeof(sPAPRXive),
> >> +    .class_init = spapr_xive_class_init,
> >> +    .interfaces = (InterfaceInfo[]) {
> >> +            { TYPE_XIVE_FABRIC },
> >> +            { },
> >> +    },
> >> +};
> >> +
> >> +static void spapr_xive_register_types(void)
> >> +{
> >> +    type_register_static(&spapr_xive_info);
> >> +}
> >> +
> >> +type_init(spapr_xive_register_types)
> >> +
> >> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi)
> >> +{
> >> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
> >> +
> >> +    if (!ive) {
> >> +        return false;
> >> +    }
> >> +
> >> +    ive->w |= IVE_VALID;
> >> +    return true;
> >> +}
> >> +
> >> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn)
> >> +{
> >> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
> >> +
> >> +    if (!ive) {
> >> +        return false;
> >> +    }
> >> +
> >> +    ive->w &= ~IVE_VALID;
> >> +    return true;
> >> +}
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index b4c3d06c1219..dccad0318834 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -20,6 +20,13 @@
> >>   * XIVE Fabric
> >>   */
> >>  
> >> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn)
> >> +{
> >> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xf);
> >> +
> >> +    return xfc->get_ive(xf, lisn);
> >> +}
> >> +
> >>  static void xive_fabric_route(XiveFabric *xf, int lisn)
> >>  {
> >>  
> >> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> >> new file mode 100644
> >> index 000000000000..1d966b5d3a96
> >> --- /dev/null
> >> +++ b/include/hw/ppc/spapr_xive.h
> >> @@ -0,0 +1,31 @@
> >> +/*
> >> + * QEMU PowerPC sPAPR XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2017-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#ifndef PPC_SPAPR_XIVE_H
> >> +#define PPC_SPAPR_XIVE_H
> >> +
> >> +#include "hw/sysbus.h"
> >> +#include "hw/ppc/xive.h"
> >> +
> >> +#define TYPE_SPAPR_XIVE "spapr-xive"
> >> +#define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE)
> >> +
> >> +typedef struct sPAPRXive {
> >> +    SysBusDevice parent;
> >> +
> >> +    /* Routing table */
> >> +    XiveIVE      *ivt;
> >> +    uint32_t     nr_irqs;
> >> +} sPAPRXive;
> >> +
> >> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
> >> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
> >> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> >> +
> >> +#endif /* PPC_SPAPR_XIVE_H */
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index 4fcae2c763e6..5b145816acdc 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -11,6 +11,7 @@
> >>  #define PPC_XIVE_H
> >>  
> >>  #include "hw/sysbus.h"
> >> +#include "hw/ppc/xive_regs.h"
> >>  
> >>  typedef struct XiveFabric XiveFabric;
> >>  
> >> @@ -166,6 +167,10 @@ typedef struct XiveFabric {
> >>  typedef struct XiveFabricClass {
> >>      InterfaceClass parent;
> >>      void (*notify)(XiveFabric *xf, uint32_t lisn);
> >> +
> >> +    XiveIVE *(*get_ive)(XiveFabric *xf, uint32_t lisn);
> >>  } XiveFabricClass;
> >>  
> >> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn);
> >> +
> >>  #endif /* PPC_XIVE_H */
> >> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
> >> new file mode 100644
> >> index 000000000000..5903f29eb789
> >> --- /dev/null
> >> +++ b/include/hw/ppc/xive_regs.h
> >> @@ -0,0 +1,33 @@
> >> +/*
> >> + * QEMU PowerPC XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2016-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#ifndef _PPC_XIVE_REGS_H
> >> +#define _PPC_XIVE_REGS_H
> >> +
> >> +/* IVE/EAS
> >> + *
> >> + * One per interrupt source. Targets that interrupt to a given EQ
> >> + * and provides the corresponding logical interrupt number (EQ data)
> >> + *
> >> + * We also map this structure to the escalation descriptor inside
> >> + * an EQ, though in that case the valid and masked bits are not used.
> >> + */
> >> +typedef struct XiveIVE {
> >> +        /* Use a single 64-bit definition to make it easier to
> >> +         * perform atomic updates
> >> +         */
> >> +        uint64_t        w;
> >> +#define IVE_VALID       PPC_BIT(0)
> >> +#define IVE_EQ_BLOCK    PPC_BITMASK(4, 7)        /* Destination EQ block# */
> >> +#define IVE_EQ_INDEX    PPC_BITMASK(8, 31)       /* Destination EQ index */
> >> +#define IVE_MASKED      PPC_BIT(32)              /* Masked */
> >> +#define IVE_EQ_DATA     PPC_BITMASK(33, 63)      /* Data written to the EQ */
> >> +} XiveIVE;
> >> +
> >> +#endif /* _INTC_XIVE_INTERNAL_H */
> > 
>
Cédric Le Goater April 26, 2018, 10:43 a.m. UTC | #4
On 04/26/2018 06:20 AM, David Gibson wrote:
> On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
>> On 04/24/2018 08:51 AM, David Gibson wrote:
>>> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
>>>> sPAPRXive is a model for the XIVE interrupt controller device of the
>>>> sPAPR machine. It holds the routing XIVE table, the Interrupt
>>>> Virtualization Entry (IVE) table which associates interrupt source
>>>> numbers with targets.
>>>>
>>>> Also extend the XiveFabric with an accessor to the IVT. This will be
>>>> needed by the routing algorithm.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>
>>>>  May be should introduce a XiveRouter model to hold the IVT. To be
>>>>  discussed.
>>>
>>> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
>>> than one XiveRouter?
>>
>> There is only one, the main IC. 
> 
> Ok, that's what I thought originally.  In that case some of the stuff
> in the patches really doesn't make sense to me.

Well, there is one IC per chip on powernv, but we haven't reached that part
yet.

>>> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
>>> interface, possibly its methods could just be class methods of
>>> XiveRouter.
>>
>> Yes. We could introduce a XiveRouter to share the ivt table between 
>> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
>> the machines. Methods would provide way to get the ivt/eq/nvt
>> objects required for routing. I need to add a set_eq() to push the
>> EQ data.
> 
> Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
> object which owns the IVT.  

OK. that would be a model with some state and not an interface.

> It may or may not do other stuff as well.

Its only task would be to do the final event routing: get the IVE,
get the EQ, push the EQ DATA in the OS event queue, notify the CPU.

> Now IIUC, on pnv the IVT lives in main system memory.  

yes. It is allocated by skiboot in RAM and fed to the HW using some 
IC configuration registers. Then, each entry is configured with OPAL 
calls and the HW is updated using cache scrub registers. 

> Under PAPR is the IVT in guest memory, or is it outside (updated by
> hypercalls/rtas)?

Under sPAPR, the IVT is updated by the H_INT_SET_SOURCE_CONFIG hcall
which configures the targeting of an IRQ. It's not in the guest 
memory.

Under the hood, the IVT is still configured by OPAL under KVM and
by QEMU when kernel_irqchip=off.


>> The XiveRouter would also be a XiveFabric (or some other name) to 
>> let the internal sources of the interrupt controller forward events.
> 
> The further we go here, the less sure I am that XiveFabric even makes
> sense as a concept.

See previous email.

C.

>>
>>>>
>>>>  Changes since v2 :
>>>>
>>>>  - introduced the XiveFabric interface
>>>>
>>>>  default-configs/ppc64-softmmu.mak |   1 +
>>>>  hw/intc/Makefile.objs             |   1 +
>>>>  hw/intc/spapr_xive.c              | 159 ++++++++++++++++++++++++++++++++++++++
>>>>  hw/intc/xive.c                    |   7 ++
>>>>  include/hw/ppc/spapr_xive.h       |  31 ++++++++
>>>>  include/hw/ppc/xive.h             |   5 ++
>>>>  include/hw/ppc/xive_regs.h        |  33 ++++++++
>>>>  7 files changed, 237 insertions(+)
>>>>  create mode 100644 hw/intc/spapr_xive.c
>>>>  create mode 100644 include/hw/ppc/spapr_xive.h
>>>>  create mode 100644 include/hw/ppc/xive_regs.h
>>>>
>>>> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
>>>> index c6d13e757977..f8d34722931d 100644
>>>> --- a/default-configs/ppc64-softmmu.mak
>>>> +++ b/default-configs/ppc64-softmmu.mak
>>>> @@ -17,4 +17,5 @@ CONFIG_XICS=$(CONFIG_PSERIES)
>>>>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>>>>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
>>>>  CONFIG_XIVE=$(CONFIG_PSERIES)
>>>> +CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES)
>>>>  CONFIG_MEM_HOTPLUG=y
>>>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>>>> index 72a46ed91c31..301a8e972d91 100644
>>>> --- a/hw/intc/Makefile.objs
>>>> +++ b/hw/intc/Makefile.objs
>>>> @@ -38,6 +38,7 @@ obj-$(CONFIG_XICS) += xics.o
>>>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>>>>  obj-$(CONFIG_XIVE) += xive.o
>>>> +obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
>>>>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>>>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>>>> new file mode 100644
>>>> index 000000000000..020444e2665a
>>>> --- /dev/null
>>>> +++ b/hw/intc/spapr_xive.c
>>>> @@ -0,0 +1,159 @@
>>>> +/*
>>>> + * QEMU PowerPC sPAPR XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2017-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#include "qemu/osdep.h"
>>>> +#include "qemu/log.h"
>>>> +#include "qapi/error.h"
>>>> +#include "target/ppc/cpu.h"
>>>> +#include "sysemu/cpus.h"
>>>> +#include "monitor/monitor.h"
>>>> +#include "hw/ppc/spapr_xive.h"
>>>> +#include "hw/ppc/xive_regs.h"
>>>> +
>>>> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    monitor_printf(mon, "IVE Table\n");
>>>> +    for (i = 0; i < xive->nr_irqs; i++) {
>>>> +        XiveIVE *ive = &xive->ivt[i];
>>>> +
>>>> +        if (!(ive->w & IVE_VALID)) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        monitor_printf(mon, "  %4x %s %08x %08x\n", i,
>>>> +                       ive->w & IVE_MASKED ? "M" : " ",
>>>> +                       (int) GETFIELD(IVE_EQ_INDEX, ive->w),
>>>> +                       (int) GETFIELD(IVE_EQ_DATA, ive->w));
>>>> +    }
>>>> +}
>>>> +
>>>> +static void spapr_xive_reset(DeviceState *dev)
>>>> +{
>>>> +    sPAPRXive *xive = SPAPR_XIVE(dev);
>>>> +    int i;
>>>> +
>>>> +    /* Mask all valid IVEs in the IRQ number space. */
>>>> +    for (i = 0; i < xive->nr_irqs; i++) {
>>>> +        XiveIVE *ive = &xive->ivt[i];
>>>> +        if (ive->w & IVE_VALID) {
>>>> +            ive->w |= IVE_MASKED;
>>>> +        }
>>>> +    }
>>>> +}
>>>> +
>>>> +static void spapr_xive_init(Object *obj)
>>>
>>> I'm trying to standardize on init_instance methods being called
>>> *_instance_init().  It helps to make it obvious that this is ineed an
>>> instance_init() method, rather than one of the various other init
>>> calls that exist in various places.
>>
>> ok. this is good practice. I will fix.
>>
>> Thanks,
>>
>> C.
>>
>>>
>>>> +{
>>>> +
>>>> +}
>>>> +
>>>> +static void spapr_xive_realize(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    sPAPRXive *xive = SPAPR_XIVE(dev);
>>>> +
>>>> +    if (!xive->nr_irqs) {
>>>> +        error_setg(errp, "Number of interrupt needs to be greater 0");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* Allocate the Interrupt Virtualization Table */
>>>> +    xive->ivt = g_new0(XiveIVE, xive->nr_irqs);
>>>> +}
>>>> +
>>>> +static XiveIVE *spapr_xive_get_ive(XiveFabric *xf, uint32_t lisn)
>>>> +{
>>>> +    sPAPRXive *xive = SPAPR_XIVE(xf);
>>>> +
>>>> +    return lisn < xive->nr_irqs ? &xive->ivt[lisn] : NULL;
>>>> +}
>>>> +
>>>> +static const VMStateDescription vmstate_spapr_xive_ive = {
>>>> +    .name = TYPE_SPAPR_XIVE "/ive",
>>>> +    .version_id = 1,
>>>> +    .minimum_version_id = 1,
>>>> +    .fields = (VMStateField []) {
>>>> +        VMSTATE_UINT64(w, XiveIVE),
>>>> +        VMSTATE_END_OF_LIST()
>>>> +    },
>>>> +};
>>>> +
>>>> +static const VMStateDescription vmstate_spapr_xive = {
>>>> +    .name = TYPE_SPAPR_XIVE,
>>>> +    .version_id = 1,
>>>> +    .minimum_version_id = 1,
>>>> +    .fields = (VMStateField[]) {
>>>> +        VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
>>>> +        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ivt, sPAPRXive, nr_irqs,
>>>> +                                     vmstate_spapr_xive_ive, XiveIVE),
>>>> +        VMSTATE_END_OF_LIST()
>>>> +    },
>>>> +};
>>>> +
>>>> +static Property spapr_xive_properties[] = {
>>>> +    DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
>>>> +    DEFINE_PROP_END_OF_LIST(),
>>>> +};
>>>> +
>>>> +static void spapr_xive_class_init(ObjectClass *klass, void *data)
>>>> +{
>>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
>>>> +
>>>> +    dc->realize = spapr_xive_realize;
>>>> +    dc->reset = spapr_xive_reset;
>>>> +    dc->props = spapr_xive_properties;
>>>> +    dc->desc = "sPAPR XIVE interrupt controller";
>>>> +    dc->vmsd = &vmstate_spapr_xive;
>>>> +
>>>> +    xfc->get_ive = spapr_xive_get_ive;
>>>> +}
>>>> +
>>>> +static const TypeInfo spapr_xive_info = {
>>>> +    .name = TYPE_SPAPR_XIVE,
>>>> +    .parent = TYPE_SYS_BUS_DEVICE,
>>>> +    .instance_init = spapr_xive_init,
>>>> +    .instance_size = sizeof(sPAPRXive),
>>>> +    .class_init = spapr_xive_class_init,
>>>> +    .interfaces = (InterfaceInfo[]) {
>>>> +            { TYPE_XIVE_FABRIC },
>>>> +            { },
>>>> +    },
>>>> +};
>>>> +
>>>> +static void spapr_xive_register_types(void)
>>>> +{
>>>> +    type_register_static(&spapr_xive_info);
>>>> +}
>>>> +
>>>> +type_init(spapr_xive_register_types)
>>>> +
>>>> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi)
>>>> +{
>>>> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
>>>> +
>>>> +    if (!ive) {
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    ive->w |= IVE_VALID;
>>>> +    return true;
>>>> +}
>>>> +
>>>> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn)
>>>> +{
>>>> +    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
>>>> +
>>>> +    if (!ive) {
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    ive->w &= ~IVE_VALID;
>>>> +    return true;
>>>> +}
>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>> index b4c3d06c1219..dccad0318834 100644
>>>> --- a/hw/intc/xive.c
>>>> +++ b/hw/intc/xive.c
>>>> @@ -20,6 +20,13 @@
>>>>   * XIVE Fabric
>>>>   */
>>>>  
>>>> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn)
>>>> +{
>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xf);
>>>> +
>>>> +    return xfc->get_ive(xf, lisn);
>>>> +}
>>>> +
>>>>  static void xive_fabric_route(XiveFabric *xf, int lisn)
>>>>  {
>>>>  
>>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>>>> new file mode 100644
>>>> index 000000000000..1d966b5d3a96
>>>> --- /dev/null
>>>> +++ b/include/hw/ppc/spapr_xive.h
>>>> @@ -0,0 +1,31 @@
>>>> +/*
>>>> + * QEMU PowerPC sPAPR XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2017-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#ifndef PPC_SPAPR_XIVE_H
>>>> +#define PPC_SPAPR_XIVE_H
>>>> +
>>>> +#include "hw/sysbus.h"
>>>> +#include "hw/ppc/xive.h"
>>>> +
>>>> +#define TYPE_SPAPR_XIVE "spapr-xive"
>>>> +#define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE)
>>>> +
>>>> +typedef struct sPAPRXive {
>>>> +    SysBusDevice parent;
>>>> +
>>>> +    /* Routing table */
>>>> +    XiveIVE      *ivt;
>>>> +    uint32_t     nr_irqs;
>>>> +} sPAPRXive;
>>>> +
>>>> +bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
>>>> +bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
>>>> +void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>>>> +
>>>> +#endif /* PPC_SPAPR_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index 4fcae2c763e6..5b145816acdc 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -11,6 +11,7 @@
>>>>  #define PPC_XIVE_H
>>>>  
>>>>  #include "hw/sysbus.h"
>>>> +#include "hw/ppc/xive_regs.h"
>>>>  
>>>>  typedef struct XiveFabric XiveFabric;
>>>>  
>>>> @@ -166,6 +167,10 @@ typedef struct XiveFabric {
>>>>  typedef struct XiveFabricClass {
>>>>      InterfaceClass parent;
>>>>      void (*notify)(XiveFabric *xf, uint32_t lisn);
>>>> +
>>>> +    XiveIVE *(*get_ive)(XiveFabric *xf, uint32_t lisn);
>>>>  } XiveFabricClass;
>>>>  
>>>> +XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn);
>>>> +
>>>>  #endif /* PPC_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
>>>> new file mode 100644
>>>> index 000000000000..5903f29eb789
>>>> --- /dev/null
>>>> +++ b/include/hw/ppc/xive_regs.h
>>>> @@ -0,0 +1,33 @@
>>>> +/*
>>>> + * QEMU PowerPC XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2016-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#ifndef _PPC_XIVE_REGS_H
>>>> +#define _PPC_XIVE_REGS_H
>>>> +
>>>> +/* IVE/EAS
>>>> + *
>>>> + * One per interrupt source. Targets that interrupt to a given EQ
>>>> + * and provides the corresponding logical interrupt number (EQ data)
>>>> + *
>>>> + * We also map this structure to the escalation descriptor inside
>>>> + * an EQ, though in that case the valid and masked bits are not used.
>>>> + */
>>>> +typedef struct XiveIVE {
>>>> +        /* Use a single 64-bit definition to make it easier to
>>>> +         * perform atomic updates
>>>> +         */
>>>> +        uint64_t        w;
>>>> +#define IVE_VALID       PPC_BIT(0)
>>>> +#define IVE_EQ_BLOCK    PPC_BITMASK(4, 7)        /* Destination EQ block# */
>>>> +#define IVE_EQ_INDEX    PPC_BITMASK(8, 31)       /* Destination EQ index */
>>>> +#define IVE_MASKED      PPC_BIT(32)              /* Masked */
>>>> +#define IVE_EQ_DATA     PPC_BITMASK(33, 63)      /* Data written to the EQ */
>>>> +} XiveIVE;
>>>> +
>>>> +#endif /* _INTC_XIVE_INTERNAL_H */
>>>
>>
>
David Gibson May 3, 2018, 5:22 a.m. UTC | #5
On Thu, Apr 26, 2018 at 12:43:29PM +0200, Cédric Le Goater wrote:
> On 04/26/2018 06:20 AM, David Gibson wrote:
> > On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
> >> On 04/24/2018 08:51 AM, David Gibson wrote:
> >>> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
> >>>> sPAPRXive is a model for the XIVE interrupt controller device of the
> >>>> sPAPR machine. It holds the routing XIVE table, the Interrupt
> >>>> Virtualization Entry (IVE) table which associates interrupt source
> >>>> numbers with targets.
> >>>>
> >>>> Also extend the XiveFabric with an accessor to the IVT. This will be
> >>>> needed by the routing algorithm.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> ---
> >>>>
> >>>>  May be should introduce a XiveRouter model to hold the IVT. To be
> >>>>  discussed.
> >>>
> >>> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
> >>> than one XiveRouter?
> >>
> >> There is only one, the main IC. 
> > 
> > Ok, that's what I thought originally.  In that case some of the stuff
> > in the patches really doesn't make sense to me.
> 
> well, there is one IC per chip on powernv, but we haven't reach that part
> yet.

Hmm.  There are some things we can delay dealing with, but I don't think
this is one of them.  I think we need to understand how multichip is
going to work in order to come up with a sane architecture.  Otherwise
I fear we'll end up with something that we either need to horribly
bastardize for multichip, or have to rework things dramatically
leading to migration nightmares.

> >>> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
> >>> interface, possibly its methods could just be class methods of
> >>> XiveRouter.
> >>
> >> Yes. We could introduce a XiveRouter to share the ivt table between 
> >> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
> >> the machines. Methods would provide way to get the ivt/eq/nvt
> >> objects required for routing. I need to add a set_eq() to push the
> >> EQ data.
> > 
> > Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
> > object which owns the IVT.  
> 
> OK. that would be a model with some state and not an interface.

Yes.  For papr variant it would have the whole IVT contents as its
state.  For the powernv, just the registers telling it where to find
the IVT in RAM.

> > It may or may not do other stuff as well.
> 
> Its only task would be to do the final event routing: get the IVE,
> get the EQ, push the EQ DATA in the OS event queue, notify the CPU.

That seems like a lot of steps.  Up to push the EQ DATA, certainly.
And I guess it'll have to ping an NVT somehow, but I'm not sure it
should know about CPUs as such.

I'm not sure at this stage what should own the EQD table.  In the
multichip case is there one EQD table for every IVT?  I'm guessing
not - I figure the EQD table must be effectively global so that any
chip's router can send events to any EQ in the whole system.

> > Now IIUC, on pnv the IVT lives in main system memory.  
> 
> yes. It is allocated by skiboot in RAM and fed to the HW using some 
> IC configuration registers. Then, each entry is configured with OPAL 
> calls and the HW is updated using cache scrub registers. 

Right.  At least for the first pass we should be able to treat the
cache scrub registers as no-ops and just not cache anything in the
qemu implementation.

> > Under PAPR is the IVT in guest memory, or is it outside (updated by
> > hypercalls/rtas)?
> 
> Under sPAPR, the IVT is updated by the H_INT_SET_SOURCE_CONFIG hcall
> which configures the targeting of an IRQ. It's not in the guest 
> memory.

Right.

> Behind the hood, the IVT is still configured by OPAL under KVM and 
> by QEMU when kernel_irqchip=off

Sure.  Even with kernel_irqchip=on there's still logically a guest IVT
(or "IVT view" I guess), even if its actual entries are stored
distributed across various places in the host's IVTs.

> >> The XiveRouter would also be a XiveFabric (or some other name) to 
> >> let the internal sources of the interrupt controller forward events.
> > 
> > The further we go here, the less sure I am that XiveFabric even makes
> > sense as a concept.
> 
> See previous email.
Cédric Le Goater May 3, 2018, 4:50 p.m. UTC | #6
On 05/03/2018 07:22 AM, David Gibson wrote:
> On Thu, Apr 26, 2018 at 12:43:29PM +0200, Cédric Le Goater wrote:
>> On 04/26/2018 06:20 AM, David Gibson wrote:
>>> On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
>>>> On 04/24/2018 08:51 AM, David Gibson wrote:
>>>>> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
>>>>>> sPAPRXive is a model for the XIVE interrupt controller device of the
>>>>>> sPAPR machine. It holds the routing XIVE table, the Interrupt
>>>>>> Virtualization Entry (IVE) table which associates interrupt source
>>>>>> numbers with targets.
>>>>>>
>>>>>> Also extend the XiveFabric with an accessor to the IVT. This will be
>>>>>> needed by the routing algorithm.
>>>>>>
>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>> ---
>>>>>>
>>>>>>  May be should introduce a XiveRouter model to hold the IVT. To be
>>>>>>  discussed.
>>>>>
>>>>> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
>>>>> than one XiveRouter?
>>>>
>>>> There is only one, the main IC. 
>>>
>>> Ok, that's what I thought originally.  In that case some of the stuff
>>> in the patches really doesn't make sense to me.
>>
>> well, there is one IC per chip on powernv, but we haven't reach that part
>> yet.
> 
> Hmm.  There's some things we can delay dealing with, but I don't think
> this is one of them.  I think we need to understand how multichip is
> going to work in order to come up with a sane architecture.  Otherwise
> I fear we'll end up with something that we either need to horribly
> bastardize for multichip, or have to rework things dramatically
> leading to migration nightmares.

So, it is all controlled by MMIO, so we should be fine on that part. 
As for the internal tables, they are all configured by firmware, using
a chip identifier (block). I need to check how the remote XIVEs are
accessed. I think this is by MMIO.

I haven't looked at multichip XIVE support but I am not too worried as 
the framework is already in place for the machine.
 
>>>>> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
>>>>> interface, possibly its methods could just be class methods of
>>>>> XiveRouter.
>>>>
>>>> Yes. We could introduce a XiveRouter to share the ivt table between 
>>>> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
>>>> the machines. Methods would provide way to get the ivt/eq/nvt
>>>> objects required for routing. I need to add a set_eq() to push the
>>>> EQ data.
>>>
>>> Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
>>> object which owns the IVT.  
>>
>> OK. that would be a model with some state and not an interface.
> 
> Yes.  For papr variant it would have the whole IVT contents as its
> state.  For the powernv, just the registers telling it where to find
> the IVT in RAM.
> 
>>> It may or may not do other stuff as well.
>>
>> Its only task would be to do the final event routing: get the IVE,
>> get the EQ, push the EQ DATA in the OS event queue, notify the CPU.
> 
> That seems like a lot of steps.  Up to push the EQ DATA, certainly.
> And I guess it'll have to ping an NVT somehow, but I'm not sure it
> should know about CPUs as such.

For PowerNV, the concept could be generalized, yes. An NVT can 
contain the interrupt state of a logical server but the common 
case is baremetal without guests for QEMU and so we have a NVT 
per cpu. 

PowerNV will have some limitations but we can make it better than
today for sure. It boots.

We can improve some of the NVT notification process and, eventually, the
way NVTs are matched. Maybe support remote engines if the
NVT is not local. I have not looked at the details.

> I'm not sure at this stage what should own the EQD table.

The EQDT is in RAM.

> In the multichip case is there one EQD table for every IVT?

There is one EQDT per chip, same for the IVT. They are in RAM, 
identified with a block ID.

>  I'm guessing
> not - I figure the EQD table must be effectively global so that any
> chip's router can send events to any EQ in the whole system.
>>>> Now IIUC, on pnv the IVT lives in main system memory.  
>>
>> yes. It is allocated by skiboot in RAM and fed to the HW using some 
>> IC configuration registers. Then, each entry is configured with OPAL 
>> calls and the HW is updated using cache scrub registers. 
> 
> Right.  At least for the first pass we should be able to treat the
> cache scrub registers as no-ops and just not cache anything in the
> qemu implementation.

The model currently supports the cache scrub registers, we need it
to update some values. It's not too complex. 


>>> Under PAPR is the IVT in guest memory, or is it outside (updated by
>>> hypercalls/rtas)?
>>
>> Under sPAPR, the IVT is updated by the H_INT_SET_SOURCE_CONFIG hcall
>> which configures the targeting of an IRQ. It's not in the guest 
>> memory.
> 
> Right.
> 
>> Behind the hood, the IVT is still configured by OPAL under KVM and 
>> by QEMU when kernel_irqchip=off
> 
> Sure.  Even with kernel_irqchip=on there's still logically a guest IVT
> (or "IVT view" I guess), even if it's actual entries are stored
> distributed across various places in the host's IVTs.

yes. The XIVE KVM device caches the info. This is used to dump the 
state without doing OPAL calls.

C. 


>>>> The XiveRouter would also be a XiveFabric (or some other name) to 
>>>> let the internal sources of the interrupt controller forward events.
>>>
>>> The further we go here, the less sure I am that XiveFabric even makes
>>> sense as a concept.
>>
>> See previous email.
>
David Gibson May 4, 2018, 3:33 a.m. UTC | #7
On Thu, May 03, 2018 at 06:50:09PM +0200, Cédric Le Goater wrote:
> On 05/03/2018 07:22 AM, David Gibson wrote:
> > On Thu, Apr 26, 2018 at 12:43:29PM +0200, Cédric Le Goater wrote:
> >> On 04/26/2018 06:20 AM, David Gibson wrote:
> >>> On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
> >>>> On 04/24/2018 08:51 AM, David Gibson wrote:
> >>>>> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
> >>>>>> sPAPRXive is a model for the XIVE interrupt controller device of the
> >>>>>> sPAPR machine. It holds the routing XIVE table, the Interrupt
> >>>>>> Virtualization Entry (IVE) table which associates interrupt source
> >>>>>> numbers with targets.
> >>>>>>
> >>>>>> Also extend the XiveFabric with an accessor to the IVT. This will be
> >>>>>> needed by the routing algorithm.
> >>>>>>
> >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>> ---
> >>>>>>
> >>>>>>  May be should introduce a XiveRouter model to hold the IVT. To be
> >>>>>>  discussed.
> >>>>>
> >>>>> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
> >>>>> than one XiveRouter?
> >>>>
> >>>> There is only one, the main IC. 
> >>>
> >>> Ok, that's what I thought originally.  In that case some of the stuff
> >>> in the patches really doesn't make sense to me.
> >>
> >> well, there is one IC per chip on powernv, but we haven't reach that part
> >> yet.
> > 
> > Hmm.  There's some things we can delay dealing with, but I don't think
> > this is one of them.  I think we need to understand how multichip is
> > going to work in order to come up with a sane architecture.  Otherwise
> > I fear we'll end up with something that we either need to horribly
> > bastardize for multichip, or have to rework things dramatically
> > leading to migration nightmares.
> 
> So, it is all controlled by MMIO, so we should be fine on that part. 
> As for the internal tables, they are all configured by firmware, using
> a chip identifier (block). I need to check how the remote XIVE are 
> accessed. I think this is by MMIO. 

Right, but for powernv we execute OPAL inside the VM, rather than
emulating its effects.  So we still need to model the actual hardware
interfaces.  OPAL hides the details from the kernel, but not from us
on the other side.

> I haven't looked at multichip XIVE support but I am not too worried as 
> the framework is already in place for the machine.
>  
> >>>>> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
> >>>>> interface, possibly its methods could just be class methods of
> >>>>> XiveRouter.
> >>>>
> >>>> Yes. We could introduce a XiveRouter to share the ivt table between 
> >>>> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
> >>>> the machines. Methods would provide way to get the ivt/eq/nvt
> >>>> objects required for routing. I need to add a set_eq() to push the
> >>>> EQ data.
> >>>
> >>> Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
> >>> object which owns the IVT.  
> >>
> >> OK. that would be a model with some state and not an interface.
> > 
> > Yes.  For papr variant it would have the whole IVT contents as its
> > state.  For the powernv, just the registers telling it where to find
> > the IVT in RAM.
> > 
> >>> It may or may not do other stuff as well.
> >>
> >> Its only task would be to do the final event routing: get the IVE,
> >> get the EQ, push the EQ DATA in the OS event queue, notify the CPU.
> > 
> > That seems like a lot of steps.  Up to push the EQ DATA, certainly.
> > And I guess it'll have to ping an NVT somehow, but I'm not sure it
> > should know about CPUs as such.
> 
> For PowerNV, the concept could be generalized, yes. An NVT can 
> contain the interrupt state of a logical server but the common 
> case is baremetal without guests for QEMU and so we have a NVT 
> per cpu. 

Hmm.  We eventually want to support a kernel running guests under
qemu/powernv though, right?  So even if we don't allow it right now,
we don't want allowing that to require major surgery to our
architecture.

> PowerNV will have some limitation but we can make it better than 
> today for sure. It boots.
> 
> We can improve some of the NVT notification process, the way NVT 
> are matched eventually. may be support remote engines if the
> NVT is not local. I have not looked at the details.
> 
> > I'm not sure at this stage what should own the EQD table.
> 
> The EQDT is in RAM.

Not for spapr, it's not.  And even when it is in RAM, something needs
to own the register that gives its base address.

> > In the multichip case is there one EQD table for every IVT?
> 
> There is one EQDT per chip, same for the IVT. They are in RAM, 
> identified with a block ID.
> 
> >  I'm guessing
> > not - I figure the EQD table must be effectively global so that any
> > chip's router can send events to any EQ in the whole system.
> >>>> Now IIUC, on pnv the IVT lives in main system memory.  
> >>
> >> yes. It is allocated by skiboot in RAM and fed to the HW using some 
> >> IC configuration registers. Then, each entry is configured with OPAL 
> >> calls and the HW is updated using cache scrub registers. 
> > 
> > Right.  At least for the first pass we should be able to treat the
> > cache scrub registers as no-ops and just not cache anything in the
> > qemu implementation.
> 
> The model currently supports the cache scrub registers, we need it
> to update some values. It's not too complex.

Ok.

> >>> Under PAPR is the IVT in guest memory, or is it outside (updated by
> >>> hypercalls/rtas)?
> >>
> >> Under sPAPR, the IVT is updated by the H_INT_SET_SOURCE_CONFIG hcall
> >> which configures the targeting of an IRQ. It's not in the guest 
> >> memory.
> > 
> > Right.
> > 
> >> Behind the hood, the IVT is still configured by OPAL under KVM and 
> >> by QEMU when kernel_irqchip=off
> > 
> > Sure.  Even with kernel_irqchip=on there's still logically a guest IVT
> > (or "IVT view" I guess), even if it's actual entries are stored
> > distributed across various places in the host's IVTs.
> 
> yes. The XIVE KVM device caches the info. This is used to dump the 
> state without doing OPAL calls.
> 
> C. 
> 
> 
> >>>> The XiveRouter would also be a XiveFabric (or some other name) to 
> >>>> let the internal sources of the interrupt controller forward events.
> >>>
> >>> The further we go here, the less sure I am that XiveFabric even makes
> >>> sense as a concept.
> >>
> >> See previous email.
> > 
>
Cédric Le Goater May 4, 2018, 1:05 p.m. UTC | #8
On 05/04/2018 05:33 AM, David Gibson wrote:
> On Thu, May 03, 2018 at 06:50:09PM +0200, Cédric Le Goater wrote:
>> On 05/03/2018 07:22 AM, David Gibson wrote:
>>> On Thu, Apr 26, 2018 at 12:43:29PM +0200, Cédric Le Goater wrote:
>>>> On 04/26/2018 06:20 AM, David Gibson wrote:
>>>>> On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
>>>>>> On 04/24/2018 08:51 AM, David Gibson wrote:
>>>>>>> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
>>>>>>>> sPAPRXive is a model for the XIVE interrupt controller device of the
>>>>>>>> sPAPR machine. It holds the routing XIVE table, the Interrupt
>>>>>>>> Virtualization Entry (IVE) table which associates interrupt source
>>>>>>>> numbers with targets.
>>>>>>>>
>>>>>>>> Also extend the XiveFabric with an accessor to the IVT. This will be
>>>>>>>> needed by the routing algorithm.
>>>>>>>>
>>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>>>> ---
>>>>>>>>
>>>>>>>>  May be should introduce a XiveRouter model to hold the IVT. To be
>>>>>>>>  discussed.
>>>>>>>
>>>>>>> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
>>>>>>> than one XiveRouter?
>>>>>>
>>>>>> There is only one, the main IC. 
>>>>>
>>>>> Ok, that's what I thought originally.  In that case some of the stuff
>>>>> in the patches really doesn't make sense to me.
>>>>
>>>> well, there is one IC per chip on powernv, but we haven't reach that part
>>>> yet.
>>>
>>> Hmm.  There's some things we can delay dealing with, but I don't think
>>> this is one of them.  I think we need to understand how multichip is
>>> going to work in order to come up with a sane architecture.  Otherwise
>>> I fear we'll end up with something that we either need to horribly
>>> bastardize for multichip, or have to rework things dramatically
>>> leading to migration nightmares.
>>
>> So, it is all controlled by MMIO, so we should be fine on that part. 
>> As for the internal tables, they are all configured by firmware, using
>> a chip identifier (block). I need to check how the remote XIVE are 
>> accessed. I think this is by MMIO. 
> 
> Right, but for powernv we execute OPAL inside the VM, rather than
> emulating its effects.  So we still need to model the actual hardware
> interfaces.  OPAL hides the details from the kernel, but not from us
> on the other side.

Yes. This is the case in the current model. I took a look today and
I have a few fixes for the MMIO layout for P9 chips which I will send.

As for XIVE, the model needs to be a little more complex to support 
VSD_MODE_FORWARD tables which describe how to forward a notification
to another XIVE IC on another chip. They contain an address on which 
to load. This is another hop in the notification chain.  

>> I haven't looked at multichip XIVE support but I am not too worried as 
>> the framework is already in place for the machine.
>>  
>>>>>>> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
>>>>>>> interface, possibly its methods could just be class methods of
>>>>>>> XiveRouter.
>>>>>>
>>>>>> Yes. We could introduce a XiveRouter to share the ivt table between 
>>>>>> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
>>>>>> the machines. Methods would provide way to get the ivt/eq/nvt
>>>>>> objects required for routing. I need to add a set_eq() to push the
>>>>>> EQ data.
>>>>>
>>>>> Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
>>>>> object which owns the IVT.  
>>>>
>>>> OK. that would be a model with some state and not an interface.
>>>
>>> Yes.  For papr variant it would have the whole IVT contents as its
>>> state.  For the powernv, just the registers telling it where to find
>>> the IVT in RAM.
>>>
>>>>> It may or may not do other stuff as well.
>>>>
>>>> Its only task would be to do the final event routing: get the IVE,
>>>> get the EQ, push the EQ DATA in the OS event queue, notify the CPU.
>>>
>>> That seems like a lot of steps.  Up to push the EQ DATA, certainly.
>>> And I guess it'll have to ping an NVT somehow, but I'm not sure it
>>> should know about CPUs as such.
>>
>> For PowerNV, the concept could be generalized, yes. An NVT can 
>> contain the interrupt state of a logical server but the common 
>> case is baremetal without guests for QEMU and so we have a NVT 
>> per cpu. 
> 
> Hmm.  We eventually want to support a kernel running guests under
> qemu/powernv though, right?  

arg. an emulated hypervisor ! OK let's say this is a long term goal :) 

> So even if we don't allow it right now,
> we don't want allowing that to require major surgery to our
> architecture.

That I agree on. 

>> PowerNV will have some limitation but we can make it better than 
>> today for sure. It boots.
>>
>> We can improve some of the NVT notification process, the way NVT 
>> are matched eventually. may be support remote engines if the
>> NVT is not local. I have not looked at the details.
>>
>>> I'm not sure at this stage what should own the EQD table.
>>
>> The EQDT is in RAM.
> 
> Not for spapr, it's not.  

yeah ok. It's in QEMU/KVM.

> And even when it is in RAM, something needs
> to own the register that gives its base address.

It's more complex than registers on powernv. There is a procedure
to define the XIVE tables using XIVE table descriptors which contain
their characteristics: size, direct vs. indirect, local vs. remote.
OPAL/skiboot defines all these to configure the HW, and the model
necessarily needs to support the same interface. This is the case
for a single chip.  

C.

>>> In the multichip case is there one EQD table for every IVT?
>>
>> There is one EQDT per chip, same for the IVT. They are in RAM, 
>> identified with a block ID.
>>
>>>  I'm guessing
>>> not - I figure the EQD table must be effectively global so that any
>>> chip's router can send events to any EQ in the whole system.
>>>>>> Now IIUC, on pnv the IVT lives in main system memory.  
>>>>
>>>> yes. It is allocated by skiboot in RAM and fed to the HW using some 
>>>> IC configuration registers. Then, each entry is configured with OPAL 
>>>> calls and the HW is updated using cache scrub registers. 
>>>
>>> Right.  At least for the first pass we should be able to treat the
>>> cache scrub registers as no-ops and just not cache anything in the
>>> qemu implementation.
>>
>> The model currently supports the cache scrub registers, we need it
>> to update some values. It's not too complex.
> 
> Ok.
> 
>>>>> Under PAPR is the IVT in guest memory, or is it outside (updated by
>>>>> hypercalls/rtas)?
>>>>
>>>> Under sPAPR, the IVT is updated by the H_INT_SET_SOURCE_CONFIG hcall
>>>> which configures the targeting of an IRQ. It's not in the guest 
>>>> memory.
>>>
>>> Right.
>>>
>>>> Behind the hood, the IVT is still configured by OPAL under KVM and 
>>>> by QEMU when kernel_irqchip=off
>>>
>>> Sure.  Even with kernel_irqchip=on there's still logically a guest IVT
>>> (or "IVT view" I guess), even if it's actual entries are stored
>>> distributed across various places in the host's IVTs.
>>
>> yes. The XIVE KVM device caches the info. This is used to dump the 
>> state without doing OPAL calls.
>>
>> C. 
>>
>>
>>>>>> The XiveRouter would also be a XiveFabric (or some other name) to 
>>>>>> let the internal sources of the interrupt controller forward events.
>>>>>
>>>>> The further we go here, the less sure I am that XiveFabric even makes
>>>>> sense as a concept.
>>>>
>>>> See previous email.
>>>
>>
>
David Gibson May 5, 2018, 4:26 a.m. UTC | #9
On Fri, May 04, 2018 at 03:05:08PM +0200, Cédric Le Goater wrote:
> On 05/04/2018 05:33 AM, David Gibson wrote:
> > On Thu, May 03, 2018 at 06:50:09PM +0200, Cédric Le Goater wrote:
> >> On 05/03/2018 07:22 AM, David Gibson wrote:
> >>> On Thu, Apr 26, 2018 at 12:43:29PM +0200, Cédric Le Goater wrote:
> >>>> On 04/26/2018 06:20 AM, David Gibson wrote:
> >>>>> On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
> >>>>>> On 04/24/2018 08:51 AM, David Gibson wrote:
> >>>>>>> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
> >>>>>>>> sPAPRXive is a model for the XIVE interrupt controller device of the
> >>>>>>>> sPAPR machine. It holds the routing XIVE table, the Interrupt
> >>>>>>>> Virtualization Entry (IVE) table which associates interrupt source
> >>>>>>>> numbers with targets.
> >>>>>>>>
> >>>>>>>> Also extend the XiveFabric with an accessor to the IVT. This will be
> >>>>>>>> needed by the routing algorithm.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>>>> ---
> >>>>>>>>
> >>>>>>>>  May be should introduce a XiveRouter model to hold the IVT. To be
> >>>>>>>>  discussed.
> >>>>>>>
> >>>>>>> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
> >>>>>>> than one XiveRouter?
> >>>>>>
> >>>>>> There is only one, the main IC. 
> >>>>>
> >>>>> Ok, that's what I thought originally.  In that case some of the stuff
> >>>>> in the patches really doesn't make sense to me.
> >>>>
> >>>> well, there is one IC per chip on powernv, but we haven't reach that part
> >>>> yet.
> >>>
> >>> Hmm.  There's some things we can delay dealing with, but I don't think
> >>> this is one of them.  I think we need to understand how multichip is
> >>> going to work in order to come up with a sane architecture.  Otherwise
> >>> I fear we'll end up with something that we either need to horribly
> >>> bastardize for multichip, or have to rework things dramatically
> >>> leading to migration nightmares.
> >>
> >> So, it is all controlled by MMIO, so we should be fine on that part. 
> >> As for the internal tables, they are all configured by firmware, using
> >> a chip identifier (block). I need to check how the remote XIVE are 
> >> accessed. I think this is by MMIO. 
> > 
> > Right, but for powernv we execute OPAL inside the VM, rather than
> > emulating its effects.  So we still need to model the actual hardware
> > interfaces.  OPAL hides the details from the kernel, but not from us
> > on the other side.
> 
> Yes. This is the case in the current model. I took a look today and
> I have a few fixes for the MMIO layout for P9 chips which I will send.
> 
> As for XIVE, the model needs to be a little more  complex to support 
> VSD_MODE_FORWARD tables which describe how to forward a notification
> to another XIVE IC on another chip. They contain an address on which 
> to load, This is another hop in the notification chain.  

Ah, ok.  So is that mode and address configured in the (bare metal)
IVT as well?  Or is that a different piece of configuration?

> >> I haven't looked at multichip XIVE support but I am not too worried as 
> >> the framework is already in place for the machine.
> >>  
> >>>>>>> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
> >>>>>>> interface, possibly its methods could just be class methods of
> >>>>>>> XiveRouter.
> >>>>>>
> >>>>>> Yes. We could introduce a XiveRouter to share the ivt table between 
> >>>>>> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
> >>>>>> the machines. Methods would provide way to get the ivt/eq/nvt
> >>>>>> objects required for routing. I need to add a set_eq() to push the
> >>>>>> EQ data.
> >>>>>
> >>>>> Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
> >>>>> object which owns the IVT.  
> >>>>
> >>>> OK. that would be a model with some state and not an interface.
> >>>
> >>> Yes.  For papr variant it would have the whole IVT contents as its
> >>> state.  For the powernv, just the registers telling it where to find
> >>> the IVT in RAM.
> >>>
> >>>>> It may or may not do other stuff as well.
> >>>>
> >>>> Its only task would be to do the final event routing: get the IVE,
> >>>> get the EQ, push the EQ DATA in the OS event queue, notify the CPU.
> >>>
> >>> That seems like a lot of steps.  Up to push the EQ DATA, certainly.
> >>> And I guess it'll have to ping an NVT somehow, but I'm not sure it
> >>> should know about CPUs as such.
> >>
> >> For PowerNV, the concept could be generalized, yes. An NVT can 
> >> contain the interrupt state of a logical server but the common 
> >> case is baremetal without guests for QEMU and so we have a NVT 
> >> per cpu. 
> > 
> > Hmm.  We eventually want to support a kernel running guests under
> > qemu/powernv though, right?  
> 
> arg. an emulated hypervisor ! OK let's say this is a long term goal :) 
> 
> > So even if we don't allow it right now,
> > we don't want allowing that to require major surgery to our
> > architecture.
> 
> That I agree on. 
> 
> >> PowerNV will have some limitation but we can make it better than 
> >> today for sure. It boots.
> >>
> >> We can improve some of the NVT notification process, the way NVT 
> >> are matched eventually. may be support remote engines if the
> >> NVT is not local. I have not looked at the details.
> >>
> >>> I'm not sure at this stage what should own the EQD table.
> >>
> >> The EQDT is in RAM.
> > 
> > Not for spapr, it's not.  
> 
> yeah ok. It's in QEMU/KVM.
> 
> > And even when it is in RAM, something needs
> > to own the register that gives its base address.
> 
> It's more complex than registers on powernv. There is a procedure
> to define the XIVE tables using XIVE table descriptors which contain
> their characteristics, size, indirect vs. indirect, local vs remote.
> OPAL/skiboot defines all these to configure the HW, and the model
> necessarily needs to support the same interface. This is the case
> for a single chip.

Ah, ok.  So there's some sort of IVTD.  Also in RAM?  Eventually there
must be a register giving the base address of the IVTD, yes?
Cédric Le Goater May 9, 2018, 7:23 a.m. UTC | #10
On 05/05/2018 06:26 AM, David Gibson wrote:
> On Fri, May 04, 2018 at 03:05:08PM +0200, Cédric Le Goater wrote:
>> On 05/04/2018 05:33 AM, David Gibson wrote:
>>> On Thu, May 03, 2018 at 06:50:09PM +0200, Cédric Le Goater wrote:
>>>> On 05/03/2018 07:22 AM, David Gibson wrote:
>>>>> On Thu, Apr 26, 2018 at 12:43:29PM +0200, Cédric Le Goater wrote:
>>>>>> On 04/26/2018 06:20 AM, David Gibson wrote:
>>>>>>> On Tue, Apr 24, 2018 at 11:46:04AM +0200, Cédric Le Goater wrote:
>>>>>>>> On 04/24/2018 08:51 AM, David Gibson wrote:
>>>>>>>>> On Thu, Apr 19, 2018 at 02:43:00PM +0200, Cédric Le Goater wrote:
>>>>>>>>>> sPAPRXive is a model for the XIVE interrupt controller device of the
>>>>>>>>>> sPAPR machine. It holds the routing XIVE table, the Interrupt
>>>>>>>>>> Virtualization Entry (IVE) table which associates interrupt source
>>>>>>>>>> numbers with targets.
>>>>>>>>>>
>>>>>>>>>> Also extend the XiveFabric with an accessor to the IVT. This will be
>>>>>>>>>> needed by the routing algorithm.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>>>>>> ---
>>>>>>>>>>
>>>>>>>>>>  May be should introduce a XiveRouter model to hold the IVT. To be
>>>>>>>>>>  discussed.
>>>>>>>>>
>>>>>>>>> Yeah, maybe.  Am I correct in thinking that on pnv there could be more
>>>>>>>>> than one XiveRouter?
>>>>>>>>
>>>>>>>> There is only one, the main IC. 
>>>>>>>
>>>>>>> Ok, that's what I thought originally.  In that case some of the stuff
>>>>>>> in the patches really doesn't make sense to me.
>>>>>>
>>>>>> well, there is one IC per chip on powernv, but we haven't reach that part
>>>>>> yet.
>>>>>
>>>>> Hmm.  There's some things we can delay dealing with, but I don't think
>>>>> this is one of them.  I think we need to understand how multichip is
>>>>> going to work in order to come up with a sane architecture.  Otherwise
>>>>> I fear we'll end up with something that we either need to horribly
>>>>> bastardize for multichip, or have to rework things dramatically
>>>>> leading to migration nightmares.
>>>>
>>>> So, it is all controlled by MMIO, so we should be fine on that part. 
>>>> As for the internal tables, they are all configured by firmware, using
>>>> a chip identifier (block). I need to check how the remote XIVE are 
>>>> accessed. I think this is by MMIO. 
>>>
>>> Right, but for powernv we execute OPAL inside the VM, rather than
>>> emulating its effects.  So we still need to model the actual hardware
>>> interfaces.  OPAL hides the details from the kernel, but not from us
>>> on the other side.
>>
>> Yes. This is the case in the current model. I took a look today and
>> I have a few fixes for the MMIO layout for P9 chips which I will send.
>>
>> As for XIVE, the model needs to be a little more  complex to support 
>> VSD_MODE_FORWARD tables which describe how to forward a notification
>> to another XIVE IC on another chip. They contain an address on which 
>> to load, This is another hop in the notification chain.  
> 
> Ah, ok.  So is that mode and address configured in the (bare metal)
> IVT as well?  Or is that a different piece of configuration?

The mode of a virtual structure table is configured by firmware. 
There are 4 main table types:  IVT, SBE, EQD, VPD (and an extra one
for IRQ) for the 16 possible blocks of a machine (I am simplifying 
a bit there). 

Tables local to a block/chip are, today, set to EXCLUSIVE and all 
remote tables are set to FORWARD.

The address of a table is also configured by FW. In the case of a 
FORWARD table, it is set to the remote IC BAR + one page. This page 
has two 2K windows: one for HW interrupt triggers and another 
one to forward interrupts and for operation synchronization. 
>>>> I haven't looked at multichip XIVE support but I am not too worried as 
>>>> the framework is already in place for the machine.
>>>>  
>>>>>>>>> If we did have a XiveRouter, I'm not sure we'd need the XiveFabric
>>>>>>>>> interface, possibly its methods could just be class methods of
>>>>>>>>> XiveRouter.
>>>>>>>>
>>>>>>>> Yes. We could introduce a XiveRouter to share the ivt table between 
>>>>>>>> the sPAPRXive and the PnvXIVE models, the interrupt controllers of
>>>>>>>> the machines. Methods would provide way to get the ivt/eq/nvt
>>>>>>>> objects required for routing. I need to add a set_eq() to push the
>>>>>>>> EQ data.
>>>>>>>
>>>>>>> Hrm.  Well, to add some more clarity, let's say the XiveRouter is the
>>>>>>> object which owns the IVT.  
>>>>>>
>>>>>> OK. that would be a model with some state and not an interface.
>>>>>
>>>>> Yes.  For papr variant it would have the whole IVT contents as its
>>>>> state.  For the powernv, just the registers telling it where to find
>>>>> the IVT in RAM.
>>>>>
>>>>>>> It may or may not do other stuff as well.
>>>>>>
>>>>>> Its only task would be to do the final event routing: get the IVE,
>>>>>> get the EQ, push the EQ DATA in the OS event queue, notify the CPU.
>>>>>
>>>>> That seems like a lot of steps.  Up to push the EQ DATA, certainly.
>>>>> And I guess it'll have to ping an NVT somehow, but I'm not sure it
>>>>> should know about CPUs as such.
>>>>
>>>> For PowerNV, the concept could be generalized, yes. An NVT can 
>>>> contain the interrupt state of a logical server but the common 
>>>> case is baremetal without guests for QEMU and so we have a NVT 
>>>> per cpu. 
>>>
>>> Hmm.  We eventually want to support a kernel running guests under
>>> qemu/powernv though, right?  
>>
>> arg. an emulated hypervisor ! OK let's say this is a long term goal :) 
>>
>>> So even if we don't allow it right now,
>>> we don't want allowing that to require major surgery to our
>>> architecture.
>>
>> That I agree on. 
>>
>>>> PowerNV will have some limitation but we can make it better than 
>>>> today for sure. It boots.
>>>>
>>>> We can improve some of the NVT notification process, the way NVT 
>>>> are matched eventually. may be support remote engines if the
>>>> NVT is not local. I have not looked at the details.
>>>>
>>>>> I'm not sure at this stage what should own the EQD table.
>>>>
>>>> The EQDT is in RAM.
>>>
>>> Not for spapr, it's not.  
>>
>> yeah ok. It's in QEMU/KVM.
>>
>>> And even when it is in RAM, something needs
>>> to own the register that gives its base address.
>>
>> It's more complex than registers on powernv. There is a procedure
>> to define the XIVE tables using XIVE table descriptors which contain
>> their characteristics, size, indirect vs. indirect, local vs remote.
>> OPAL/skiboot defines all these to configure the HW, and the model
>> necessarily needs to support the same interface. This is the case
>> for a single chip.
> 
> Ah, ok.  So there's some sort of IVTD. 

These are called Virtual Structure table Descriptors (VSDs). Each
XIVE chip has an array of these.

> Also in RAM?  

Yes. But the VSDs are just temporary structures to configure HW. 
What is important is the information they are holding: IVT, EQDT,
VPDT, etc. 

> Eventually there
> must be a register giving the base address of the IVTD, yes?

There are two registers to configure the table. One to set the 
table type and block, and one to set its VSD.

C.
diff mbox series

Patch

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index c6d13e757977..f8d34722931d 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -17,4 +17,5 @@  CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
 CONFIG_XIVE=$(CONFIG_PSERIES)
+CONFIG_XIVE_SPAPR=$(CONFIG_PSERIES)
 CONFIG_MEM_HOTPLUG=y
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 72a46ed91c31..301a8e972d91 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -38,6 +38,7 @@  obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_XIVE) += xive.o
+obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
 obj-$(CONFIG_POWERNV) += xics_pnv.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
new file mode 100644
index 000000000000..020444e2665a
--- /dev/null
+++ b/hw/intc/spapr_xive.c
@@ -0,0 +1,159 @@ 
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive_regs.h"
+
+void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
+{
+    int i;
+
+    monitor_printf(mon, "IVE Table\n");
+    for (i = 0; i < xive->nr_irqs; i++) {
+        XiveIVE *ive = &xive->ivt[i];
+
+        if (!(ive->w & IVE_VALID)) {
+            continue;
+        }
+
+        monitor_printf(mon, "  %4x %s %08x %08x\n", i,
+                       ive->w & IVE_MASKED ? "M" : " ",
+                       (int) GETFIELD(IVE_EQ_INDEX, ive->w),
+                       (int) GETFIELD(IVE_EQ_DATA, ive->w));
+    }
+}
+
+static void spapr_xive_reset(DeviceState *dev)
+{
+    sPAPRXive *xive = SPAPR_XIVE(dev);
+    int i;
+
+    /* Mask all valid IVEs in the IRQ number space. */
+    for (i = 0; i < xive->nr_irqs; i++) {
+        XiveIVE *ive = &xive->ivt[i];
+        if (ive->w & IVE_VALID) {
+            ive->w |= IVE_MASKED;
+        }
+    }
+}
+
+static void spapr_xive_init(Object *obj)
+{
+
+}
+
+static void spapr_xive_realize(DeviceState *dev, Error **errp)
+{
+    sPAPRXive *xive = SPAPR_XIVE(dev);
+
+    if (!xive->nr_irqs) {
+        error_setg(errp, "Number of interrupt needs to be greater 0");
+        return;
+    }
+
+    /* Allocate the Interrupt Virtualization Table */
+    xive->ivt = g_new0(XiveIVE, xive->nr_irqs);
+}
+
+static XiveIVE *spapr_xive_get_ive(XiveFabric *xf, uint32_t lisn)
+{
+    sPAPRXive *xive = SPAPR_XIVE(xf);
+
+    return lisn < xive->nr_irqs ? &xive->ivt[lisn] : NULL;
+}
+
+static const VMStateDescription vmstate_spapr_xive_ive = {
+    .name = TYPE_SPAPR_XIVE "/ive",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_UINT64(w, XiveIVE),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_xive = {
+    .name = TYPE_SPAPR_XIVE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
+        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(ivt, sPAPRXive, nr_irqs,
+                                     vmstate_spapr_xive_ive, XiveIVE),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static Property spapr_xive_properties[] = {
+    DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_xive_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
+
+    dc->realize = spapr_xive_realize;
+    dc->reset = spapr_xive_reset;
+    dc->props = spapr_xive_properties;
+    dc->desc = "sPAPR XIVE interrupt controller";
+    dc->vmsd = &vmstate_spapr_xive;
+
+    xfc->get_ive = spapr_xive_get_ive;
+}
+
+static const TypeInfo spapr_xive_info = {
+    .name = TYPE_SPAPR_XIVE,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_init = spapr_xive_init,
+    .instance_size = sizeof(sPAPRXive),
+    .class_init = spapr_xive_class_init,
+    .interfaces = (InterfaceInfo[]) {
+            { TYPE_XIVE_FABRIC },
+            { },
+    },
+};
+
+static void spapr_xive_register_types(void)
+{
+    type_register_static(&spapr_xive_info);
+}
+
+type_init(spapr_xive_register_types)
+
+bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi)
+{
+    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
+
+    if (!ive) {
+        return false;
+    }
+
+    ive->w |= IVE_VALID;
+    return true;
+}
+
+bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn)
+{
+    XiveIVE *ive = spapr_xive_get_ive(XIVE_FABRIC(xive), lisn);
+
+    if (!ive) {
+        return false;
+    }
+
+    ive->w &= ~IVE_VALID;
+    return true;
+}
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index b4c3d06c1219..dccad0318834 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -20,6 +20,13 @@ 
  * XIVE Fabric
  */
 
+XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn)
+{
+    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xf);
+
+    return xfc->get_ive(xf, lisn);
+}
+
 static void xive_fabric_route(XiveFabric *xf, int lisn)
 {
 
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
new file mode 100644
index 000000000000..1d966b5d3a96
--- /dev/null
+++ b/include/hw/ppc/spapr_xive.h
@@ -0,0 +1,31 @@ 
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_SPAPR_XIVE_H
+#define PPC_SPAPR_XIVE_H
+
+#include "hw/sysbus.h"
+#include "hw/ppc/xive.h"
+
+#define TYPE_SPAPR_XIVE "spapr-xive"
+#define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE)
+
+typedef struct sPAPRXive {
+    SysBusDevice parent;
+
+    /* Routing table */
+    XiveIVE      *ivt;
+    uint32_t     nr_irqs;
+} sPAPRXive;
+
+bool spapr_xive_irq_enable(sPAPRXive *xive, uint32_t lisn, bool lsi);
+bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
+void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
+
+#endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 4fcae2c763e6..5b145816acdc 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -11,6 +11,7 @@ 
 #define PPC_XIVE_H
 
 #include "hw/sysbus.h"
+#include "hw/ppc/xive_regs.h"
 
 typedef struct XiveFabric XiveFabric;
 
@@ -166,6 +167,10 @@  typedef struct XiveFabric {
 typedef struct XiveFabricClass {
     InterfaceClass parent;
     void (*notify)(XiveFabric *xf, uint32_t lisn);
+
+    XiveIVE *(*get_ive)(XiveFabric *xf, uint32_t lisn);
 } XiveFabricClass;
 
+XiveIVE *xive_fabric_get_ive(XiveFabric *xf, uint32_t lisn);
+
 #endif /* PPC_XIVE_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
new file mode 100644
index 000000000000..5903f29eb789
--- /dev/null
+++ b/include/hw/ppc/xive_regs.h
@@ -0,0 +1,33 @@ 
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2016-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef _PPC_XIVE_REGS_H
+#define _PPC_XIVE_REGS_H
+
+/* IVE/EAS
+ *
+ * One per interrupt source. Targets that interrupt to a given EQ
+ * and provides the corresponding logical interrupt number (EQ data)
+ *
+ * We also map this structure to the escalation descriptor inside
+ * an EQ, though in that case the valid and masked bits are not used.
+ */
+typedef struct XiveIVE {
+        /* Use a single 64-bit definition to make it easier to
+         * perform atomic updates
+         */
+        uint64_t        w;
+#define IVE_VALID       PPC_BIT(0)
+#define IVE_EQ_BLOCK    PPC_BITMASK(4, 7)        /* Destination EQ block# */
+#define IVE_EQ_INDEX    PPC_BITMASK(8, 31)       /* Destination EQ index */
+#define IVE_MASKED      PPC_BIT(32)              /* Masked */
+#define IVE_EQ_DATA     PPC_BITMASK(33, 63)      /* Data written to the EQ */
+} XiveIVE;
+
+#endif /* _PPC_XIVE_REGS_H */