diff mbox series

[v5,04/36] ppc/xive: introduce the XiveRouter model

Message ID 20181116105729.23240-5-clg@kaod.org
State New
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater Nov. 16, 2018, 10:56 a.m. UTC
The XiveRouter models the second sub-engine of the overall XIVE
architecture : the Interrupt Virtualization Routing Engine (IVRE).

The IVRE handles event notifications of the IVSE through MMIO stores
and performs the interrupt routing process. For this purpose, it uses
a set of table stored in system memory, the first of which being the
Event Assignment Structure (EAS) table.

The EAT associates an interrupt source number with an Event Notification
Descriptor (END) which will be used in a second phase of the routing
process to identify a Notification Virtual Target.

The XiveRouter is an abstract class which needs to be inherited from
to define a storage for the EAT, and other upcoming tables. The
'chip-id' atttribute is not strictly necessary for the sPAPR and
PowerNV machines but it's a good way to test the routing algorithm.
Without this atttribute, the XiveRouter could be a simple QOM
interface.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/xive.h      | 32 ++++++++++++++
 include/hw/ppc/xive_regs.h | 31 ++++++++++++++
 hw/intc/xive.c             | 86 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 149 insertions(+)
 create mode 100644 include/hw/ppc/xive_regs.h

Comments

David Gibson Nov. 22, 2018, 4:11 a.m. UTC | #1
On Fri, Nov 16, 2018 at 11:56:57AM +0100, Cédric Le Goater wrote:
> The XiveRouter models the second sub-engine of the overall XIVE
> architecture : the Interrupt Virtualization Routing Engine (IVRE).
> 
> The IVRE handles event notifications of the IVSE through MMIO stores
> and performs the interrupt routing process. For this purpose, it uses
> a set of table stored in system memory, the first of which being the
> Event Assignment Structure (EAS) table.
> 
> The EAT associates an interrupt source number with an Event Notification
> Descriptor (END) which will be used in a second phase of the routing
> process to identify a Notification Virtual Target.
> 
> The XiveRouter is an abstract class which needs to be inherited from
> to define a storage for the EAT, and other upcoming tables. The
> 'chip-id' atttribute is not strictly necessary for the sPAPR and
> PowerNV machines but it's a good way to test the routing algorithm.
> Without this atttribute, the XiveRouter could be a simple QOM
> interface.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/xive.h      | 32 ++++++++++++++
>  include/hw/ppc/xive_regs.h | 31 ++++++++++++++
>  hw/intc/xive.c             | 86 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 149 insertions(+)
>  create mode 100644 include/hw/ppc/xive_regs.h
> 
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index be93fae6317b..5a0696366577 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -11,6 +11,7 @@
>  #define PPC_XIVE_H
>  
>  #include "hw/sysbus.h"

Again, I don't think making this a SysBusDevice is quite right.
Even more so for the router than the source, because at least for PAPR
it might not have any MMIO presence at all.

> +#include "hw/ppc/xive_regs.h"
>  
>  /*
>   * XIVE Fabric (Interface between Source and Router)
> @@ -168,4 +169,35 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>      }
>  }
>  
> +/*
> + * XIVE Router
> + */
> +
> +typedef struct XiveRouter {
> +    SysBusDevice    parent;
> +
> +    uint32_t        chip_id;

I don't think this belongs in the base class.  The PowerNV specific
variants will need it, but it doesn't make sense for the PAPR version.

> +} XiveRouter;
> +
> +#define TYPE_XIVE_ROUTER "xive-router"
> +#define XIVE_ROUTER(obj)                                \
> +    OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER)
> +#define XIVE_ROUTER_CLASS(klass)                                        \
> +    OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER)
> +#define XIVE_ROUTER_GET_CLASS(obj)                              \
> +    OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
> +
> +typedef struct XiveRouterClass {
> +    SysBusDeviceClass parent;
> +
> +    /* XIVE table accessors */
> +    int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> +    int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> +} XiveRouterClass;
> +
> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
> +
> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> +
>  #endif /* PPC_XIVE_H */
> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
> new file mode 100644
> index 000000000000..12499b33614c
> --- /dev/null
> +++ b/include/hw/ppc/xive_regs.h
> @@ -0,0 +1,31 @@
> +/*
> + * QEMU PowerPC XIVE interrupt controller model
> + *
> + * Copyright (c) 2016-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#ifndef PPC_XIVE_REGS_H
> +#define PPC_XIVE_REGS_H
> +
> +/* EAS (Event Assignment Structure)
> + *
> + * One per interrupt source. Targets an interrupt to a given Event
> + * Notification Descriptor (END) and provides the corresponding
> + * logical interrupt number (END data)
> + */
> +typedef struct XiveEAS {
> +        /* Use a single 64-bit definition to make it easier to
> +         * perform atomic updates
> +         */
> +        uint64_t        w;
> +#define EAS_VALID       PPC_BIT(0)
> +#define EAS_END_BLOCK   PPC_BITMASK(4, 7)        /* Destination END block# */
> +#define EAS_END_INDEX   PPC_BITMASK(8, 31)       /* Destination END index */
> +#define EAS_MASKED      PPC_BIT(32)              /* Masked */
> +#define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
> +} XiveEAS;
> +
> +#endif /* PPC_XIVE_REGS_H */
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 014a2e41f71f..c4c90a25758e 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -442,6 +442,91 @@ static const TypeInfo xive_source_info = {
>      .class_init    = xive_source_class_init,
>  };
>  
> +/*
> + * XIVE Router (aka. Virtualization Controller or IVRE)
> + */
> +
> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
> +{
> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> +
> +    return xrc->get_eas(xrtr, lisn, eas);
> +}
> +
> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
> +{
> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> +
> +    return xrc->set_eas(xrtr, lisn, eas);
> +}
> +
> +static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
> +{
> +    XiveRouter *xrtr = XIVE_ROUTER(xf);
> +    XiveEAS eas;
> +
> +    /* EAS cache lookup */
> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
> +        return;
> +    }

AFAICT a bad LISN here means a qemu error (in the source, probably),
not a user or guest error, so an assert() would be more appropriate.

> +
> +    /* The IVRE has a State Bit Cache for its internal sources which
> +     * is also involed at this point. We skip the SBC lookup because
> +     * the state bits of the sources are modeled internally in QEMU.
> +     */
> +
> +    if (!(eas.w & EAS_VALID)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
> +        return;
> +    }
> +
> +    if (eas.w & EAS_MASKED) {
> +        /* Notification completed */
> +        return;
> +    }
> +}
> +
> +static Property xive_router_properties[] = {
> +    DEFINE_PROP_UINT32("chip-id", XiveRouter, chip_id, 0),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void xive_router_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
> +
> +    dc->desc    = "XIVE Router Engine";
> +    dc->props   = xive_router_properties;
> +    xfc->notify = xive_router_notify;
> +}
> +
> +static const TypeInfo xive_router_info = {
> +    .name          = TYPE_XIVE_ROUTER,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .abstract      = true,
> +    .class_size    = sizeof(XiveRouterClass),
> +    .class_init    = xive_router_class_init,
> +    .interfaces    = (InterfaceInfo[]) {
> +        { TYPE_XIVE_FABRIC },

So as far as I can see so far, the XiveFabric interface will
essentially have to be implemented on the router object, so I'm not
seeing much point to having the interface rather than just a direct
call on the router object.  But I haven't read the whole series yet,
so maybe I'm missing something.


> +        { }
> +    }
> +};
> +
> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
> +{
> +    if (!(eas->w & EAS_VALID)) {
> +        return;
> +    }
> +
> +    monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
> +                   lisn, eas->w & EAS_MASKED ? "M" : " ",
> +                   (uint8_t)  GETFIELD(EAS_END_BLOCK, eas->w),
> +                   (uint32_t) GETFIELD(EAS_END_INDEX, eas->w),
> +                   (uint32_t) GETFIELD(EAS_END_DATA, eas->w));
> +}
> +
>  /*
>   * XIVE Fabric
>   */
> @@ -455,6 +540,7 @@ static void xive_register_types(void)
>  {
>      type_register_static(&xive_source_info);
>      type_register_static(&xive_fabric_info);
> +    type_register_static(&xive_router_info);
>  }
>  
>  type_init(xive_register_types)
David Gibson Nov. 22, 2018, 4:44 a.m. UTC | #2
On Fri, Nov 16, 2018 at 11:56:57AM +0100, Cédric Le Goater wrote:
> The XiveRouter models the second sub-engine of the overall XIVE
> architecture : the Interrupt Virtualization Routing Engine (IVRE).
> 
> The IVRE handles event notifications of the IVSE through MMIO stores
> and performs the interrupt routing process. For this purpose, it uses
> a set of table stored in system memory, the first of which being the
> Event Assignment Structure (EAS) table.
> 
> The EAT associates an interrupt source number with an Event Notification
> Descriptor (END) which will be used in a second phase of the routing
> process to identify a Notification Virtual Target.
> 
> The XiveRouter is an abstract class which needs to be inherited from
> to define a storage for the EAT, and other upcoming tables. The
> 'chip-id' atttribute is not strictly necessary for the sPAPR and
> PowerNV machines but it's a good way to test the routing algorithm.
> Without this atttribute, the XiveRouter could be a simple QOM
> interface.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/xive.h      | 32 ++++++++++++++
>  include/hw/ppc/xive_regs.h | 31 ++++++++++++++
>  hw/intc/xive.c             | 86 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 149 insertions(+)
>  create mode 100644 include/hw/ppc/xive_regs.h
> 
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index be93fae6317b..5a0696366577 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -11,6 +11,7 @@
>  #define PPC_XIVE_H
>  
>  #include "hw/sysbus.h"
> +#include "hw/ppc/xive_regs.h"
>  
>  /*
>   * XIVE Fabric (Interface between Source and Router)
> @@ -168,4 +169,35 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>      }
>  }
>  
> +/*
> + * XIVE Router
> + */
> +
> +typedef struct XiveRouter {
> +    SysBusDevice    parent;
> +
> +    uint32_t        chip_id;
> +} XiveRouter;
> +
> +#define TYPE_XIVE_ROUTER "xive-router"
> +#define XIVE_ROUTER(obj)                                \
> +    OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER)
> +#define XIVE_ROUTER_CLASS(klass)                                        \
> +    OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER)
> +#define XIVE_ROUTER_GET_CLASS(obj)                              \
> +    OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
> +
> +typedef struct XiveRouterClass {
> +    SysBusDeviceClass parent;
> +
> +    /* XIVE table accessors */
> +    int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> +    int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);

Sorry, didn't think of this in my first reply.

1) Does the hardware ever actually write back to the EAS?  I know it
does for the END, but it's not clear why it would need to for the
EAS.  If not, we don't need the setter.

2) The signatures are a bit odd here.  For the setter, a value would
make sense than a (XiveEAS *), since it's just a word.  For the getter
you could return the EAS value directly rather than using a pointer -
there's already a valid bit in the EAS so you can construct a value
with that cleared if the lisn is out of bounds.
Benjamin Herrenschmidt Nov. 22, 2018, 6:50 a.m. UTC | #3
On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
> 
> Sorry, didn't think of this in my first reply.
> 
> 1) Does the hardware ever actually write back to the EAS?  I know it
> does for the END, but it's not clear why it would need to for the
> EAS.  If not, we don't need the setter.

Nope, though the PAPR model will via hcalls

> 
> 2) The signatures are a bit odd here.  For the setter, a value would
> make sense than a (XiveEAS *), since it's just a word.  For the getter
> you could return the EAS value directly rather than using a pointer -
> there's already a valid bit in the EAS so you can construct a value
> with that cleared if the lisn is out of bounds.
Cédric Le Goater Nov. 22, 2018, 7:53 a.m. UTC | #4
On 11/22/18 5:11 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:56:57AM +0100, Cédric Le Goater wrote:
>> The XiveRouter models the second sub-engine of the overall XIVE
>> architecture : the Interrupt Virtualization Routing Engine (IVRE).
>>
>> The IVRE handles event notifications of the IVSE through MMIO stores
>> and performs the interrupt routing process. For this purpose, it uses
>> a set of table stored in system memory, the first of which being the
>> Event Assignment Structure (EAS) table.
>>
>> The EAT associates an interrupt source number with an Event Notification
>> Descriptor (END) which will be used in a second phase of the routing
>> process to identify a Notification Virtual Target.
>>
>> The XiveRouter is an abstract class which needs to be inherited from
>> to define a storage for the EAT, and other upcoming tables. The
>> 'chip-id' atttribute is not strictly necessary for the sPAPR and
>> PowerNV machines but it's a good way to test the routing algorithm.
>> Without this atttribute, the XiveRouter could be a simple QOM
>> interface.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/xive.h      | 32 ++++++++++++++
>>  include/hw/ppc/xive_regs.h | 31 ++++++++++++++
>>  hw/intc/xive.c             | 86 ++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 149 insertions(+)
>>  create mode 100644 include/hw/ppc/xive_regs.h
>>
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index be93fae6317b..5a0696366577 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -11,6 +11,7 @@
>>  #define PPC_XIVE_H
>>  
>>  #include "hw/sysbus.h"
> 
> Again, I don't think making this a SysBusDevice is quite right.
> Even more so for the router than the source, because at least for PAPR
> it might not have any MMIO presence at all.

The controller model inherits from the XiveRouter and manages the TIMA.

>> +#include "hw/ppc/xive_regs.h"
>>  
>>  /*
>>   * XIVE Fabric (Interface between Source and Router)
>> @@ -168,4 +169,35 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>>      }
>>  }
>>  
>> +/*
>> + * XIVE Router
>> + */
>> +
>> +typedef struct XiveRouter {
>> +    SysBusDevice    parent;
>> +
>> +    uint32_t        chip_id;
> 
> I don't think this belongs in the base class.  The PowerNV specific
> variants will need it, but it doesn't make sense for the PAPR version.

yeah. I am using it as a END and NVT block identifier but it's not 
required for sPAPR, it could just be zero. 

It was good to test the routing algo which should not assume that the 
block id is zero. 
 
> 
>> +} XiveRouter;
>> +
>> +#define TYPE_XIVE_ROUTER "xive-router"
>> +#define XIVE_ROUTER(obj)                                \
>> +    OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER)
>> +#define XIVE_ROUTER_CLASS(klass)                                        \
>> +    OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER)
>> +#define XIVE_ROUTER_GET_CLASS(obj)                              \
>> +    OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
>> +
>> +typedef struct XiveRouterClass {
>> +    SysBusDeviceClass parent;
>> +
>> +    /* XIVE table accessors */
>> +    int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>> +    int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>> +} XiveRouterClass;
>> +
>> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
>> +
>> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>> +
>>  #endif /* PPC_XIVE_H */
>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
>> new file mode 100644
>> index 000000000000..12499b33614c
>> --- /dev/null
>> +++ b/include/hw/ppc/xive_regs.h
>> @@ -0,0 +1,31 @@
>> +/*
>> + * QEMU PowerPC XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2016-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef PPC_XIVE_REGS_H
>> +#define PPC_XIVE_REGS_H
>> +
>> +/* EAS (Event Assignment Structure)
>> + *
>> + * One per interrupt source. Targets an interrupt to a given Event
>> + * Notification Descriptor (END) and provides the corresponding
>> + * logical interrupt number (END data)
>> + */
>> +typedef struct XiveEAS {
>> +        /* Use a single 64-bit definition to make it easier to
>> +         * perform atomic updates
>> +         */
>> +        uint64_t        w;
>> +#define EAS_VALID       PPC_BIT(0)
>> +#define EAS_END_BLOCK   PPC_BITMASK(4, 7)        /* Destination END block# */
>> +#define EAS_END_INDEX   PPC_BITMASK(8, 31)       /* Destination END index */
>> +#define EAS_MASKED      PPC_BIT(32)              /* Masked */
>> +#define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
>> +} XiveEAS;
>> +
>> +#endif /* PPC_XIVE_REGS_H */
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index 014a2e41f71f..c4c90a25758e 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -442,6 +442,91 @@ static const TypeInfo xive_source_info = {
>>      .class_init    = xive_source_class_init,
>>  };
>>  
>> +/*
>> + * XIVE Router (aka. Virtualization Controller or IVRE)
>> + */
>> +
>> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
>> +{
>> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>> +
>> +    return xrc->get_eas(xrtr, lisn, eas);
>> +}
>> +
>> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
>> +{
>> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>> +
>> +    return xrc->set_eas(xrtr, lisn, eas);
>> +}
>> +
>> +static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>> +{
>> +    XiveRouter *xrtr = XIVE_ROUTER(xf);
>> +    XiveEAS eas;
>> +
>> +    /* EAS cache lookup */
>> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
>> +        return;
>> +    }
> 
> AFAICT a bad LISN here means a qemu error (in the source, probably),
> not a user or guest error, so an assert() would be more appropriate.

hmm, I would say no because in the case of PowerNV, the firmware could
have badly configured the ISN offset of a source which would notify the 
router with a bad notification event data.  


>> +
>> +    /* The IVRE has a State Bit Cache for its internal sources which
>> +     * is also involed at this point. We skip the SBC lookup because
>> +     * the state bits of the sources are modeled internally in QEMU.
>> +     */
>> +
>> +    if (!(eas.w & EAS_VALID)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
>> +        return;
>> +    }
>> +
>> +    if (eas.w & EAS_MASKED) {
>> +        /* Notification completed */
>> +        return;
>> +    }
>> +}
>> +
>> +static Property xive_router_properties[] = {
>> +    DEFINE_PROP_UINT32("chip-id", XiveRouter, chip_id, 0),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void xive_router_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
>> +
>> +    dc->desc    = "XIVE Router Engine";
>> +    dc->props   = xive_router_properties;
>> +    xfc->notify = xive_router_notify;
>> +}
>> +
>> +static const TypeInfo xive_router_info = {
>> +    .name          = TYPE_XIVE_ROUTER,
>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>> +    .abstract      = true,
>> +    .class_size    = sizeof(XiveRouterClass),
>> +    .class_init    = xive_router_class_init,
>> +    .interfaces    = (InterfaceInfo[]) {
>> +        { TYPE_XIVE_FABRIC },
> 
> So as far as I can see so far, the XiveFabric interface will
> essentially have to be implemented on the router object, so I'm not
> seeing much point to having the interface rather than just a direct
> call on the router object.  But I haven't read the whole series yet,
> so maybe I'm missing something.

The PSIHB and PHB4 models are using it but there are not in the series.

I can send the PSIHB patch in the next version if you like, it's the 
patch right after PnvXive. It's attached below for the moment. Look at 
pnv_psi_notify().

Thanks,

C.

>> +        { }
>> +    }
>> +};
>> +
>> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
>> +{
>> +    if (!(eas->w & EAS_VALID)) {
>> +        return;
>> +    }
>> +
>> +    monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
>> +                   lisn, eas->w & EAS_MASKED ? "M" : " ",
>> +                   (uint8_t)  GETFIELD(EAS_END_BLOCK, eas->w),
>> +                   (uint32_t) GETFIELD(EAS_END_INDEX, eas->w),
>> +                   (uint32_t) GETFIELD(EAS_END_DATA, eas->w));
>> +}
>> +
>>  /*
>>   * XIVE Fabric
>>   */
>> @@ -455,6 +540,7 @@ static void xive_register_types(void)
>>  {
>>      type_register_static(&xive_source_info);
>>      type_register_static(&xive_fabric_info);
>> +    type_register_static(&xive_router_info);
>>  }
>>  
>>  type_init(xive_register_types)
>
From 680fd6ff7c99e669708fbc5cfdbfcd95e83e7c07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Wed, 21 Nov 2018 10:29:45 +0100
Subject: [PATCH] ppc/pnv: add a PSI bridge model for POWER9 processor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PSI bridge on POWER9 is very similar to POWER8. The BAR is still
set through XSCOM but the controls are now entirely done with MMIOs.
More interrupts are defined and the interrupt controller interface has
changed to XIVE. The POWER9 model is a first example of the usage of
the notify() handler of the XiveFabric interface, linking the PSI
XiveSource to its owning device model.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/pnv.h       |   6 +
 include/hw/ppc/pnv_psi.h   |  50 ++++-
 include/hw/ppc/pnv_xscom.h |   3 +
 hw/ppc/pnv.c               |  20 +-
 hw/ppc/pnv_psi.c           | 390 ++++++++++++++++++++++++++++++++++---
 5 files changed, 444 insertions(+), 25 deletions(-)

diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index c402e5d5844b..8be1147481f9 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -88,6 +88,7 @@ typedef struct Pnv9Chip {
 
     /*< public >*/
     PnvXive      xive;
+    PnvPsi       psi;
 } Pnv9Chip;
 
 typedef struct PnvChipClass {
@@ -250,11 +251,16 @@ void pnv_bmc_powerdown(IPMIBmc *bmc);
 #define PNV9_XIVE_PC_SIZE            0x0000001000000000ull
 #define PNV9_XIVE_PC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006018000000000ull)
 
+#define PNV9_PSIHB_SIZE              0x0000000000100000ull
+#define PNV9_PSIHB_BASE(chip)        PNV9_CHIP_BASE(chip, 0x0006030203000000ull)
+
 #define PNV9_XIVE_IC_SIZE            0x0000000000080000ull
 #define PNV9_XIVE_IC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203100000ull)
 
 #define PNV9_XIVE_TM_SIZE            0x0000000000040000ull
 #define PNV9_XIVE_TM_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203180000ull)
 
+#define PNV9_PSIHB_ESB_SIZE          0x0000000000010000ull
+#define PNV9_PSIHB_ESB_BASE(chip)    PNV9_CHIP_BASE(chip, 0x00060302031c0000ull)
 
 #endif /* _PPC_PNV_H */
diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h
index f6af5eae1fa8..b8f8d082bcf9 100644
--- a/include/hw/ppc/pnv_psi.h
+++ b/include/hw/ppc/pnv_psi.h
@@ -21,10 +21,35 @@
 
 #include "hw/sysbus.h"
 #include "hw/ppc/xics.h"
+#include "hw/ppc/xive.h"
 
 #define TYPE_PNV_PSI "pnv-psi"
 #define PNV_PSI(obj) \
      OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI)
+#define PNV_PSI_CLASS(klass) \
+     OBJECT_CLASS_CHECK(PnvPsiClass, (klass), TYPE_PNV_PSI)
+#define PNV_PSI_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(PnvPsiClass, (obj), TYPE_PNV_PSI)
+
+typedef struct PnvPsi PnvPsi;
+typedef struct PnvChip PnvChip;
+typedef struct PnvPsiClass {
+    SysBusDeviceClass parent_class;
+
+    int chip_type;
+    uint32_t xscom_pcba;
+    uint32_t xscom_size;
+
+    void (*irq_set)(PnvPsi *psi, int, bool state);
+} PnvPsiClass;
+
+#define TYPE_PNV_PSI_POWER8 TYPE_PNV_PSI "-POWER8"
+#define PNV_PSI_POWER8(obj) \
+    OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI_POWER8)
+
+#define TYPE_PNV_PSI_POWER9 TYPE_PNV_PSI "-POWER9"
+#define PNV_PSI_POWER9(obj) \
+    OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI_POWER9)
 
 #define PSIHB_XSCOM_MAX         0x20
 
@@ -38,9 +63,12 @@ typedef struct PnvPsi {
     /* MemoryRegion fsp_mr; */
     uint64_t fsp_bar;
 
-    /* Interrupt generation */
+    /* P8 Interrupt generation */
     ICSState ics;
 
+    /* P9 Interrupt generation */
+    XiveSource source;
+
     /* Registers */
     uint64_t regs[PSIHB_XSCOM_MAX];
 
@@ -60,6 +88,24 @@ typedef enum PnvPsiIrq {
 
 #define PSI_NUM_INTERRUPTS 6
 
-extern void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state);
+/* P9 PSI Interrupts */
+#define PSIHB9_IRQ_PSI          0
+#define PSIHB9_IRQ_OCC          1
+#define PSIHB9_IRQ_FSI          2
+#define PSIHB9_IRQ_LPCHC        3
+#define PSIHB9_IRQ_LOCAL_ERR    4
+#define PSIHB9_IRQ_GLOBAL_ERR   5
+#define PSIHB9_IRQ_TPM          6
+#define PSIHB9_IRQ_LPC_SIRQ0    7
+#define PSIHB9_IRQ_LPC_SIRQ1    8
+#define PSIHB9_IRQ_LPC_SIRQ2    9
+#define PSIHB9_IRQ_LPC_SIRQ3    10
+#define PSIHB9_IRQ_SBE_I2C      11
+#define PSIHB9_IRQ_DIO          12
+#define PSIHB9_IRQ_PSU          13
+#define PSIHB9_NUM_IRQS         14
+
+void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state);
+void pnv_psi_pic_print_info(PnvPsi *psi, Monitor *mon);
 
 #endif /* _PPC_PNV_PSI_H */
diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index 5bd43467a1ab..019b45bf9189 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -82,6 +82,9 @@ typedef struct PnvXScomInterfaceClass {
 #define PNV_XSCOM_PBCQ_SPCI_BASE  0x9013c00
 #define PNV_XSCOM_PBCQ_SPCI_SIZE  0x5
 
+#define PNV9_XSCOM_PSIHB_BASE     0x5012900
+#define PNV9_XSCOM_PSIHB_SIZE     0x100
+
 #define PNV9_XSCOM_XIVE_BASE      0x5013000
 #define PNV9_XSCOM_XIVE_SIZE      0x300
 
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index b6af896c30e4..e67c9d7d3995 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -743,7 +743,7 @@ static void pnv_chip_power8_instance_init(Object *obj)
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
     int i;
 
-    object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI);
+    object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI_POWER8);
     object_property_add_child(obj, "psi", OBJECT(&chip8->psi), NULL);
     object_property_add_const_link(OBJECT(&chip8->psi), "xics",
                                    OBJECT(qdev_get_machine()), &error_abort);
@@ -923,6 +923,11 @@ static void pnv_chip_power9_instance_init(Object *obj)
     object_property_add_child(obj, "xive", OBJECT(&chip9->xive), NULL);
     object_property_add_const_link(OBJECT(&chip9->xive), "chip", obj,
                                    &error_abort);
+
+    object_initialize(&chip9->psi, sizeof(chip9->psi), TYPE_PNV_PSI_POWER9);
+    object_property_add_child(obj, "psi", OBJECT(&chip9->psi), NULL);
+    object_property_add_const_link(OBJECT(&chip9->psi), "chip", obj,
+                                   &error_abort);
 }
 
 static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
@@ -955,6 +960,18 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
     qdev_set_parent_bus(DEVICE(&chip9->xive), sysbus_get_default());
     pnv_xscom_add_subregion(chip, PNV9_XSCOM_XIVE_BASE,
                             &chip9->xive.xscom_regs);
+
+    /* Processor Service Interface (PSI) Host Bridge */
+    object_property_set_int(OBJECT(&chip9->psi), PNV9_PSIHB_BASE(chip),
+                            "bar", &error_fatal);
+    object_property_set_bool(OBJECT(&chip9->psi), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    qdev_set_parent_bus(DEVICE(&chip9->psi), sysbus_get_default());
+    pnv_xscom_add_subregion(chip, PNV9_XSCOM_PSIHB_BASE,
+                            &chip9->psi.xscom_regs);
 }
 
 static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
@@ -1188,6 +1205,7 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj,
             Pnv9Chip *chip9 = PNV9_CHIP(chip);
 
              pnv_xive_pic_print_info(&chip9->xive, mon);
+             pnv_psi_pic_print_info(&chip9->psi, mon);
         } else {
             Pnv8Chip *chip8 = PNV8_CHIP(chip);
 
diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
index 5b969127c303..8b85dd9555e8 100644
--- a/hw/ppc/pnv_psi.c
+++ b/hw/ppc/pnv_psi.c
@@ -22,6 +22,7 @@
 #include "target/ppc/cpu.h"
 #include "qemu/log.h"
 #include "qapi/error.h"
+#include "monitor/monitor.h"
 
 #include "exec/address-spaces.h"
 
@@ -114,12 +115,14 @@
 #define PSIHB_BAR_MASK                  0x0003fffffff00000ull
 #define PSIHB_FSPBAR_MASK               0x0003ffff00000000ull
 
+#define PSIHB_REG(addr) (((addr) >> 3) + PSIHB_XSCOM_BAR)
+
 static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
 {
     MemoryRegion *sysmem = get_system_memory();
     uint64_t old = psi->regs[PSIHB_XSCOM_BAR];
 
-    psi->regs[PSIHB_XSCOM_BAR] = bar & (PSIHB_BAR_MASK | PSIHB_BAR_EN);
+    psi->regs[PSIHB_XSCOM_BAR] = bar;
 
     /* Update MR, always remove it first */
     if (old & PSIHB_BAR_EN) {
@@ -128,7 +131,7 @@ static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
 
     /* Then add it back if needed */
     if (bar & PSIHB_BAR_EN) {
-        uint64_t addr = bar & PSIHB_BAR_MASK;
+        uint64_t addr = bar & ~PSIHB_BAR_EN;
         memory_region_add_subregion(sysmem, addr, &psi->regs_mr);
     }
 }
@@ -205,7 +208,12 @@ static const uint64_t stat_bits[] = {
     [PSIHB_IRQ_EXTERNAL]  = PSIHB_IRQ_STAT_EXT,
 };
 
-void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state)
+void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state)
+{
+    PNV_PSI_GET_CLASS(psi)->irq_set(psi, irq, state);
+}
+
+static void pnv_psi_power8_irq_set(PnvPsi *psi, int irq, bool state)
 {
     ICSState *ics = &psi->ics;
     uint32_t xivr_reg;
@@ -324,7 +332,7 @@ static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio)
         val = psi->regs[offset];
         break;
     default:
-        qemu_log_mask(LOG_UNIMP, "PSI: read at Ox%" PRIx32 "\n", offset);
+        qemu_log_mask(LOG_UNIMP, "PSI: read at 0x%" PRIx32 "\n", offset);
     }
     return val;
 }
@@ -383,7 +391,7 @@ static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
         pnv_psi_set_irsn(psi, val);
         break;
     default:
-        qemu_log_mask(LOG_UNIMP, "PSI: write at Ox%" PRIx32 "\n", offset);
+        qemu_log_mask(LOG_UNIMP, "PSI: write at 0x%" PRIx32 "\n", offset);
     }
 }
 
@@ -393,13 +401,13 @@ static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
  */
 static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size)
 {
-    return pnv_psi_reg_read(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, true);
+    return pnv_psi_reg_read(opaque, PSIHB_REG(addr), true);
 }
 
 static void pnv_psi_mmio_write(void *opaque, hwaddr addr,
                               uint64_t val, unsigned size)
 {
-    pnv_psi_reg_write(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, val, true);
+    pnv_psi_reg_write(opaque, PSIHB_REG(addr), val, true);
 }
 
 static const MemoryRegionOps psi_mmio_ops = {
@@ -441,7 +449,7 @@ static const MemoryRegionOps pnv_psi_xscom_ops = {
     }
 };
 
-static void pnv_psi_init(Object *obj)
+static void pnv_psi_power8_instance_init(Object *obj)
 {
     PnvPsi *psi = PNV_PSI(obj);
 
@@ -458,7 +466,7 @@ static const uint8_t irq_to_xivr[] = {
     PSIHB_XSCOM_XIVR_EXT,
 };
 
-static void pnv_psi_realize(DeviceState *dev, Error **errp)
+static void pnv_psi_power8_realize(DeviceState *dev, Error **errp)
 {
     PnvPsi *psi = PNV_PSI(dev);
     ICSState *ics = &psi->ics;
@@ -510,28 +518,34 @@ static void pnv_psi_realize(DeviceState *dev, Error **errp)
     }
 }
 
+static const char compat_p8[] = "ibm,power8-psihb-x\0ibm,psihb-x";
+static const char compat_p9[] = "ibm,power9-psihb-x\0ibm,psihb-x";
+
 static int pnv_psi_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset)
 {
-    const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x";
+    PnvPsiClass *ppc = PNV_PSI_GET_CLASS(dev);
     char *name;
     int offset;
-    uint32_t lpc_pcba = PNV_XSCOM_PSIHB_BASE;
     uint32_t reg[] = {
-        cpu_to_be32(lpc_pcba),
-        cpu_to_be32(PNV_XSCOM_PSIHB_SIZE)
+        cpu_to_be32(ppc->xscom_pcba),
+        cpu_to_be32(ppc->xscom_size)
     };
 
-    name = g_strdup_printf("psihb@%x", lpc_pcba);
+    name = g_strdup_printf("psihb@%x", ppc->xscom_pcba);
     offset = fdt_add_subnode(fdt, xscom_offset, name);
     _FDT(offset);
     g_free(name);
 
-    _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
-
-    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2)));
-    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1)));
-    _FDT((fdt_setprop(fdt, offset, "compatible", compat,
-                      sizeof(compat))));
+    _FDT(fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)));
+    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", 2));
+    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", 1));
+    if (ppc->chip_type == PNV_CHIP_POWER9) {
+        _FDT(fdt_setprop(fdt, offset, "compatible", compat_p9,
+                         sizeof(compat_p9)));
+    } else {
+        _FDT(fdt_setprop(fdt, offset, "compatible", compat_p8,
+                         sizeof(compat_p8)));
+    }
     return 0;
 }
 
@@ -541,6 +555,324 @@ static Property pnv_psi_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+static void pnv_psi_power8_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PnvPsiClass *ppc = PNV_PSI_CLASS(klass);
+
+    dc->desc    = "PowerNV PSI Controller POWER8";
+    dc->realize = pnv_psi_power8_realize;
+
+    ppc->chip_type =  PNV_CHIP_POWER8;
+    ppc->xscom_pcba = PNV_XSCOM_PSIHB_BASE;
+    ppc->xscom_size = PNV_XSCOM_PSIHB_SIZE;
+    ppc->irq_set    = pnv_psi_power8_irq_set;
+}
+
+static const TypeInfo pnv_psi_power8_info = {
+    .name          = TYPE_PNV_PSI_POWER8,
+    .parent        = TYPE_PNV_PSI,
+    .instance_init = pnv_psi_power8_instance_init,
+    .class_init    = pnv_psi_power8_class_init,
+};
+
+/* Common registers */
+
+#define PSIHB9_CR                       0x20
+#define PSIHB9_SEMR                     0x28
+
+/* P9 registers */
+
+#define PSIHB9_INTERRUPT_CONTROL        0x58
+#define   PSIHB9_IRQ_METHOD             PPC_BIT(0)
+#define   PSIHB9_IRQ_RESET              PPC_BIT(1)
+#define PSIHB9_ESB_CI_BASE              0x60
+#define   PSIHB9_ESB_CI_VALID           1
+#define PSIHB9_ESB_NOTIF_ADDR           0x68
+#define   PSIHB9_ESB_NOTIF_VALID        1
+#define PSIHB9_IVT_OFFSET               0x70
+#define   PSIHB9_IVT_OFF_SHIFT          32
+
+#define PSIHB9_IRQ_LEVEL                0x78 /* assertion */
+#define   PSIHB9_IRQ_LEVEL_PSI          PPC_BIT(0)
+#define   PSIHB9_IRQ_LEVEL_OCC          PPC_BIT(1)
+#define   PSIHB9_IRQ_LEVEL_FSI          PPC_BIT(2)
+#define   PSIHB9_IRQ_LEVEL_LPCHC        PPC_BIT(3)
+#define   PSIHB9_IRQ_LEVEL_LOCAL_ERR    PPC_BIT(4)
+#define   PSIHB9_IRQ_LEVEL_GLOBAL_ERR   PPC_BIT(5)
+#define   PSIHB9_IRQ_LEVEL_TPM          PPC_BIT(6)
+#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ1    PPC_BIT(7)
+#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ2    PPC_BIT(8)
+#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ3    PPC_BIT(9)
+#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ4    PPC_BIT(10)
+#define   PSIHB9_IRQ_LEVEL_SBE_I2C      PPC_BIT(11)
+#define   PSIHB9_IRQ_LEVEL_DIO          PPC_BIT(12)
+#define   PSIHB9_IRQ_LEVEL_PSU          PPC_BIT(13)
+#define   PSIHB9_IRQ_LEVEL_I2C_C        PPC_BIT(14)
+#define   PSIHB9_IRQ_LEVEL_I2C_D        PPC_BIT(15)
+#define   PSIHB9_IRQ_LEVEL_I2C_E        PPC_BIT(16)
+#define   PSIHB9_IRQ_LEVEL_SBE          PPC_BIT(19)
+
+#define PSIHB9_IRQ_STAT                 0x80 /* P bit */
+#define   PSIHB9_IRQ_STAT_PSI           PPC_BIT(0)
+#define   PSIHB9_IRQ_STAT_OCC           PPC_BIT(1)
+#define   PSIHB9_IRQ_STAT_FSI           PPC_BIT(2)
+#define   PSIHB9_IRQ_STAT_LPCHC         PPC_BIT(3)
+#define   PSIHB9_IRQ_STAT_LOCAL_ERR     PPC_BIT(4)
+#define   PSIHB9_IRQ_STAT_GLOBAL_ERR    PPC_BIT(5)
+#define   PSIHB9_IRQ_STAT_TPM           PPC_BIT(6)
+#define   PSIHB9_IRQ_STAT_LPC_SIRQ1     PPC_BIT(7)
+#define   PSIHB9_IRQ_STAT_LPC_SIRQ2     PPC_BIT(8)
+#define   PSIHB9_IRQ_STAT_LPC_SIRQ3     PPC_BIT(9)
+#define   PSIHB9_IRQ_STAT_LPC_SIRQ4     PPC_BIT(10)
+#define   PSIHB9_IRQ_STAT_SBE_I2C       PPC_BIT(11)
+#define   PSIHB9_IRQ_STAT_DIO           PPC_BIT(12)
+#define   PSIHB9_IRQ_STAT_PSU           PPC_BIT(13)
+
+static void pnv_psi_notify(XiveFabric *xf, uint32_t srcno)
+{
+    PnvPsi *psi = PNV_PSI(xf);
+    uint64_t notif_port = psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
+    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
+    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
+
+    uint32_t offset =
+        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
+    uint64_t lisn = cpu_to_be64(offset + srcno);
+
+    if (valid) {
+        cpu_physical_memory_write(notify_addr, &lisn, sizeof(lisn));
+    }
+}
+
+/*
+ * TODO : move to parent class
+ */
+static void pnv_psi_reset(DeviceState *dev)
+{
+    PnvPsi *psi = PNV_PSI(dev);
+
+    memset(psi->regs, 0x0, sizeof(psi->regs));
+
+    psi->regs[PSIHB_XSCOM_BAR] = psi->bar | PSIHB_BAR_EN;
+}
+
+static uint64_t pnv_psi_p9_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvPsi *psi = PNV_PSI(opaque);
+    uint32_t reg = PSIHB_REG(addr);
+    uint64_t val = -1;
+
+    switch (addr) {
+    case PSIHB9_CR:
+    case PSIHB9_SEMR:
+        /* FSP stuff */
+    case PSIHB9_INTERRUPT_CONTROL:
+    case PSIHB9_ESB_CI_BASE:
+    case PSIHB9_ESB_NOTIF_ADDR:
+    case PSIHB9_IVT_OFFSET:
+        val = psi->regs[reg];
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: read at 0x%" PRIx64 "\n", addr);
+    }
+
+    return val;
+}
+
+static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr,
+                                  uint64_t val, unsigned size)
+{
+    PnvPsi *psi = PNV_PSI(opaque);
+    uint32_t reg = PSIHB_REG(addr);
+    MemoryRegion *sysmem = get_system_memory();
+
+    switch (addr) {
+    case PSIHB9_CR:
+    case PSIHB9_SEMR:
+        /* FSP stuff */
+        break;
+    case PSIHB9_INTERRUPT_CONTROL:
+        if (val & PSIHB9_IRQ_RESET) {
+            device_reset(DEVICE(&psi->source));
+        }
+        psi->regs[reg] = val;
+        break;
+
+    case PSIHB9_ESB_CI_BASE:
+        if (!(val & PSIHB9_ESB_CI_VALID)) {
+            if (psi->regs[reg] & PSIHB9_ESB_CI_VALID) {
+                memory_region_del_subregion(sysmem, &psi->source.esb_mmio);
+            }
+        } else {
+            if (!(psi->regs[reg] & PSIHB9_ESB_CI_VALID)) {
+                memory_region_add_subregion(sysmem,
+                                        val & ~PSIHB9_ESB_CI_VALID,
+                                        &psi->source.esb_mmio);
+            }
+        }
+        psi->regs[reg] = val;
+        break;
+
+    case PSIHB9_ESB_NOTIF_ADDR:
+        psi->regs[reg] = val;
+        break;
+    case PSIHB9_IVT_OFFSET:
+        psi->regs[reg] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: write at 0x%" PRIx64 "\n", addr);
+    }
+}
+
+static const MemoryRegionOps pnv_psi_p9_mmio_ops = {
+    .read = pnv_psi_p9_mmio_read,
+    .write = pnv_psi_p9_mmio_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+static uint64_t pnv_psi_p9_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    /* No read are expected */
+    qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom read at 0x%" PRIx64 "\n", addr);
+    return -1;
+}
+
+static void pnv_psi_p9_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    PnvPsi *psi = PNV_PSI(opaque);
+
+    /* XSCOM is only used to set the PSIHB MMIO region */
+    switch (addr >> 3) {
+    case PSIHB_XSCOM_BAR:
+        pnv_psi_set_bar(psi, val);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom write at 0x%" PRIx64 "\n",
+                      addr);
+    }
+}
+
+static const MemoryRegionOps pnv_psi_p9_xscom_ops = {
+    .read = pnv_psi_p9_xscom_read,
+    .write = pnv_psi_p9_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    }
+};
+
+static void pnv_psi_power9_irq_set(PnvPsi *psi, int irq, bool state)
+{
+    uint32_t irq_method = psi->regs[PSIHB_REG(PSIHB9_INTERRUPT_CONTROL)];
+
+    if (irq > PSIHB9_NUM_IRQS) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
+        return;
+    }
+
+    if (irq_method & PSIHB9_IRQ_METHOD) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: LSI IRQ method no supported\n");
+        return;
+    }
+
+    /* Update LSI levels */
+    if (state) {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] |= PPC_BIT(irq);
+    } else {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] &= ~PPC_BIT(irq);
+    }
+
+    qemu_set_irq(xive_source_qirq(&psi->source, irq), state);
+}
+
+static void pnv_psi_power9_instance_init(Object *obj)
+{
+    PnvPsi *psi = PNV_PSI(obj);
+
+    object_initialize(&psi->source, sizeof(psi->source), TYPE_XIVE_SOURCE);
+    object_property_add_child(obj, "source", OBJECT(&psi->source), NULL);
+}
+
+static void pnv_psi_power9_realize(DeviceState *dev, Error **errp)
+{
+    PnvPsi *psi = PNV_PSI(dev);
+    XiveSource *xsrc = &psi->source;
+    Error *local_err = NULL;
+    int i;
+
+    /* This is the only device with 4k ESB pages */
+    object_property_set_int(OBJECT(xsrc), XIVE_ESB_4K, "shift",
+                            &error_fatal);
+    object_property_set_int(OBJECT(xsrc), PSIHB9_NUM_IRQS, "nr-irqs",
+                            &error_fatal);
+    object_property_add_const_link(OBJECT(xsrc), "xive", OBJECT(psi),
+                                   &error_fatal);
+    object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    qdev_set_parent_bus(DEVICE(xsrc), sysbus_get_default());
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        xive_source_irq_set(xsrc, i, true);
+    }
+
+    /* XSCOM region for PSI registers */
+    pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_p9_xscom_ops,
+                psi, "xscom-psi", PNV9_XSCOM_PSIHB_SIZE);
+
+    /* Initialize MMIO region */
+    memory_region_init_io(&psi->regs_mr, OBJECT(dev), &pnv_psi_p9_mmio_ops, psi,
+                          "psihb", PNV9_PSIHB_SIZE);
+
+    /* Default BAR for MMIO region */
+    pnv_psi_set_bar(psi, psi->bar | PSIHB_BAR_EN);
+}
+
+static void pnv_psi_power9_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PnvPsiClass *ppc = PNV_PSI_CLASS(klass);
+    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
+
+    dc->desc    = "PowerNV PSI Controller POWER9";
+    dc->realize = pnv_psi_power9_realize;
+
+    ppc->chip_type  = PNV_CHIP_POWER9;
+    ppc->xscom_pcba = PNV9_XSCOM_PSIHB_BASE;
+    ppc->xscom_size = PNV9_XSCOM_PSIHB_SIZE;
+    ppc->irq_set    = pnv_psi_power9_irq_set;
+
+    xfc->notify      = pnv_psi_notify;
+}
+
+static const TypeInfo pnv_psi_power9_info = {
+    .name          = TYPE_PNV_PSI_POWER9,
+    .parent        = TYPE_PNV_PSI,
+    .instance_init = pnv_psi_power9_instance_init,
+    .class_init    = pnv_psi_power9_class_init,
+    .interfaces = (InterfaceInfo[]) {
+            { TYPE_XIVE_FABRIC },
+            { },
+    },
+};
+
 static void pnv_psi_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -548,16 +880,18 @@ static void pnv_psi_class_init(ObjectClass *klass, void *data)
 
     xdc->dt_xscom = pnv_psi_dt_xscom;
 
-    dc->realize = pnv_psi_realize;
+    dc->desc = "PowerNV PSI Controller";
     dc->props = pnv_psi_properties;
+    dc->reset  = pnv_psi_reset;
 }
 
 static const TypeInfo pnv_psi_info = {
     .name          = TYPE_PNV_PSI,
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(PnvPsi),
-    .instance_init = pnv_psi_init,
     .class_init    = pnv_psi_class_init,
+    .class_size    = sizeof(PnvPsiClass),
+    .abstract      = true,
     .interfaces    = (InterfaceInfo[]) {
         { TYPE_PNV_XSCOM_INTERFACE },
         { }
@@ -567,6 +901,18 @@ static const TypeInfo pnv_psi_info = {
 static void pnv_psi_register_types(void)
 {
     type_register_static(&pnv_psi_info);
+    type_register_static(&pnv_psi_power8_info);
+    type_register_static(&pnv_psi_power9_info);
 }
 
 type_init(pnv_psi_register_types)
+
+void pnv_psi_pic_print_info(PnvPsi *psi, Monitor *mon)
+{
+    uint32_t offset =
+        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
+
+    monitor_printf(mon, "PSIHB Source %08x .. %08x\n",
+                  offset, offset + psi->source.nr_irqs - 1);
+    xive_source_pic_print_info(&psi->source, offset, mon);
+}
Cédric Le Goater Nov. 22, 2018, 7:59 a.m. UTC | #5
On 11/22/18 7:50 AM, Benjamin Herrenschmidt wrote:
> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
>>
>> Sorry, didn't think of this in my first reply.
>>
>> 1) Does the hardware ever actually write back to the EAS?  I know it
>> does for the END, but it's not clear why it would need to for the
>> EAS.  If not, we don't need the setter.
> 
> Nope, though the PAPR model will via hcalls

Indeed. The H_INT_SET_SOURCE_CONFIG hcall updates the EAT.

>> 2) The signatures are a bit odd here.  For the setter, a value would
>> make sense than a (XiveEAS *), since it's just a word.  For the getter
>> you could return the EAS value directly rather than using a pointer -
>> there's already a valid bit in the EAS so you can construct a value
>> with that cleared if the lisn is out of bounds.

Yes we could. I think I made it that way to be consistent with the 
other XIVE internal structures which are bigger : END, NVT

There might be other reasons in Pnv. One was to use generic accessors 
to the guest RAM but I didn't do it finally. Take a look at the Pnv
model and we might decide to change the prototype then. I don't 
think it's a major change.

Thanks,

C.
David Gibson Nov. 23, 2018, 1:10 a.m. UTC | #6
On Thu, Nov 22, 2018 at 05:50:07PM +1100, Benjamin Herrenschmidt wrote:
> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
> > 
> > Sorry, didn't think of this in my first reply.
> > 
> > 1) Does the hardware ever actually write back to the EAS?  I know it
> > does for the END, but it's not clear why it would need to for the
> > EAS.  If not, we don't need the setter.
> 
> Nope, though the PAPR model will via hcalls

Right, bit AIUI the set_eas hook is about abstracting PAPR vs bare
metal details.  Since the hcall knows it's PAPR it can just update the
backing information for the EAS directly, and no need for an
abstracted hook.

> 
> > 
> > 2) The signatures are a bit odd here.  For the setter, a value would
> > make sense than a (XiveEAS *), since it's just a word.  For the getter
> > you could return the EAS value directly rather than using a pointer -
> > there's already a valid bit in the EAS so you can construct a value
> > with that cleared if the lisn is out of bounds.
>
David Gibson Nov. 23, 2018, 1:17 a.m. UTC | #7
On Thu, Nov 22, 2018 at 08:59:32AM +0100, Cédric Le Goater wrote:
> On 11/22/18 7:50 AM, Benjamin Herrenschmidt wrote:
> > On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
> >>
> >> Sorry, didn't think of this in my first reply.
> >>
> >> 1) Does the hardware ever actually write back to the EAS?  I know it
> >> does for the END, but it's not clear why it would need to for the
> >> EAS.  If not, we don't need the setter.
> > 
> > Nope, though the PAPR model will via hcalls
> 
> Indeed. The H_INT_SET_SOURCE_CONFIG hcall updates the EAT.
> 
> >> 2) The signatures are a bit odd here.  For the setter, a value would
> >> make sense than a (XiveEAS *), since it's just a word.  For the getter
> >> you could return the EAS value directly rather than using a pointer -
> >> there's already a valid bit in the EAS so you can construct a value
> >> with that cleared if the lisn is out of bounds.
> 
> Yes we could. I think I made it that way to be consistent with the 
> other XIVE internal structures which are bigger : END, NVT

Yeah, but as noted elsewhere I don't really like the get/set model for
the bigger-than-word-size structures.  It gives the impression that
they're atomic updates when they can't be, as well as unnecessarily
copying a bunch of stuff, sometimes on hot paths

> There might be other reasons in Pnv. One was to use generic accessors 
> to the guest RAM but I didn't do it finally. Take a look at the Pnv
> model and we might decide to change the prototype then. I don't 
> think it's a major change.

Hmmm.
David Gibson Nov. 23, 2018, 3:50 a.m. UTC | #8
On Thu, Nov 22, 2018 at 08:53:00AM +0100, Cédric Le Goater wrote:
> On 11/22/18 5:11 AM, David Gibson wrote:
> > On Fri, Nov 16, 2018 at 11:56:57AM +0100, Cédric Le Goater wrote:
> >> The XiveRouter models the second sub-engine of the overall XIVE
> >> architecture : the Interrupt Virtualization Routing Engine (IVRE).
> >>
> >> The IVRE handles event notifications of the IVSE through MMIO stores
> >> and performs the interrupt routing process. For this purpose, it uses
> >> a set of table stored in system memory, the first of which being the
> >> Event Assignment Structure (EAS) table.
> >>
> >> The EAT associates an interrupt source number with an Event Notification
> >> Descriptor (END) which will be used in a second phase of the routing
> >> process to identify a Notification Virtual Target.
> >>
> >> The XiveRouter is an abstract class which needs to be inherited from
> >> to define a storage for the EAT, and other upcoming tables. The
> >> 'chip-id' atttribute is not strictly necessary for the sPAPR and
> >> PowerNV machines but it's a good way to test the routing algorithm.
> >> Without this atttribute, the XiveRouter could be a simple QOM
> >> interface.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  include/hw/ppc/xive.h      | 32 ++++++++++++++
> >>  include/hw/ppc/xive_regs.h | 31 ++++++++++++++
> >>  hw/intc/xive.c             | 86 ++++++++++++++++++++++++++++++++++++++
> >>  3 files changed, 149 insertions(+)
> >>  create mode 100644 include/hw/ppc/xive_regs.h
> >>
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index be93fae6317b..5a0696366577 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -11,6 +11,7 @@
> >>  #define PPC_XIVE_H
> >>  
> >>  #include "hw/sysbus.h"
> > 
> > Again, I don't think making this a SysBusDevice is quite right.
> > Even more so for the router than the source, because at least for PAPR
> > it might not have any MMIO presence at all.
> 
> The controller model inherits from the XiveRouter and manages the
> TIMA.

Um.. I'm not sure what you mean by the "controller model".  Surely the
presenter should own the TIMA, not the router?

> 
> >> +#include "hw/ppc/xive_regs.h"
> >>  
> >>  /*
> >>   * XIVE Fabric (Interface between Source and Router)
> >> @@ -168,4 +169,35 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
> >>      }
> >>  }
> >>  
> >> +/*
> >> + * XIVE Router
> >> + */
> >> +
> >> +typedef struct XiveRouter {
> >> +    SysBusDevice    parent;
> >> +
> >> +    uint32_t        chip_id;
> > 
> > I don't think this belongs in the base class.  The PowerNV specific
> > variants will need it, but it doesn't make sense for the PAPR version.
> 
> yeah. I am using it as a END and NVT block identifier but it's not 
> required for sPAPR, it could just be zero. 
> 
> It was good to test the routing algo which should not assume that the 
> block id is zero. 
>  
> > 
> >> +} XiveRouter;
> >> +
> >> +#define TYPE_XIVE_ROUTER "xive-router"
> >> +#define XIVE_ROUTER(obj)                                \
> >> +    OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER)
> >> +#define XIVE_ROUTER_CLASS(klass)                                        \
> >> +    OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER)
> >> +#define XIVE_ROUTER_GET_CLASS(obj)                              \
> >> +    OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
> >> +
> >> +typedef struct XiveRouterClass {
> >> +    SysBusDeviceClass parent;
> >> +
> >> +    /* XIVE table accessors */
> >> +    int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >> +    int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >> +} XiveRouterClass;
> >> +
> >> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
> >> +
> >> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >> +
> >>  #endif /* PPC_XIVE_H */
> >> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
> >> new file mode 100644
> >> index 000000000000..12499b33614c
> >> --- /dev/null
> >> +++ b/include/hw/ppc/xive_regs.h
> >> @@ -0,0 +1,31 @@
> >> +/*
> >> + * QEMU PowerPC XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2016-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#ifndef PPC_XIVE_REGS_H
> >> +#define PPC_XIVE_REGS_H
> >> +
> >> +/* EAS (Event Assignment Structure)
> >> + *
> >> + * One per interrupt source. Targets an interrupt to a given Event
> >> + * Notification Descriptor (END) and provides the corresponding
> >> + * logical interrupt number (END data)
> >> + */
> >> +typedef struct XiveEAS {
> >> +        /* Use a single 64-bit definition to make it easier to
> >> +         * perform atomic updates
> >> +         */
> >> +        uint64_t        w;
> >> +#define EAS_VALID       PPC_BIT(0)
> >> +#define EAS_END_BLOCK   PPC_BITMASK(4, 7)        /* Destination END block# */
> >> +#define EAS_END_INDEX   PPC_BITMASK(8, 31)       /* Destination END index */
> >> +#define EAS_MASKED      PPC_BIT(32)              /* Masked */
> >> +#define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
> >> +} XiveEAS;
> >> +
> >> +#endif /* PPC_XIVE_REGS_H */
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index 014a2e41f71f..c4c90a25758e 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -442,6 +442,91 @@ static const TypeInfo xive_source_info = {
> >>      .class_init    = xive_source_class_init,
> >>  };
> >>  
> >> +/*
> >> + * XIVE Router (aka. Virtualization Controller or IVRE)
> >> + */
> >> +
> >> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
> >> +{
> >> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> >> +
> >> +    return xrc->get_eas(xrtr, lisn, eas);
> >> +}
> >> +
> >> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
> >> +{
> >> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> >> +
> >> +    return xrc->set_eas(xrtr, lisn, eas);
> >> +}
> >> +
> >> +static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
> >> +{
> >> +    XiveRouter *xrtr = XIVE_ROUTER(xf);
> >> +    XiveEAS eas;
> >> +
> >> +    /* EAS cache lookup */
> >> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
> >> +        return;
> >> +    }
> > 
> > AFAICT a bad LISN here means a qemu error (in the source, probably),
> > not a user or guest error, so an assert() would be more appropriate.
> 
> hmm, I would say no because in the case of PowerNV, the firmware could
> have badly configured the ISN offset of a source which would notify the 
> router with a bad notification event data.

Ah, good point.  That's fine as it is then.

> >> +
> >> +    /* The IVRE has a State Bit Cache for its internal sources which
> >> +     * is also involed at this point. We skip the SBC lookup because
> >> +     * the state bits of the sources are modeled internally in QEMU.
> >> +     */
> >> +
> >> +    if (!(eas.w & EAS_VALID)) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
> >> +        return;
> >> +    }
> >> +
> >> +    if (eas.w & EAS_MASKED) {
> >> +        /* Notification completed */
> >> +        return;
> >> +    }
> >> +}
> >> +
> >> +static Property xive_router_properties[] = {
> >> +    DEFINE_PROP_UINT32("chip-id", XiveRouter, chip_id, 0),
> >> +    DEFINE_PROP_END_OF_LIST(),
> >> +};
> >> +
> >> +static void xive_router_class_init(ObjectClass *klass, void *data)
> >> +{
> >> +    DeviceClass *dc = DEVICE_CLASS(klass);
> >> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
> >> +
> >> +    dc->desc    = "XIVE Router Engine";
> >> +    dc->props   = xive_router_properties;
> >> +    xfc->notify = xive_router_notify;
> >> +}
> >> +
> >> +static const TypeInfo xive_router_info = {
> >> +    .name          = TYPE_XIVE_ROUTER,
> >> +    .parent        = TYPE_SYS_BUS_DEVICE,
> >> +    .abstract      = true,
> >> +    .class_size    = sizeof(XiveRouterClass),
> >> +    .class_init    = xive_router_class_init,
> >> +    .interfaces    = (InterfaceInfo[]) {
> >> +        { TYPE_XIVE_FABRIC },
> > 
> > So as far as I can see so far, the XiveFabric interface will
> > essentially have to be implemented on the router object, so I'm not
> > seeing much point to having the interface rather than just a direct
> > call on the router object.  But I haven't read the whole series yet,
> > so maybe I'm missing something.
> 
> The PSIHB and PHB4 models are using it but there are not in the series.
> 
> I can send the PSIHB patch in the next version if you like, it's the 
> patch right after PnvXive. It's attached below for the moment. Look at 
> pnv_psi_notify().

Hrm, I see.  This seems like a really convoluted way of achieving what
you need here.  We want to abstract exactly how the source delivers
notifies, but doing it with an interface on some object that's not
necessarily either the source or the router seems odd.  At the very
least the names need to change (of both interface and property for the
target object).

> 
> Thanks,
> 
> C.
> 
> >> +        { }
> >> +    }
> >> +};
> >> +
> >> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
> >> +{
> >> +    if (!(eas->w & EAS_VALID)) {
> >> +        return;
> >> +    }
> >> +
> >> +    monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
> >> +                   lisn, eas->w & EAS_MASKED ? "M" : " ",
> >> +                   (uint8_t)  GETFIELD(EAS_END_BLOCK, eas->w),
> >> +                   (uint32_t) GETFIELD(EAS_END_INDEX, eas->w),
> >> +                   (uint32_t) GETFIELD(EAS_END_DATA, eas->w));
> >> +}
> >> +
> >>  /*
> >>   * XIVE Fabric
> >>   */
> >> @@ -455,6 +540,7 @@ static void xive_register_types(void)
> >>  {
> >>      type_register_static(&xive_source_info);
> >>      type_register_static(&xive_fabric_info);
> >> +    type_register_static(&xive_router_info);
> >>  }
> >>  
> >>  type_init(xive_register_types)
> > 
> 

> >From 680fd6ff7c99e669708fbc5cfdbfcd95e83e7c07 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
> Date: Wed, 21 Nov 2018 10:29:45 +0100
> Subject: [PATCH] ppc/pnv: add a PSI bridge model for POWER9 processor
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> The PSI bridge on POWER9 is very similar to POWER8. The BAR is still
> set through XSCOM but the controls are now entirely done with MMIOs.
> More interrupts are defined and the interrupt controller interface has
> changed to XIVE. The POWER9 model is a first example of the usage of
> the notify() handler of the XiveFabric interface, linking the PSI
> XiveSource to its owning device model.
> 
> Signed-off-by: C??dric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/pnv.h       |   6 +
>  include/hw/ppc/pnv_psi.h   |  50 ++++-
>  include/hw/ppc/pnv_xscom.h |   3 +
>  hw/ppc/pnv.c               |  20 +-
>  hw/ppc/pnv_psi.c           | 390 ++++++++++++++++++++++++++++++++++---
>  5 files changed, 444 insertions(+), 25 deletions(-)
> 
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index c402e5d5844b..8be1147481f9 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -88,6 +88,7 @@ typedef struct Pnv9Chip {
>  
>      /*< public >*/
>      PnvXive      xive;
> +    PnvPsi       psi;
>  } Pnv9Chip;
>  
>  typedef struct PnvChipClass {
> @@ -250,11 +251,16 @@ void pnv_bmc_powerdown(IPMIBmc *bmc);
>  #define PNV9_XIVE_PC_SIZE            0x0000001000000000ull
>  #define PNV9_XIVE_PC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006018000000000ull)
>  
> +#define PNV9_PSIHB_SIZE              0x0000000000100000ull
> +#define PNV9_PSIHB_BASE(chip)        PNV9_CHIP_BASE(chip, 0x0006030203000000ull)
> +
>  #define PNV9_XIVE_IC_SIZE            0x0000000000080000ull
>  #define PNV9_XIVE_IC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203100000ull)
>  
>  #define PNV9_XIVE_TM_SIZE            0x0000000000040000ull
>  #define PNV9_XIVE_TM_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203180000ull)
>  
> +#define PNV9_PSIHB_ESB_SIZE          0x0000000000010000ull
> +#define PNV9_PSIHB_ESB_BASE(chip)    PNV9_CHIP_BASE(chip, 0x00060302031c0000ull)
>  
>  #endif /* _PPC_PNV_H */
> diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h
> index f6af5eae1fa8..b8f8d082bcf9 100644
> --- a/include/hw/ppc/pnv_psi.h
> +++ b/include/hw/ppc/pnv_psi.h
> @@ -21,10 +21,35 @@
>  
>  #include "hw/sysbus.h"
>  #include "hw/ppc/xics.h"
> +#include "hw/ppc/xive.h"
>  
>  #define TYPE_PNV_PSI "pnv-psi"
>  #define PNV_PSI(obj) \
>       OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI)
> +#define PNV_PSI_CLASS(klass) \
> +     OBJECT_CLASS_CHECK(PnvPsiClass, (klass), TYPE_PNV_PSI)
> +#define PNV_PSI_GET_CLASS(obj) \
> +     OBJECT_GET_CLASS(PnvPsiClass, (obj), TYPE_PNV_PSI)
> +
> +typedef struct PnvPsi PnvPsi;
> +typedef struct PnvChip PnvChip;
> +typedef struct PnvPsiClass {
> +    SysBusDeviceClass parent_class;
> +
> +    int chip_type;
> +    uint32_t xscom_pcba;
> +    uint32_t xscom_size;
> +
> +    void (*irq_set)(PnvPsi *psi, int, bool state);
> +} PnvPsiClass;
> +
> +#define TYPE_PNV_PSI_POWER8 TYPE_PNV_PSI "-POWER8"
> +#define PNV_PSI_POWER8(obj) \
> +    OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI_POWER8)
> +
> +#define TYPE_PNV_PSI_POWER9 TYPE_PNV_PSI "-POWER9"
> +#define PNV_PSI_POWER9(obj) \
> +    OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI_POWER9)
>  
>  #define PSIHB_XSCOM_MAX         0x20
>  
> @@ -38,9 +63,12 @@ typedef struct PnvPsi {
>      /* MemoryRegion fsp_mr; */
>      uint64_t fsp_bar;
>  
> -    /* Interrupt generation */
> +    /* P8 Interrupt generation */
>      ICSState ics;
>  
> +    /* P9 Interrupt generation */
> +    XiveSource source;
> +
>      /* Registers */
>      uint64_t regs[PSIHB_XSCOM_MAX];
>  
> @@ -60,6 +88,24 @@ typedef enum PnvPsiIrq {
>  
>  #define PSI_NUM_INTERRUPTS 6
>  
> -extern void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state);
> +/* P9 PSI Interrupts */
> +#define PSIHB9_IRQ_PSI          0
> +#define PSIHB9_IRQ_OCC          1
> +#define PSIHB9_IRQ_FSI          2
> +#define PSIHB9_IRQ_LPCHC        3
> +#define PSIHB9_IRQ_LOCAL_ERR    4
> +#define PSIHB9_IRQ_GLOBAL_ERR   5
> +#define PSIHB9_IRQ_TPM          6
> +#define PSIHB9_IRQ_LPC_SIRQ0    7
> +#define PSIHB9_IRQ_LPC_SIRQ1    8
> +#define PSIHB9_IRQ_LPC_SIRQ2    9
> +#define PSIHB9_IRQ_LPC_SIRQ3    10
> +#define PSIHB9_IRQ_SBE_I2C      11
> +#define PSIHB9_IRQ_DIO          12
> +#define PSIHB9_IRQ_PSU          13
> +#define PSIHB9_NUM_IRQS         14
> +
> +void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state);
> +void pnv_psi_pic_print_info(PnvPsi *psi, Monitor *mon);
>  
>  #endif /* _PPC_PNV_PSI_H */
> diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
> index 5bd43467a1ab..019b45bf9189 100644
> --- a/include/hw/ppc/pnv_xscom.h
> +++ b/include/hw/ppc/pnv_xscom.h
> @@ -82,6 +82,9 @@ typedef struct PnvXScomInterfaceClass {
>  #define PNV_XSCOM_PBCQ_SPCI_BASE  0x9013c00
>  #define PNV_XSCOM_PBCQ_SPCI_SIZE  0x5
>  
> +#define PNV9_XSCOM_PSIHB_BASE     0x5012900
> +#define PNV9_XSCOM_PSIHB_SIZE     0x100
> +
>  #define PNV9_XSCOM_XIVE_BASE      0x5013000
>  #define PNV9_XSCOM_XIVE_SIZE      0x300
>  
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index b6af896c30e4..e67c9d7d3995 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -743,7 +743,7 @@ static void pnv_chip_power8_instance_init(Object *obj)
>      PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
>      int i;
>  
> -    object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI);
> +    object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI_POWER8);
>      object_property_add_child(obj, "psi", OBJECT(&chip8->psi), NULL);
>      object_property_add_const_link(OBJECT(&chip8->psi), "xics",
>                                     OBJECT(qdev_get_machine()), &error_abort);
> @@ -923,6 +923,11 @@ static void pnv_chip_power9_instance_init(Object *obj)
>      object_property_add_child(obj, "xive", OBJECT(&chip9->xive), NULL);
>      object_property_add_const_link(OBJECT(&chip9->xive), "chip", obj,
>                                     &error_abort);
> +
> +    object_initialize(&chip9->psi, sizeof(chip9->psi), TYPE_PNV_PSI_POWER9);
> +    object_property_add_child(obj, "psi", OBJECT(&chip9->psi), NULL);
> +    object_property_add_const_link(OBJECT(&chip9->psi), "chip", obj,
> +                                   &error_abort);
>  }
>  
>  static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
> @@ -955,6 +960,18 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
>      qdev_set_parent_bus(DEVICE(&chip9->xive), sysbus_get_default());
>      pnv_xscom_add_subregion(chip, PNV9_XSCOM_XIVE_BASE,
>                              &chip9->xive.xscom_regs);
> +
> +    /* Processor Service Interface (PSI) Host Bridge */
> +    object_property_set_int(OBJECT(&chip9->psi), PNV9_PSIHB_BASE(chip),
> +                            "bar", &error_fatal);
> +    object_property_set_bool(OBJECT(&chip9->psi), true, "realized", &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +    qdev_set_parent_bus(DEVICE(&chip9->psi), sysbus_get_default());
> +    pnv_xscom_add_subregion(chip, PNV9_XSCOM_PSIHB_BASE,
> +                            &chip9->psi.xscom_regs);
>  }
>  
>  static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
> @@ -1188,6 +1205,7 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj,
>              Pnv9Chip *chip9 = PNV9_CHIP(chip);
>  
>               pnv_xive_pic_print_info(&chip9->xive, mon);
> +             pnv_psi_pic_print_info(&chip9->psi, mon);
>          } else {
>              Pnv8Chip *chip8 = PNV8_CHIP(chip);
>  
> diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
> index 5b969127c303..8b85dd9555e8 100644
> --- a/hw/ppc/pnv_psi.c
> +++ b/hw/ppc/pnv_psi.c
> @@ -22,6 +22,7 @@
>  #include "target/ppc/cpu.h"
>  #include "qemu/log.h"
>  #include "qapi/error.h"
> +#include "monitor/monitor.h"
>  
>  #include "exec/address-spaces.h"
>  
> @@ -114,12 +115,14 @@
>  #define PSIHB_BAR_MASK                  0x0003fffffff00000ull
>  #define PSIHB_FSPBAR_MASK               0x0003ffff00000000ull
>  
> +#define PSIHB_REG(addr) (((addr) >> 3) + PSIHB_XSCOM_BAR)
> +
>  static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
>  {
>      MemoryRegion *sysmem = get_system_memory();
>      uint64_t old = psi->regs[PSIHB_XSCOM_BAR];
>  
> -    psi->regs[PSIHB_XSCOM_BAR] = bar & (PSIHB_BAR_MASK | PSIHB_BAR_EN);
> +    psi->regs[PSIHB_XSCOM_BAR] = bar;
>  
>      /* Update MR, always remove it first */
>      if (old & PSIHB_BAR_EN) {
> @@ -128,7 +131,7 @@ static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
>  
>      /* Then add it back if needed */
>      if (bar & PSIHB_BAR_EN) {
> -        uint64_t addr = bar & PSIHB_BAR_MASK;
> +        uint64_t addr = bar & ~PSIHB_BAR_EN;
>          memory_region_add_subregion(sysmem, addr, &psi->regs_mr);
>      }
>  }
> @@ -205,7 +208,12 @@ static const uint64_t stat_bits[] = {
>      [PSIHB_IRQ_EXTERNAL]  = PSIHB_IRQ_STAT_EXT,
>  };
>  
> -void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state)
> +void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state)
> +{
> +    PNV_PSI_GET_CLASS(psi)->irq_set(psi, irq, state);
> +}
> +
> +static void pnv_psi_power8_irq_set(PnvPsi *psi, int irq, bool state)
>  {
>      ICSState *ics = &psi->ics;
>      uint32_t xivr_reg;
> @@ -324,7 +332,7 @@ static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio)
>          val = psi->regs[offset];
>          break;
>      default:
> -        qemu_log_mask(LOG_UNIMP, "PSI: read at Ox%" PRIx32 "\n", offset);
> +        qemu_log_mask(LOG_UNIMP, "PSI: read at 0x%" PRIx32 "\n", offset);
>      }
>      return val;
>  }
> @@ -383,7 +391,7 @@ static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
>          pnv_psi_set_irsn(psi, val);
>          break;
>      default:
> -        qemu_log_mask(LOG_UNIMP, "PSI: write at Ox%" PRIx32 "\n", offset);
> +        qemu_log_mask(LOG_UNIMP, "PSI: write at 0x%" PRIx32 "\n", offset);
>      }
>  }
>  
> @@ -393,13 +401,13 @@ static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
>   */
>  static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size)
>  {
> -    return pnv_psi_reg_read(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, true);
> +    return pnv_psi_reg_read(opaque, PSIHB_REG(addr), true);
>  }
>  
>  static void pnv_psi_mmio_write(void *opaque, hwaddr addr,
>                                uint64_t val, unsigned size)
>  {
> -    pnv_psi_reg_write(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, val, true);
> +    pnv_psi_reg_write(opaque, PSIHB_REG(addr), val, true);
>  }
>  
>  static const MemoryRegionOps psi_mmio_ops = {
> @@ -441,7 +449,7 @@ static const MemoryRegionOps pnv_psi_xscom_ops = {
>      }
>  };
>  
> -static void pnv_psi_init(Object *obj)
> +static void pnv_psi_power8_instance_init(Object *obj)
>  {
>      PnvPsi *psi = PNV_PSI(obj);
>  
> @@ -458,7 +466,7 @@ static const uint8_t irq_to_xivr[] = {
>      PSIHB_XSCOM_XIVR_EXT,
>  };
>  
> -static void pnv_psi_realize(DeviceState *dev, Error **errp)
> +static void pnv_psi_power8_realize(DeviceState *dev, Error **errp)
>  {
>      PnvPsi *psi = PNV_PSI(dev);
>      ICSState *ics = &psi->ics;
> @@ -510,28 +518,34 @@ static void pnv_psi_realize(DeviceState *dev, Error **errp)
>      }
>  }
>  
> +static const char compat_p8[] = "ibm,power8-psihb-x\0ibm,psihb-x";
> +static const char compat_p9[] = "ibm,power9-psihb-x\0ibm,psihb-x";
> +
>  static int pnv_psi_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset)
>  {
> -    const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x";
> +    PnvPsiClass *ppc = PNV_PSI_GET_CLASS(dev);
>      char *name;
>      int offset;
> -    uint32_t lpc_pcba = PNV_XSCOM_PSIHB_BASE;
>      uint32_t reg[] = {
> -        cpu_to_be32(lpc_pcba),
> -        cpu_to_be32(PNV_XSCOM_PSIHB_SIZE)
> +        cpu_to_be32(ppc->xscom_pcba),
> +        cpu_to_be32(ppc->xscom_size)
>      };
>  
> -    name = g_strdup_printf("psihb@%x", lpc_pcba);
> +    name = g_strdup_printf("psihb@%x", ppc->xscom_pcba);
>      offset = fdt_add_subnode(fdt, xscom_offset, name);
>      _FDT(offset);
>      g_free(name);
>  
> -    _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
> -
> -    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2)));
> -    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1)));
> -    _FDT((fdt_setprop(fdt, offset, "compatible", compat,
> -                      sizeof(compat))));
> +    _FDT(fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)));
> +    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", 2));
> +    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", 1));
> +    if (ppc->chip_type == PNV_CHIP_POWER9) {
> +        _FDT(fdt_setprop(fdt, offset, "compatible", compat_p9,
> +                         sizeof(compat_p9)));
> +    } else {
> +        _FDT(fdt_setprop(fdt, offset, "compatible", compat_p8,
> +                         sizeof(compat_p8)));
> +    }
>      return 0;
>  }
>  
> @@ -541,6 +555,324 @@ static Property pnv_psi_properties[] = {
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> +static void pnv_psi_power8_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    PnvPsiClass *ppc = PNV_PSI_CLASS(klass);
> +
> +    dc->desc    = "PowerNV PSI Controller POWER8";
> +    dc->realize = pnv_psi_power8_realize;
> +
> +    ppc->chip_type =  PNV_CHIP_POWER8;
> +    ppc->xscom_pcba = PNV_XSCOM_PSIHB_BASE;
> +    ppc->xscom_size = PNV_XSCOM_PSIHB_SIZE;
> +    ppc->irq_set    = pnv_psi_power8_irq_set;
> +}
> +
> +static const TypeInfo pnv_psi_power8_info = {
> +    .name          = TYPE_PNV_PSI_POWER8,
> +    .parent        = TYPE_PNV_PSI,
> +    .instance_init = pnv_psi_power8_instance_init,
> +    .class_init    = pnv_psi_power8_class_init,
> +};
> +
> +/* Common registers */
> +
> +#define PSIHB9_CR                       0x20
> +#define PSIHB9_SEMR                     0x28
> +
> +/* P9 registers */
> +
> +#define PSIHB9_INTERRUPT_CONTROL        0x58
> +#define   PSIHB9_IRQ_METHOD             PPC_BIT(0)
> +#define   PSIHB9_IRQ_RESET              PPC_BIT(1)
> +#define PSIHB9_ESB_CI_BASE              0x60
> +#define   PSIHB9_ESB_CI_VALID           1
> +#define PSIHB9_ESB_NOTIF_ADDR           0x68
> +#define   PSIHB9_ESB_NOTIF_VALID        1
> +#define PSIHB9_IVT_OFFSET               0x70
> +#define   PSIHB9_IVT_OFF_SHIFT          32
> +
> +#define PSIHB9_IRQ_LEVEL                0x78 /* assertion */
> +#define   PSIHB9_IRQ_LEVEL_PSI          PPC_BIT(0)
> +#define   PSIHB9_IRQ_LEVEL_OCC          PPC_BIT(1)
> +#define   PSIHB9_IRQ_LEVEL_FSI          PPC_BIT(2)
> +#define   PSIHB9_IRQ_LEVEL_LPCHC        PPC_BIT(3)
> +#define   PSIHB9_IRQ_LEVEL_LOCAL_ERR    PPC_BIT(4)
> +#define   PSIHB9_IRQ_LEVEL_GLOBAL_ERR   PPC_BIT(5)
> +#define   PSIHB9_IRQ_LEVEL_TPM          PPC_BIT(6)
> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ1    PPC_BIT(7)
> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ2    PPC_BIT(8)
> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ3    PPC_BIT(9)
> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ4    PPC_BIT(10)
> +#define   PSIHB9_IRQ_LEVEL_SBE_I2C      PPC_BIT(11)
> +#define   PSIHB9_IRQ_LEVEL_DIO          PPC_BIT(12)
> +#define   PSIHB9_IRQ_LEVEL_PSU          PPC_BIT(13)
> +#define   PSIHB9_IRQ_LEVEL_I2C_C        PPC_BIT(14)
> +#define   PSIHB9_IRQ_LEVEL_I2C_D        PPC_BIT(15)
> +#define   PSIHB9_IRQ_LEVEL_I2C_E        PPC_BIT(16)
> +#define   PSIHB9_IRQ_LEVEL_SBE          PPC_BIT(19)
> +
> +#define PSIHB9_IRQ_STAT                 0x80 /* P bit */
> +#define   PSIHB9_IRQ_STAT_PSI           PPC_BIT(0)
> +#define   PSIHB9_IRQ_STAT_OCC           PPC_BIT(1)
> +#define   PSIHB9_IRQ_STAT_FSI           PPC_BIT(2)
> +#define   PSIHB9_IRQ_STAT_LPCHC         PPC_BIT(3)
> +#define   PSIHB9_IRQ_STAT_LOCAL_ERR     PPC_BIT(4)
> +#define   PSIHB9_IRQ_STAT_GLOBAL_ERR    PPC_BIT(5)
> +#define   PSIHB9_IRQ_STAT_TPM           PPC_BIT(6)
> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ1     PPC_BIT(7)
> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ2     PPC_BIT(8)
> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ3     PPC_BIT(9)
> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ4     PPC_BIT(10)
> +#define   PSIHB9_IRQ_STAT_SBE_I2C       PPC_BIT(11)
> +#define   PSIHB9_IRQ_STAT_DIO           PPC_BIT(12)
> +#define   PSIHB9_IRQ_STAT_PSU           PPC_BIT(13)
> +
> +static void pnv_psi_notify(XiveFabric *xf, uint32_t srcno)
> +{
> +    PnvPsi *psi = PNV_PSI(xf);
> +    uint64_t notif_port = psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
> +    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
> +    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
> +
> +    uint32_t offset =
> +        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
> +    uint64_t lisn = cpu_to_be64(offset + srcno);
> +
> +    if (valid) {
> +        cpu_physical_memory_write(notify_addr, &lisn, sizeof(lisn));
> +    }
> +}
> +
> +/*
> + * TODO : move to parent class
> + */
> +static void pnv_psi_reset(DeviceState *dev)
> +{
> +    PnvPsi *psi = PNV_PSI(dev);
> +
> +    memset(psi->regs, 0x0, sizeof(psi->regs));
> +
> +    psi->regs[PSIHB_XSCOM_BAR] = psi->bar | PSIHB_BAR_EN;
> +}
> +
> +static uint64_t pnv_psi_p9_mmio_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    PnvPsi *psi = PNV_PSI(opaque);
> +    uint32_t reg = PSIHB_REG(addr);
> +    uint64_t val = -1;
> +
> +    switch (addr) {
> +    case PSIHB9_CR:
> +    case PSIHB9_SEMR:
> +        /* FSP stuff */
> +    case PSIHB9_INTERRUPT_CONTROL:
> +    case PSIHB9_ESB_CI_BASE:
> +    case PSIHB9_ESB_NOTIF_ADDR:
> +    case PSIHB9_IVT_OFFSET:
> +        val = psi->regs[reg];
> +        break;
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: read at 0x%" PRIx64 "\n", addr);
> +    }
> +
> +    return val;
> +}
> +
> +static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr,
> +                                  uint64_t val, unsigned size)
> +{
> +    PnvPsi *psi = PNV_PSI(opaque);
> +    uint32_t reg = PSIHB_REG(addr);
> +    MemoryRegion *sysmem = get_system_memory();
> +
> +    switch (addr) {
> +    case PSIHB9_CR:
> +    case PSIHB9_SEMR:
> +        /* FSP stuff */
> +        break;
> +    case PSIHB9_INTERRUPT_CONTROL:
> +        if (val & PSIHB9_IRQ_RESET) {
> +            device_reset(DEVICE(&psi->source));
> +        }
> +        psi->regs[reg] = val;
> +        break;
> +
> +    case PSIHB9_ESB_CI_BASE:
> +        if (!(val & PSIHB9_ESB_CI_VALID)) {
> +            if (psi->regs[reg] & PSIHB9_ESB_CI_VALID) {
> +                memory_region_del_subregion(sysmem, &psi->source.esb_mmio);
> +            }
> +        } else {
> +            if (!(psi->regs[reg] & PSIHB9_ESB_CI_VALID)) {
> +                memory_region_add_subregion(sysmem,
> +                                        val & ~PSIHB9_ESB_CI_VALID,
> +                                        &psi->source.esb_mmio);
> +            }
> +        }
> +        psi->regs[reg] = val;
> +        break;
> +
> +    case PSIHB9_ESB_NOTIF_ADDR:
> +        psi->regs[reg] = val;
> +        break;
> +    case PSIHB9_IVT_OFFSET:
> +        psi->regs[reg] = val;
> +        break;
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: write at 0x%" PRIx64 "\n", addr);
> +    }
> +}
> +
> +static const MemoryRegionOps pnv_psi_p9_mmio_ops = {
> +    .read = pnv_psi_p9_mmio_read,
> +    .write = pnv_psi_p9_mmio_write,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +    .valid = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    },
> +};
> +
> +static uint64_t pnv_psi_p9_xscom_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    /* No read are expected */
> +    qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom read at 0x%" PRIx64 "\n", addr);
> +    return -1;
> +}
> +
> +static void pnv_psi_p9_xscom_write(void *opaque, hwaddr addr,
> +                                uint64_t val, unsigned size)
> +{
> +    PnvPsi *psi = PNV_PSI(opaque);
> +
> +    /* XSCOM is only used to set the PSIHB MMIO region */
> +    switch (addr >> 3) {
> +    case PSIHB_XSCOM_BAR:
> +        pnv_psi_set_bar(psi, val);
> +        break;
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom write at 0x%" PRIx64 "\n",
> +                      addr);
> +    }
> +}
> +
> +static const MemoryRegionOps pnv_psi_p9_xscom_ops = {
> +    .read = pnv_psi_p9_xscom_read,
> +    .write = pnv_psi_p9_xscom_write,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +    .valid = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    }
> +};
> +
> +static void pnv_psi_power9_irq_set(PnvPsi *psi, int irq, bool state)
> +{
> +    uint32_t irq_method = psi->regs[PSIHB_REG(PSIHB9_INTERRUPT_CONTROL)];
> +
> +    if (irq > PSIHB9_NUM_IRQS) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
> +        return;
> +    }
> +
> +    if (irq_method & PSIHB9_IRQ_METHOD) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: LSI IRQ method no supported\n");
> +        return;
> +    }
> +
> +    /* Update LSI levels */
> +    if (state) {
> +        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] |= PPC_BIT(irq);
> +    } else {
> +        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] &= ~PPC_BIT(irq);
> +    }
> +
> +    qemu_set_irq(xive_source_qirq(&psi->source, irq), state);
> +}
> +
> +static void pnv_psi_power9_instance_init(Object *obj)
> +{
> +    PnvPsi *psi = PNV_PSI(obj);
> +
> +    object_initialize(&psi->source, sizeof(psi->source), TYPE_XIVE_SOURCE);
> +    object_property_add_child(obj, "source", OBJECT(&psi->source), NULL);
> +}
> +
> +static void pnv_psi_power9_realize(DeviceState *dev, Error **errp)
> +{
> +    PnvPsi *psi = PNV_PSI(dev);
> +    XiveSource *xsrc = &psi->source;
> +    Error *local_err = NULL;
> +    int i;
> +
> +    /* This is the only device with 4k ESB pages */
> +    object_property_set_int(OBJECT(xsrc), XIVE_ESB_4K, "shift",
> +                            &error_fatal);
> +    object_property_set_int(OBJECT(xsrc), PSIHB9_NUM_IRQS, "nr-irqs",
> +                            &error_fatal);
> +    object_property_add_const_link(OBJECT(xsrc), "xive", OBJECT(psi),
> +                                   &error_fatal);
> +    object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +    qdev_set_parent_bus(DEVICE(xsrc), sysbus_get_default());
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        xive_source_irq_set(xsrc, i, true);
> +    }
> +
> +    /* XSCOM region for PSI registers */
> +    pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_p9_xscom_ops,
> +                psi, "xscom-psi", PNV9_XSCOM_PSIHB_SIZE);
> +
> +    /* Initialize MMIO region */
> +    memory_region_init_io(&psi->regs_mr, OBJECT(dev), &pnv_psi_p9_mmio_ops, psi,
> +                          "psihb", PNV9_PSIHB_SIZE);
> +
> +    /* Default BAR for MMIO region */
> +    pnv_psi_set_bar(psi, psi->bar | PSIHB_BAR_EN);
> +}
> +
> +static void pnv_psi_power9_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +    PnvPsiClass *ppc = PNV_PSI_CLASS(klass);
> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
> +
> +    dc->desc    = "PowerNV PSI Controller POWER9";
> +    dc->realize = pnv_psi_power9_realize;
> +
> +    ppc->chip_type  = PNV_CHIP_POWER9;
> +    ppc->xscom_pcba = PNV9_XSCOM_PSIHB_BASE;
> +    ppc->xscom_size = PNV9_XSCOM_PSIHB_SIZE;
> +    ppc->irq_set    = pnv_psi_power9_irq_set;
> +
> +    xfc->notify      = pnv_psi_notify;
> +}
> +
> +static const TypeInfo pnv_psi_power9_info = {
> +    .name          = TYPE_PNV_PSI_POWER9,
> +    .parent        = TYPE_PNV_PSI,
> +    .instance_init = pnv_psi_power9_instance_init,
> +    .class_init    = pnv_psi_power9_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +            { TYPE_XIVE_FABRIC },
> +            { },
> +    },
> +};
> +
>  static void pnv_psi_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -548,16 +880,18 @@ static void pnv_psi_class_init(ObjectClass *klass, void *data)
>  
>      xdc->dt_xscom = pnv_psi_dt_xscom;
>  
> -    dc->realize = pnv_psi_realize;
> +    dc->desc = "PowerNV PSI Controller";
>      dc->props = pnv_psi_properties;
> +    dc->reset  = pnv_psi_reset;
>  }
>  
>  static const TypeInfo pnv_psi_info = {
>      .name          = TYPE_PNV_PSI,
>      .parent        = TYPE_SYS_BUS_DEVICE,
>      .instance_size = sizeof(PnvPsi),
> -    .instance_init = pnv_psi_init,
>      .class_init    = pnv_psi_class_init,
> +    .class_size    = sizeof(PnvPsiClass),
> +    .abstract      = true,
>      .interfaces    = (InterfaceInfo[]) {
>          { TYPE_PNV_XSCOM_INTERFACE },
>          { }
> @@ -567,6 +901,18 @@ static const TypeInfo pnv_psi_info = {
>  static void pnv_psi_register_types(void)
>  {
>      type_register_static(&pnv_psi_info);
> +    type_register_static(&pnv_psi_power8_info);
> +    type_register_static(&pnv_psi_power9_info);
>  }
>  
>  type_init(pnv_psi_register_types)
> +
> +void pnv_psi_pic_print_info(PnvPsi *psi, Monitor *mon)
> +{
> +    uint32_t offset =
> +        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
> +
> +    monitor_printf(mon, "PSIHB Source %08x .. %08x\n",
> +                  offset, offset + psi->source.nr_irqs - 1);
> +    xive_source_pic_print_info(&psi->source, offset, mon);
> +}
Cédric Le Goater Nov. 23, 2018, 8:06 a.m. UTC | #9
On 11/23/18 4:50 AM, David Gibson wrote:
> On Thu, Nov 22, 2018 at 08:53:00AM +0100, Cédric Le Goater wrote:
>> On 11/22/18 5:11 AM, David Gibson wrote:
>>> On Fri, Nov 16, 2018 at 11:56:57AM +0100, Cédric Le Goater wrote:
>>>> The XiveRouter models the second sub-engine of the overall XIVE
>>>> architecture : the Interrupt Virtualization Routing Engine (IVRE).
>>>>
>>>> The IVRE handles event notifications of the IVSE through MMIO stores
>>>> and performs the interrupt routing process. For this purpose, it uses
>>>> a set of table stored in system memory, the first of which being the
>>>> Event Assignment Structure (EAS) table.
>>>>
>>>> The EAT associates an interrupt source number with an Event Notification
>>>> Descriptor (END) which will be used in a second phase of the routing
>>>> process to identify a Notification Virtual Target.
>>>>
>>>> The XiveRouter is an abstract class which needs to be inherited from
>>>> to define a storage for the EAT, and other upcoming tables. The
>>>> 'chip-id' atttribute is not strictly necessary for the sPAPR and
>>>> PowerNV machines but it's a good way to test the routing algorithm.
>>>> Without this atttribute, the XiveRouter could be a simple QOM
>>>> interface.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  include/hw/ppc/xive.h      | 32 ++++++++++++++
>>>>  include/hw/ppc/xive_regs.h | 31 ++++++++++++++
>>>>  hw/intc/xive.c             | 86 ++++++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 149 insertions(+)
>>>>  create mode 100644 include/hw/ppc/xive_regs.h
>>>>
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index be93fae6317b..5a0696366577 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -11,6 +11,7 @@
>>>>  #define PPC_XIVE_H
>>>>  
>>>>  #include "hw/sysbus.h"
>>>
>>> Again, I don't think making this a SysBusDevice is quite right.
>>> Even more so for the router than the source, because at least for PAPR
>>> it might not have any MMIO presence at all.
>>
>> The controller model inherits from the XiveRouter and manages the
>> TIMA.
> 
> Um.. I'm not sure what you mean by the "controller model".  Surely the
> presenter should own the TIMA, not the router?

The XIVE VC (routing) and PC (presente) subengines are merged under the 
controller model sPAPRXIVE and the PC matching algo looking for a target 
is reduced to one function. We don't want to model the exchanges on the
PowerBUS.

The TIMA MMIO region exposing the thread interrupt context is managed by 
the controller sPAPRXIVE (easier for KVM) and the interrupt context 
registers are under model XiveTCXT. This is our XICS/ICP equivalent 
in XIVE.

>>>> +#include "hw/ppc/xive_regs.h"
>>>>  
>>>>  /*
>>>>   * XIVE Fabric (Interface between Source and Router)
>>>> @@ -168,4 +169,35 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>>>>      }
>>>>  }
>>>>  
>>>> +/*
>>>> + * XIVE Router
>>>> + */
>>>> +
>>>> +typedef struct XiveRouter {
>>>> +    SysBusDevice    parent;
>>>> +
>>>> +    uint32_t        chip_id;
>>>
>>> I don't think this belongs in the base class.  The PowerNV specific
>>> variants will need it, but it doesn't make sense for the PAPR version.
>>
>> yeah. I am using it as a END and NVT block identifier but it's not 
>> required for sPAPR, it could just be zero. 
>>
>> It was good to test the routing algo which should not assume that the 
>> block id is zero. 
>>  
>>>
>>>> +} XiveRouter;
>>>> +
>>>> +#define TYPE_XIVE_ROUTER "xive-router"
>>>> +#define XIVE_ROUTER(obj)                                \
>>>> +    OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER)
>>>> +#define XIVE_ROUTER_CLASS(klass)                                        \
>>>> +    OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER)
>>>> +#define XIVE_ROUTER_GET_CLASS(obj)                              \
>>>> +    OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
>>>> +
>>>> +typedef struct XiveRouterClass {
>>>> +    SysBusDeviceClass parent;
>>>> +
>>>> +    /* XIVE table accessors */
>>>> +    int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>> +    int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>> +} XiveRouterClass;
>>>> +
>>>> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
>>>> +
>>>> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>> +
>>>>  #endif /* PPC_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
>>>> new file mode 100644
>>>> index 000000000000..12499b33614c
>>>> --- /dev/null
>>>> +++ b/include/hw/ppc/xive_regs.h
>>>> @@ -0,0 +1,31 @@
>>>> +/*
>>>> + * QEMU PowerPC XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2016-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#ifndef PPC_XIVE_REGS_H
>>>> +#define PPC_XIVE_REGS_H
>>>> +
>>>> +/* EAS (Event Assignment Structure)
>>>> + *
>>>> + * One per interrupt source. Targets an interrupt to a given Event
>>>> + * Notification Descriptor (END) and provides the corresponding
>>>> + * logical interrupt number (END data)
>>>> + */
>>>> +typedef struct XiveEAS {
>>>> +        /* Use a single 64-bit definition to make it easier to
>>>> +         * perform atomic updates
>>>> +         */
>>>> +        uint64_t        w;
>>>> +#define EAS_VALID       PPC_BIT(0)
>>>> +#define EAS_END_BLOCK   PPC_BITMASK(4, 7)        /* Destination END block# */
>>>> +#define EAS_END_INDEX   PPC_BITMASK(8, 31)       /* Destination END index */
>>>> +#define EAS_MASKED      PPC_BIT(32)              /* Masked */
>>>> +#define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
>>>> +} XiveEAS;
>>>> +
>>>> +#endif /* PPC_XIVE_REGS_H */
>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>> index 014a2e41f71f..c4c90a25758e 100644
>>>> --- a/hw/intc/xive.c
>>>> +++ b/hw/intc/xive.c
>>>> @@ -442,6 +442,91 @@ static const TypeInfo xive_source_info = {
>>>>      .class_init    = xive_source_class_init,
>>>>  };
>>>>  
>>>> +/*
>>>> + * XIVE Router (aka. Virtualization Controller or IVRE)
>>>> + */
>>>> +
>>>> +int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
>>>> +{
>>>> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>>>> +
>>>> +    return xrc->get_eas(xrtr, lisn, eas);
>>>> +}
>>>> +
>>>> +int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
>>>> +{
>>>> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>>>> +
>>>> +    return xrc->set_eas(xrtr, lisn, eas);
>>>> +}
>>>> +
>>>> +static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>>>> +{
>>>> +    XiveRouter *xrtr = XIVE_ROUTER(xf);
>>>> +    XiveEAS eas;
>>>> +
>>>> +    /* EAS cache lookup */
>>>> +    if (xive_router_get_eas(xrtr, lisn, &eas)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
>>>> +        return;
>>>> +    }
>>>
>>> AFAICT a bad LISN here means a qemu error (in the source, probably),
>>> not a user or guest error, so an assert() would be more appropriate.
>>
>> hmm, I would say no because in the case of PowerNV, the firmware could
>> have badly configured the ISN offset of a source which would notify the 
>> router with a bad notification event data.
> 
> Ah, good point.  That's fine as it is then.
> 
>>>> +
>>>> +    /* The IVRE has a State Bit Cache for its internal sources which
>>>> +     * is also involed at this point. We skip the SBC lookup because
>>>> +     * the state bits of the sources are modeled internally in QEMU.
>>>> +     */
>>>> +
>>>> +    if (!(eas.w & EAS_VALID)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (eas.w & EAS_MASKED) {
>>>> +        /* Notification completed */
>>>> +        return;
>>>> +    }
>>>> +}
>>>> +
>>>> +static Property xive_router_properties[] = {
>>>> +    DEFINE_PROP_UINT32("chip-id", XiveRouter, chip_id, 0),
>>>> +    DEFINE_PROP_END_OF_LIST(),
>>>> +};
>>>> +
>>>> +static void xive_router_class_init(ObjectClass *klass, void *data)
>>>> +{
>>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
>>>> +
>>>> +    dc->desc    = "XIVE Router Engine";
>>>> +    dc->props   = xive_router_properties;
>>>> +    xfc->notify = xive_router_notify;
>>>> +}
>>>> +
>>>> +static const TypeInfo xive_router_info = {
>>>> +    .name          = TYPE_XIVE_ROUTER,
>>>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>>>> +    .abstract      = true,
>>>> +    .class_size    = sizeof(XiveRouterClass),
>>>> +    .class_init    = xive_router_class_init,
>>>> +    .interfaces    = (InterfaceInfo[]) {
>>>> +        { TYPE_XIVE_FABRIC },
>>>
>>> So as far as I can see so far, the XiveFabric interface will
>>> essentially have to be implemented on the router object, so I'm not
>>> seeing much point to having the interface rather than just a direct
>>> call on the router object.  But I haven't read the whole series yet,
>>> so maybe I'm missing something.
>>
>> The PSIHB and PHB4 models are using it but there are not in the series.
>>
>> I can send the PSIHB patch in the next version if you like, it's the 
>> patch right after PnvXive. It's attached below for the moment. Look at 
>> pnv_psi_notify().
> 
> Hrm, I see.  This seems like a really convoluted way of achieving what
> you need here.  We want to abstract exactly how the source delivers
> notifies, 

on sPAPR, I agree that the forwarding of event notification could be a 
simple XiveRouter call but the XiveRouter covers both machines :/

On PowerNV, HW uses MMIOs to forward events and only the device knows 
about the IRQ number offset in the global IRQ number space and the 
notification port to use for the MMIO store. A PowerNV XIVE source 
would forward the event notification to a piece of logic which sends 
a PowerBUS event notification message. How it reaches the XIVE IC is
beyong QEMU as it would means modeling the PowerBUS. 

> but doing it with an interface on some object that's not necessarily
> either the source or the router seems odd.  
There is no direct link between the device owing the source and the 
XIVE controller, they could be on the same Power chip but the routing 
could be done by some other chips. This scenario is covered btw.

See it as a connector object.

> At the very least the names need to change (of both interface and > property for the target object).

I am fine with renaming it. With the above explanations, if they are 
clear enough, how do see them ?

Thanks,

C. 

> 
>>
>> Thanks,
>>
>> C.
>>
>>>> +        { }
>>>> +    }
>>>> +};
>>>> +
>>>> +void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
>>>> +{
>>>> +    if (!(eas->w & EAS_VALID)) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
>>>> +                   lisn, eas->w & EAS_MASKED ? "M" : " ",
>>>> +                   (uint8_t)  GETFIELD(EAS_END_BLOCK, eas->w),
>>>> +                   (uint32_t) GETFIELD(EAS_END_INDEX, eas->w),
>>>> +                   (uint32_t) GETFIELD(EAS_END_DATA, eas->w));
>>>> +}
>>>> +
>>>>  /*
>>>>   * XIVE Fabric
>>>>   */
>>>> @@ -455,6 +540,7 @@ static void xive_register_types(void)
>>>>  {
>>>>      type_register_static(&xive_source_info);
>>>>      type_register_static(&xive_fabric_info);
>>>> +    type_register_static(&xive_router_info);
>>>>  }
>>>>  
>>>>  type_init(xive_register_types)
>>>
>>
> 
>> >From 680fd6ff7c99e669708fbc5cfdbfcd95e83e7c07 Mon Sep 17 00:00:00 2001
>> From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
>> Date: Wed, 21 Nov 2018 10:29:45 +0100
>> Subject: [PATCH] ppc/pnv: add a PSI bridge model for POWER9 processor
>> MIME-Version: 1.0
>> Content-Type: text/plain; charset=UTF-8
>> Content-Transfer-Encoding: 8bit
>>
>> The PSI bridge on POWER9 is very similar to POWER8. The BAR is still
>> set through XSCOM but the controls are now entirely done with MMIOs.
>> More interrupts are defined and the interrupt controller interface has
>> changed to XIVE. The POWER9 model is a first example of the usage of
>> the notify() handler of the XiveFabric interface, linking the PSI
>> XiveSource to its owning device model.
>>
>> Signed-off-by: C??dric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/pnv.h       |   6 +
>>  include/hw/ppc/pnv_psi.h   |  50 ++++-
>>  include/hw/ppc/pnv_xscom.h |   3 +
>>  hw/ppc/pnv.c               |  20 +-
>>  hw/ppc/pnv_psi.c           | 390 ++++++++++++++++++++++++++++++++++---
>>  5 files changed, 444 insertions(+), 25 deletions(-)
>>
>> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
>> index c402e5d5844b..8be1147481f9 100644
>> --- a/include/hw/ppc/pnv.h
>> +++ b/include/hw/ppc/pnv.h
>> @@ -88,6 +88,7 @@ typedef struct Pnv9Chip {
>>  
>>      /*< public >*/
>>      PnvXive      xive;
>> +    PnvPsi       psi;
>>  } Pnv9Chip;
>>  
>>  typedef struct PnvChipClass {
>> @@ -250,11 +251,16 @@ void pnv_bmc_powerdown(IPMIBmc *bmc);
>>  #define PNV9_XIVE_PC_SIZE            0x0000001000000000ull
>>  #define PNV9_XIVE_PC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006018000000000ull)
>>  
>> +#define PNV9_PSIHB_SIZE              0x0000000000100000ull
>> +#define PNV9_PSIHB_BASE(chip)        PNV9_CHIP_BASE(chip, 0x0006030203000000ull)
>> +
>>  #define PNV9_XIVE_IC_SIZE            0x0000000000080000ull
>>  #define PNV9_XIVE_IC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203100000ull)
>>  
>>  #define PNV9_XIVE_TM_SIZE            0x0000000000040000ull
>>  #define PNV9_XIVE_TM_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203180000ull)
>>  
>> +#define PNV9_PSIHB_ESB_SIZE          0x0000000000010000ull
>> +#define PNV9_PSIHB_ESB_BASE(chip)    PNV9_CHIP_BASE(chip, 0x00060302031c0000ull)
>>  
>>  #endif /* _PPC_PNV_H */
>> diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h
>> index f6af5eae1fa8..b8f8d082bcf9 100644
>> --- a/include/hw/ppc/pnv_psi.h
>> +++ b/include/hw/ppc/pnv_psi.h
>> @@ -21,10 +21,35 @@
>>  
>>  #include "hw/sysbus.h"
>>  #include "hw/ppc/xics.h"
>> +#include "hw/ppc/xive.h"
>>  
>>  #define TYPE_PNV_PSI "pnv-psi"
>>  #define PNV_PSI(obj) \
>>       OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI)
>> +#define PNV_PSI_CLASS(klass) \
>> +     OBJECT_CLASS_CHECK(PnvPsiClass, (klass), TYPE_PNV_PSI)
>> +#define PNV_PSI_GET_CLASS(obj) \
>> +     OBJECT_GET_CLASS(PnvPsiClass, (obj), TYPE_PNV_PSI)
>> +
>> +typedef struct PnvPsi PnvPsi;
>> +typedef struct PnvChip PnvChip;
>> +typedef struct PnvPsiClass {
>> +    SysBusDeviceClass parent_class;
>> +
>> +    int chip_type;
>> +    uint32_t xscom_pcba;
>> +    uint32_t xscom_size;
>> +
>> +    void (*irq_set)(PnvPsi *psi, int, bool state);
>> +} PnvPsiClass;
>> +
>> +#define TYPE_PNV_PSI_POWER8 TYPE_PNV_PSI "-POWER8"
>> +#define PNV_PSI_POWER8(obj) \
>> +    OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI_POWER8)
>> +
>> +#define TYPE_PNV_PSI_POWER9 TYPE_PNV_PSI "-POWER9"
>> +#define PNV_PSI_POWER9(obj) \
>> +    OBJECT_CHECK(PnvPsi, (obj), TYPE_PNV_PSI_POWER9)
>>  
>>  #define PSIHB_XSCOM_MAX         0x20
>>  
>> @@ -38,9 +63,12 @@ typedef struct PnvPsi {
>>      /* MemoryRegion fsp_mr; */
>>      uint64_t fsp_bar;
>>  
>> -    /* Interrupt generation */
>> +    /* P8 Interrupt generation */
>>      ICSState ics;
>>  
>> +    /* P9 Interrupt generation */
>> +    XiveSource source;
>> +
>>      /* Registers */
>>      uint64_t regs[PSIHB_XSCOM_MAX];
>>  
>> @@ -60,6 +88,24 @@ typedef enum PnvPsiIrq {
>>  
>>  #define PSI_NUM_INTERRUPTS 6
>>  
>> -extern void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state);
>> +/* P9 PSI Interrupts */
>> +#define PSIHB9_IRQ_PSI          0
>> +#define PSIHB9_IRQ_OCC          1
>> +#define PSIHB9_IRQ_FSI          2
>> +#define PSIHB9_IRQ_LPCHC        3
>> +#define PSIHB9_IRQ_LOCAL_ERR    4
>> +#define PSIHB9_IRQ_GLOBAL_ERR   5
>> +#define PSIHB9_IRQ_TPM          6
>> +#define PSIHB9_IRQ_LPC_SIRQ0    7
>> +#define PSIHB9_IRQ_LPC_SIRQ1    8
>> +#define PSIHB9_IRQ_LPC_SIRQ2    9
>> +#define PSIHB9_IRQ_LPC_SIRQ3    10
>> +#define PSIHB9_IRQ_SBE_I2C      11
>> +#define PSIHB9_IRQ_DIO          12
>> +#define PSIHB9_IRQ_PSU          13
>> +#define PSIHB9_NUM_IRQS         14
>> +
>> +void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state);
>> +void pnv_psi_pic_print_info(PnvPsi *psi, Monitor *mon);
>>  
>>  #endif /* _PPC_PNV_PSI_H */
>> diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
>> index 5bd43467a1ab..019b45bf9189 100644
>> --- a/include/hw/ppc/pnv_xscom.h
>> +++ b/include/hw/ppc/pnv_xscom.h
>> @@ -82,6 +82,9 @@ typedef struct PnvXScomInterfaceClass {
>>  #define PNV_XSCOM_PBCQ_SPCI_BASE  0x9013c00
>>  #define PNV_XSCOM_PBCQ_SPCI_SIZE  0x5
>>  
>> +#define PNV9_XSCOM_PSIHB_BASE     0x5012900
>> +#define PNV9_XSCOM_PSIHB_SIZE     0x100
>> +
>>  #define PNV9_XSCOM_XIVE_BASE      0x5013000
>>  #define PNV9_XSCOM_XIVE_SIZE      0x300
>>  
>> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
>> index b6af896c30e4..e67c9d7d3995 100644
>> --- a/hw/ppc/pnv.c
>> +++ b/hw/ppc/pnv.c
>> @@ -743,7 +743,7 @@ static void pnv_chip_power8_instance_init(Object *obj)
>>      PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
>>      int i;
>>  
>> -    object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI);
>> +    object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI_POWER8);
>>      object_property_add_child(obj, "psi", OBJECT(&chip8->psi), NULL);
>>      object_property_add_const_link(OBJECT(&chip8->psi), "xics",
>>                                     OBJECT(qdev_get_machine()), &error_abort);
>> @@ -923,6 +923,11 @@ static void pnv_chip_power9_instance_init(Object *obj)
>>      object_property_add_child(obj, "xive", OBJECT(&chip9->xive), NULL);
>>      object_property_add_const_link(OBJECT(&chip9->xive), "chip", obj,
>>                                     &error_abort);
>> +
>> +    object_initialize(&chip9->psi, sizeof(chip9->psi), TYPE_PNV_PSI_POWER9);
>> +    object_property_add_child(obj, "psi", OBJECT(&chip9->psi), NULL);
>> +    object_property_add_const_link(OBJECT(&chip9->psi), "chip", obj,
>> +                                   &error_abort);
>>  }
>>  
>>  static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
>> @@ -955,6 +960,18 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
>>      qdev_set_parent_bus(DEVICE(&chip9->xive), sysbus_get_default());
>>      pnv_xscom_add_subregion(chip, PNV9_XSCOM_XIVE_BASE,
>>                              &chip9->xive.xscom_regs);
>> +
>> +    /* Processor Service Interface (PSI) Host Bridge */
>> +    object_property_set_int(OBJECT(&chip9->psi), PNV9_PSIHB_BASE(chip),
>> +                            "bar", &error_fatal);
>> +    object_property_set_bool(OBJECT(&chip9->psi), true, "realized", &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +    qdev_set_parent_bus(DEVICE(&chip9->psi), sysbus_get_default());
>> +    pnv_xscom_add_subregion(chip, PNV9_XSCOM_PSIHB_BASE,
>> +                            &chip9->psi.xscom_regs);
>>  }
>>  
>>  static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
>> @@ -1188,6 +1205,7 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj,
>>              Pnv9Chip *chip9 = PNV9_CHIP(chip);
>>  
>>               pnv_xive_pic_print_info(&chip9->xive, mon);
>> +             pnv_psi_pic_print_info(&chip9->psi, mon);
>>          } else {
>>              Pnv8Chip *chip8 = PNV8_CHIP(chip);
>>  
>> diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
>> index 5b969127c303..8b85dd9555e8 100644
>> --- a/hw/ppc/pnv_psi.c
>> +++ b/hw/ppc/pnv_psi.c
>> @@ -22,6 +22,7 @@
>>  #include "target/ppc/cpu.h"
>>  #include "qemu/log.h"
>>  #include "qapi/error.h"
>> +#include "monitor/monitor.h"
>>  
>>  #include "exec/address-spaces.h"
>>  
>> @@ -114,12 +115,14 @@
>>  #define PSIHB_BAR_MASK                  0x0003fffffff00000ull
>>  #define PSIHB_FSPBAR_MASK               0x0003ffff00000000ull
>>  
>> +#define PSIHB_REG(addr) (((addr) >> 3) + PSIHB_XSCOM_BAR)
>> +
>>  static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
>>  {
>>      MemoryRegion *sysmem = get_system_memory();
>>      uint64_t old = psi->regs[PSIHB_XSCOM_BAR];
>>  
>> -    psi->regs[PSIHB_XSCOM_BAR] = bar & (PSIHB_BAR_MASK | PSIHB_BAR_EN);
>> +    psi->regs[PSIHB_XSCOM_BAR] = bar;
>>  
>>      /* Update MR, always remove it first */
>>      if (old & PSIHB_BAR_EN) {
>> @@ -128,7 +131,7 @@ static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
>>  
>>      /* Then add it back if needed */
>>      if (bar & PSIHB_BAR_EN) {
>> -        uint64_t addr = bar & PSIHB_BAR_MASK;
>> +        uint64_t addr = bar & ~PSIHB_BAR_EN;
>>          memory_region_add_subregion(sysmem, addr, &psi->regs_mr);
>>      }
>>  }
>> @@ -205,7 +208,12 @@ static const uint64_t stat_bits[] = {
>>      [PSIHB_IRQ_EXTERNAL]  = PSIHB_IRQ_STAT_EXT,
>>  };
>>  
>> -void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state)
>> +void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state)
>> +{
>> +    PNV_PSI_GET_CLASS(psi)->irq_set(psi, irq, state);
>> +}
>> +
>> +static void pnv_psi_power8_irq_set(PnvPsi *psi, int irq, bool state)
>>  {
>>      ICSState *ics = &psi->ics;
>>      uint32_t xivr_reg;
>> @@ -324,7 +332,7 @@ static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio)
>>          val = psi->regs[offset];
>>          break;
>>      default:
>> -        qemu_log_mask(LOG_UNIMP, "PSI: read at Ox%" PRIx32 "\n", offset);
>> +        qemu_log_mask(LOG_UNIMP, "PSI: read at 0x%" PRIx32 "\n", offset);
>>      }
>>      return val;
>>  }
>> @@ -383,7 +391,7 @@ static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
>>          pnv_psi_set_irsn(psi, val);
>>          break;
>>      default:
>> -        qemu_log_mask(LOG_UNIMP, "PSI: write at Ox%" PRIx32 "\n", offset);
>> +        qemu_log_mask(LOG_UNIMP, "PSI: write at 0x%" PRIx32 "\n", offset);
>>      }
>>  }
>>  
>> @@ -393,13 +401,13 @@ static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
>>   */
>>  static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size)
>>  {
>> -    return pnv_psi_reg_read(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, true);
>> +    return pnv_psi_reg_read(opaque, PSIHB_REG(addr), true);
>>  }
>>  
>>  static void pnv_psi_mmio_write(void *opaque, hwaddr addr,
>>                                uint64_t val, unsigned size)
>>  {
>> -    pnv_psi_reg_write(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, val, true);
>> +    pnv_psi_reg_write(opaque, PSIHB_REG(addr), val, true);
>>  }
>>  
>>  static const MemoryRegionOps psi_mmio_ops = {
>> @@ -441,7 +449,7 @@ static const MemoryRegionOps pnv_psi_xscom_ops = {
>>      }
>>  };
>>  
>> -static void pnv_psi_init(Object *obj)
>> +static void pnv_psi_power8_instance_init(Object *obj)
>>  {
>>      PnvPsi *psi = PNV_PSI(obj);
>>  
>> @@ -458,7 +466,7 @@ static const uint8_t irq_to_xivr[] = {
>>      PSIHB_XSCOM_XIVR_EXT,
>>  };
>>  
>> -static void pnv_psi_realize(DeviceState *dev, Error **errp)
>> +static void pnv_psi_power8_realize(DeviceState *dev, Error **errp)
>>  {
>>      PnvPsi *psi = PNV_PSI(dev);
>>      ICSState *ics = &psi->ics;
>> @@ -510,28 +518,34 @@ static void pnv_psi_realize(DeviceState *dev, Error **errp)
>>      }
>>  }
>>  
>> +static const char compat_p8[] = "ibm,power8-psihb-x\0ibm,psihb-x";
>> +static const char compat_p9[] = "ibm,power9-psihb-x\0ibm,psihb-x";
>> +
>>  static int pnv_psi_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset)
>>  {
>> -    const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x";
>> +    PnvPsiClass *ppc = PNV_PSI_GET_CLASS(dev);
>>      char *name;
>>      int offset;
>> -    uint32_t lpc_pcba = PNV_XSCOM_PSIHB_BASE;
>>      uint32_t reg[] = {
>> -        cpu_to_be32(lpc_pcba),
>> -        cpu_to_be32(PNV_XSCOM_PSIHB_SIZE)
>> +        cpu_to_be32(ppc->xscom_pcba),
>> +        cpu_to_be32(ppc->xscom_size)
>>      };
>>  
>> -    name = g_strdup_printf("psihb@%x", lpc_pcba);
>> +    name = g_strdup_printf("psihb@%x", ppc->xscom_pcba);
>>      offset = fdt_add_subnode(fdt, xscom_offset, name);
>>      _FDT(offset);
>>      g_free(name);
>>  
>> -    _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
>> -
>> -    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2)));
>> -    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1)));
>> -    _FDT((fdt_setprop(fdt, offset, "compatible", compat,
>> -                      sizeof(compat))));
>> +    _FDT(fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)));
>> +    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", 2));
>> +    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", 1));
>> +    if (ppc->chip_type == PNV_CHIP_POWER9) {
>> +        _FDT(fdt_setprop(fdt, offset, "compatible", compat_p9,
>> +                         sizeof(compat_p9)));
>> +    } else {
>> +        _FDT(fdt_setprop(fdt, offset, "compatible", compat_p8,
>> +                         sizeof(compat_p8)));
>> +    }
>>      return 0;
>>  }
>>  
>> @@ -541,6 +555,324 @@ static Property pnv_psi_properties[] = {
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>  
>> +static void pnv_psi_power8_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +    PnvPsiClass *ppc = PNV_PSI_CLASS(klass);
>> +
>> +    dc->desc    = "PowerNV PSI Controller POWER8";
>> +    dc->realize = pnv_psi_power8_realize;
>> +
>> +    ppc->chip_type =  PNV_CHIP_POWER8;
>> +    ppc->xscom_pcba = PNV_XSCOM_PSIHB_BASE;
>> +    ppc->xscom_size = PNV_XSCOM_PSIHB_SIZE;
>> +    ppc->irq_set    = pnv_psi_power8_irq_set;
>> +}
>> +
>> +static const TypeInfo pnv_psi_power8_info = {
>> +    .name          = TYPE_PNV_PSI_POWER8,
>> +    .parent        = TYPE_PNV_PSI,
>> +    .instance_init = pnv_psi_power8_instance_init,
>> +    .class_init    = pnv_psi_power8_class_init,
>> +};
>> +
>> +/* Common registers */
>> +
>> +#define PSIHB9_CR                       0x20
>> +#define PSIHB9_SEMR                     0x28
>> +
>> +/* P9 registers */
>> +
>> +#define PSIHB9_INTERRUPT_CONTROL        0x58
>> +#define   PSIHB9_IRQ_METHOD             PPC_BIT(0)
>> +#define   PSIHB9_IRQ_RESET              PPC_BIT(1)
>> +#define PSIHB9_ESB_CI_BASE              0x60
>> +#define   PSIHB9_ESB_CI_VALID           1
>> +#define PSIHB9_ESB_NOTIF_ADDR           0x68
>> +#define   PSIHB9_ESB_NOTIF_VALID        1
>> +#define PSIHB9_IVT_OFFSET               0x70
>> +#define   PSIHB9_IVT_OFF_SHIFT          32
>> +
>> +#define PSIHB9_IRQ_LEVEL                0x78 /* assertion */
>> +#define   PSIHB9_IRQ_LEVEL_PSI          PPC_BIT(0)
>> +#define   PSIHB9_IRQ_LEVEL_OCC          PPC_BIT(1)
>> +#define   PSIHB9_IRQ_LEVEL_FSI          PPC_BIT(2)
>> +#define   PSIHB9_IRQ_LEVEL_LPCHC        PPC_BIT(3)
>> +#define   PSIHB9_IRQ_LEVEL_LOCAL_ERR    PPC_BIT(4)
>> +#define   PSIHB9_IRQ_LEVEL_GLOBAL_ERR   PPC_BIT(5)
>> +#define   PSIHB9_IRQ_LEVEL_TPM          PPC_BIT(6)
>> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ1    PPC_BIT(7)
>> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ2    PPC_BIT(8)
>> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ3    PPC_BIT(9)
>> +#define   PSIHB9_IRQ_LEVEL_LPC_SIRQ4    PPC_BIT(10)
>> +#define   PSIHB9_IRQ_LEVEL_SBE_I2C      PPC_BIT(11)
>> +#define   PSIHB9_IRQ_LEVEL_DIO          PPC_BIT(12)
>> +#define   PSIHB9_IRQ_LEVEL_PSU          PPC_BIT(13)
>> +#define   PSIHB9_IRQ_LEVEL_I2C_C        PPC_BIT(14)
>> +#define   PSIHB9_IRQ_LEVEL_I2C_D        PPC_BIT(15)
>> +#define   PSIHB9_IRQ_LEVEL_I2C_E        PPC_BIT(16)
>> +#define   PSIHB9_IRQ_LEVEL_SBE          PPC_BIT(19)
>> +
>> +#define PSIHB9_IRQ_STAT                 0x80 /* P bit */
>> +#define   PSIHB9_IRQ_STAT_PSI           PPC_BIT(0)
>> +#define   PSIHB9_IRQ_STAT_OCC           PPC_BIT(1)
>> +#define   PSIHB9_IRQ_STAT_FSI           PPC_BIT(2)
>> +#define   PSIHB9_IRQ_STAT_LPCHC         PPC_BIT(3)
>> +#define   PSIHB9_IRQ_STAT_LOCAL_ERR     PPC_BIT(4)
>> +#define   PSIHB9_IRQ_STAT_GLOBAL_ERR    PPC_BIT(5)
>> +#define   PSIHB9_IRQ_STAT_TPM           PPC_BIT(6)
>> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ1     PPC_BIT(7)
>> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ2     PPC_BIT(8)
>> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ3     PPC_BIT(9)
>> +#define   PSIHB9_IRQ_STAT_LPC_SIRQ4     PPC_BIT(10)
>> +#define   PSIHB9_IRQ_STAT_SBE_I2C       PPC_BIT(11)
>> +#define   PSIHB9_IRQ_STAT_DIO           PPC_BIT(12)
>> +#define   PSIHB9_IRQ_STAT_PSU           PPC_BIT(13)
>> +
>> +static void pnv_psi_notify(XiveFabric *xf, uint32_t srcno)
>> +{
>> +    PnvPsi *psi = PNV_PSI(xf);
>> +    uint64_t notif_port = psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
>> +    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
>> +    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
>> +
>> +    uint32_t offset =
>> +        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
>> +    uint64_t lisn = cpu_to_be64(offset + srcno);
>> +
>> +    if (valid) {
>> +        cpu_physical_memory_write(notify_addr, &lisn, sizeof(lisn));
>> +    }
>> +}
>> +
>> +/*
>> + * TODO : move to parent class
>> + */
>> +static void pnv_psi_reset(DeviceState *dev)
>> +{
>> +    PnvPsi *psi = PNV_PSI(dev);
>> +
>> +    memset(psi->regs, 0x0, sizeof(psi->regs));
>> +
>> +    psi->regs[PSIHB_XSCOM_BAR] = psi->bar | PSIHB_BAR_EN;
>> +}
>> +
>> +static uint64_t pnv_psi_p9_mmio_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> +    PnvPsi *psi = PNV_PSI(opaque);
>> +    uint32_t reg = PSIHB_REG(addr);
>> +    uint64_t val = -1;
>> +
>> +    switch (addr) {
>> +    case PSIHB9_CR:
>> +    case PSIHB9_SEMR:
>> +        /* FSP stuff */
>> +    case PSIHB9_INTERRUPT_CONTROL:
>> +    case PSIHB9_ESB_CI_BASE:
>> +    case PSIHB9_ESB_NOTIF_ADDR:
>> +    case PSIHB9_IVT_OFFSET:
>> +        val = psi->regs[reg];
>> +        break;
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: read at 0x%" PRIx64 "\n", addr);
>> +    }
>> +
>> +    return val;
>> +}
>> +
>> +static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr,
>> +                                  uint64_t val, unsigned size)
>> +{
>> +    PnvPsi *psi = PNV_PSI(opaque);
>> +    uint32_t reg = PSIHB_REG(addr);
>> +    MemoryRegion *sysmem = get_system_memory();
>> +
>> +    switch (addr) {
>> +    case PSIHB9_CR:
>> +    case PSIHB9_SEMR:
>> +        /* FSP stuff */
>> +        break;
>> +    case PSIHB9_INTERRUPT_CONTROL:
>> +        if (val & PSIHB9_IRQ_RESET) {
>> +            device_reset(DEVICE(&psi->source));
>> +        }
>> +        psi->regs[reg] = val;
>> +        break;
>> +
>> +    case PSIHB9_ESB_CI_BASE:
>> +        if (!(val & PSIHB9_ESB_CI_VALID)) {
>> +            if (psi->regs[reg] & PSIHB9_ESB_CI_VALID) {
>> +                memory_region_del_subregion(sysmem, &psi->source.esb_mmio);
>> +            }
>> +        } else {
>> +            if (!(psi->regs[reg] & PSIHB9_ESB_CI_VALID)) {
>> +                memory_region_add_subregion(sysmem,
>> +                                        val & ~PSIHB9_ESB_CI_VALID,
>> +                                        &psi->source.esb_mmio);
>> +            }
>> +        }
>> +        psi->regs[reg] = val;
>> +        break;
>> +
>> +    case PSIHB9_ESB_NOTIF_ADDR:
>> +        psi->regs[reg] = val;
>> +        break;
>> +    case PSIHB9_IVT_OFFSET:
>> +        psi->regs[reg] = val;
>> +        break;
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: write at 0x%" PRIx64 "\n", addr);
>> +    }
>> +}
>> +
>> +static const MemoryRegionOps pnv_psi_p9_mmio_ops = {
>> +    .read = pnv_psi_p9_mmio_read,
>> +    .write = pnv_psi_p9_mmio_write,
>> +    .endianness = DEVICE_BIG_ENDIAN,
>> +    .valid = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +    .impl = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +};
>> +
>> +static uint64_t pnv_psi_p9_xscom_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> +    /* No read are expected */
>> +    qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom read at 0x%" PRIx64 "\n", addr);
>> +    return -1;
>> +}
>> +
>> +static void pnv_psi_p9_xscom_write(void *opaque, hwaddr addr,
>> +                                uint64_t val, unsigned size)
>> +{
>> +    PnvPsi *psi = PNV_PSI(opaque);
>> +
>> +    /* XSCOM is only used to set the PSIHB MMIO region */
>> +    switch (addr >> 3) {
>> +    case PSIHB_XSCOM_BAR:
>> +        pnv_psi_set_bar(psi, val);
>> +        break;
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom write at 0x%" PRIx64 "\n",
>> +                      addr);
>> +    }
>> +}
>> +
>> +static const MemoryRegionOps pnv_psi_p9_xscom_ops = {
>> +    .read = pnv_psi_p9_xscom_read,
>> +    .write = pnv_psi_p9_xscom_write,
>> +    .endianness = DEVICE_BIG_ENDIAN,
>> +    .valid = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +    .impl = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    }
>> +};
>> +
>> +static void pnv_psi_power9_irq_set(PnvPsi *psi, int irq, bool state)
>> +{
>> +    uint32_t irq_method = psi->regs[PSIHB_REG(PSIHB9_INTERRUPT_CONTROL)];
>> +
>> +    if (irq > PSIHB9_NUM_IRQS) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
>> +        return;
>> +    }
>> +
>> +    if (irq_method & PSIHB9_IRQ_METHOD) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "PSI: LSI IRQ method no supported\n");
>> +        return;
>> +    }
>> +
>> +    /* Update LSI levels */
>> +    if (state) {
>> +        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] |= PPC_BIT(irq);
>> +    } else {
>> +        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] &= ~PPC_BIT(irq);
>> +    }
>> +
>> +    qemu_set_irq(xive_source_qirq(&psi->source, irq), state);
>> +}
>> +
>> +static void pnv_psi_power9_instance_init(Object *obj)
>> +{
>> +    PnvPsi *psi = PNV_PSI(obj);
>> +
>> +    object_initialize(&psi->source, sizeof(psi->source), TYPE_XIVE_SOURCE);
>> +    object_property_add_child(obj, "source", OBJECT(&psi->source), NULL);
>> +}
>> +
>> +static void pnv_psi_power9_realize(DeviceState *dev, Error **errp)
>> +{
>> +    PnvPsi *psi = PNV_PSI(dev);
>> +    XiveSource *xsrc = &psi->source;
>> +    Error *local_err = NULL;
>> +    int i;
>> +
>> +    /* This is the only device with 4k ESB pages */
>> +    object_property_set_int(OBJECT(xsrc), XIVE_ESB_4K, "shift",
>> +                            &error_fatal);
>> +    object_property_set_int(OBJECT(xsrc), PSIHB9_NUM_IRQS, "nr-irqs",
>> +                            &error_fatal);
>> +    object_property_add_const_link(OBJECT(xsrc), "xive", OBJECT(psi),
>> +                                   &error_fatal);
>> +    object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +    qdev_set_parent_bus(DEVICE(xsrc), sysbus_get_default());
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        xive_source_irq_set(xsrc, i, true);
>> +    }
>> +
>> +    /* XSCOM region for PSI registers */
>> +    pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_p9_xscom_ops,
>> +                psi, "xscom-psi", PNV9_XSCOM_PSIHB_SIZE);
>> +
>> +    /* Initialize MMIO region */
>> +    memory_region_init_io(&psi->regs_mr, OBJECT(dev), &pnv_psi_p9_mmio_ops, psi,
>> +                          "psihb", PNV9_PSIHB_SIZE);
>> +
>> +    /* Default BAR for MMIO region */
>> +    pnv_psi_set_bar(psi, psi->bar | PSIHB_BAR_EN);
>> +}
>> +
>> +static void pnv_psi_power9_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +    PnvPsiClass *ppc = PNV_PSI_CLASS(klass);
>> +    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
>> +
>> +    dc->desc    = "PowerNV PSI Controller POWER9";
>> +    dc->realize = pnv_psi_power9_realize;
>> +
>> +    ppc->chip_type  = PNV_CHIP_POWER9;
>> +    ppc->xscom_pcba = PNV9_XSCOM_PSIHB_BASE;
>> +    ppc->xscom_size = PNV9_XSCOM_PSIHB_SIZE;
>> +    ppc->irq_set    = pnv_psi_power9_irq_set;
>> +
>> +    xfc->notify      = pnv_psi_notify;
>> +}
>> +
>> +static const TypeInfo pnv_psi_power9_info = {
>> +    .name          = TYPE_PNV_PSI_POWER9,
>> +    .parent        = TYPE_PNV_PSI,
>> +    .instance_init = pnv_psi_power9_instance_init,
>> +    .class_init    = pnv_psi_power9_class_init,
>> +    .interfaces = (InterfaceInfo[]) {
>> +            { TYPE_XIVE_FABRIC },
>> +            { },
>> +    },
>> +};
>> +
>>  static void pnv_psi_class_init(ObjectClass *klass, void *data)
>>  {
>>      DeviceClass *dc = DEVICE_CLASS(klass);
>> @@ -548,16 +880,18 @@ static void pnv_psi_class_init(ObjectClass *klass, void *data)
>>  
>>      xdc->dt_xscom = pnv_psi_dt_xscom;
>>  
>> -    dc->realize = pnv_psi_realize;
>> +    dc->desc = "PowerNV PSI Controller";
>>      dc->props = pnv_psi_properties;
>> +    dc->reset  = pnv_psi_reset;
>>  }
>>  
>>  static const TypeInfo pnv_psi_info = {
>>      .name          = TYPE_PNV_PSI,
>>      .parent        = TYPE_SYS_BUS_DEVICE,
>>      .instance_size = sizeof(PnvPsi),
>> -    .instance_init = pnv_psi_init,
>>      .class_init    = pnv_psi_class_init,
>> +    .class_size    = sizeof(PnvPsiClass),
>> +    .abstract      = true,
>>      .interfaces    = (InterfaceInfo[]) {
>>          { TYPE_PNV_XSCOM_INTERFACE },
>>          { }
>> @@ -567,6 +901,18 @@ static const TypeInfo pnv_psi_info = {
>>  static void pnv_psi_register_types(void)
>>  {
>>      type_register_static(&pnv_psi_info);
>> +    type_register_static(&pnv_psi_power8_info);
>> +    type_register_static(&pnv_psi_power9_info);
>>  }
>>  
>>  type_init(pnv_psi_register_types)
>> +
>> +void pnv_psi_pic_print_info(PnvPsi *psi, Monitor *mon)
>> +{
>> +    uint32_t offset =
>> +        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
>> +
>> +    monitor_printf(mon, "PSIHB Source %08x .. %08x\n",
>> +                  offset, offset + psi->source.nr_irqs - 1);
>> +    xive_source_pic_print_info(&psi->source, offset, mon);
>> +}
> 
>
Cédric Le Goater Nov. 23, 2018, 10:28 a.m. UTC | #10
On 11/23/18 2:10 AM, David Gibson wrote:
> On Thu, Nov 22, 2018 at 05:50:07PM +1100, Benjamin Herrenschmidt wrote:
>> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
>>>
>>> Sorry, didn't think of this in my first reply.
>>>
>>> 1) Does the hardware ever actually write back to the EAS?  I know it
>>> does for the END, but it's not clear why it would need to for the
>>> EAS.  If not, we don't need the setter.
>>
>> Nope, though the PAPR model will via hcalls
> 
> Right, bit AIUI the set_eas hook is about abstracting PAPR vs bare
> metal details.  Since the hcall knows it's PAPR it can just update the
> backing information for the EAS directly, and no need for an
> abstracted hook.

Indeed, the first versions of the XIVE patchset did not use such hooks, 
but when discussed we said we wanted abstract methods for the router 
to validate the overall XIVE model, which is useful for PowerNV. 

We can change again and have the hcalls get/set directly in the EAT
and ENDT. It would certainly simplify the sPAPR model. 

C.


> 
>>
>>>
>>> 2) The signatures are a bit odd here.  For the setter, a value would
>>> make sense than a (XiveEAS *), since it's just a word.  For the getter
>>> you could return the EAS value directly rather than using a pointer -
>>> there's already a valid bit in the EAS so you can construct a value
>>> with that cleared if the lisn is out of bounds.
>>
>
David Gibson Nov. 26, 2018, 5:44 a.m. UTC | #11
On Fri, Nov 23, 2018 at 11:28:24AM +0100, Cédric Le Goater wrote:
> On 11/23/18 2:10 AM, David Gibson wrote:
> > On Thu, Nov 22, 2018 at 05:50:07PM +1100, Benjamin Herrenschmidt wrote:
> >> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
> >>>
> >>> Sorry, didn't think of this in my first reply.
> >>>
> >>> 1) Does the hardware ever actually write back to the EAS?  I know it
> >>> does for the END, but it's not clear why it would need to for the
> >>> EAS.  If not, we don't need the setter.
> >>
> >> Nope, though the PAPR model will via hcalls
> > 
> > Right, bit AIUI the set_eas hook is about abstracting PAPR vs bare
> > metal details.  Since the hcall knows it's PAPR it can just update the
> > backing information for the EAS directly, and no need for an
> > abstracted hook.
> 
> Indeed, the first versions of the XIVE patchset did not use such hooks, 
> but when discussed we said we wanted abstract methods for the router 
> to validate the overall XIVE model, which is useful for PowerNV. 
> 
> We can change again and have the hcalls get/set directly in the EAT
> and ENDT. It would certainly simplify the sPAPR model.

I think that's the better approach.

> >>> 2) The signatures are a bit odd here.  For the setter, a value would
> >>> make sense than a (XiveEAS *), since it's just a word.  For the getter
> >>> you could return the EAS value directly rather than using a pointer -
> >>> there's already a valid bit in the EAS so you can construct a value
> >>> with that cleared if the lisn is out of bounds.
> >>
> > 
>
Cédric Le Goater Nov. 26, 2018, 9:39 a.m. UTC | #12
On 11/26/18 6:44 AM, David Gibson wrote:
> On Fri, Nov 23, 2018 at 11:28:24AM +0100, Cédric Le Goater wrote:
>> On 11/23/18 2:10 AM, David Gibson wrote:
>>> On Thu, Nov 22, 2018 at 05:50:07PM +1100, Benjamin Herrenschmidt wrote:
>>>> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
>>>>>
>>>>> Sorry, didn't think of this in my first reply.
>>>>>
>>>>> 1) Does the hardware ever actually write back to the EAS?  I know it
>>>>> does for the END, but it's not clear why it would need to for the
>>>>> EAS.  If not, we don't need the setter.
>>>>
>>>> Nope, though the PAPR model will via hcalls
>>>
>>> Right, bit AIUI the set_eas hook is about abstracting PAPR vs bare
>>> metal details.  Since the hcall knows it's PAPR it can just update the
>>> backing information for the EAS directly, and no need for an
>>> abstracted hook.
>>
>> Indeed, the first versions of the XIVE patchset did not use such hooks, 
>> but when discussed we said we wanted abstract methods for the router 
>> to validate the overall XIVE model, which is useful for PowerNV. 
>>
>> We can change again and have the hcalls get/set directly in the EAT
>> and ENDT. It would certainly simplify the sPAPR model.
> 
> I think that's the better approach.

ok. let's keep that in mind for  : 

 [PATCH v5 11/36] spapr/xive: use the VCPU id as a NVT identifier
 [PATCH v5 16/36] spapr: add hcalls support for the XIVE exploitation

which are using the XiveRouter methods to access the controller EAT 
and ENDT. I thought that was good practice to validate the model but 
we can use direct sPAPR table accessors or none at all.


I think these prereq patches could be merged now :

 [PATCH v5 12/36] spapr: initialize VSMT before initializing the IRQ
 [PATCH v5 13/36] spapr: introduce a spapr_irq_init() routine
 [PATCH v5 14/36] spapr: modify the irq backend 'init' method

This one also :

 [PATCH v5 21/36] spapr: extend the sPAPR IRQ backend for XICS

Thanks,

C.
David Gibson Nov. 27, 2018, 12:11 a.m. UTC | #13
On Mon, Nov 26, 2018 at 10:39:44AM +0100, Cédric Le Goater wrote:
> On 11/26/18 6:44 AM, David Gibson wrote:
> > On Fri, Nov 23, 2018 at 11:28:24AM +0100, Cédric Le Goater wrote:
> >> On 11/23/18 2:10 AM, David Gibson wrote:
> >>> On Thu, Nov 22, 2018 at 05:50:07PM +1100, Benjamin Herrenschmidt wrote:
> >>>> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
> >>>>>
> >>>>> Sorry, didn't think of this in my first reply.
> >>>>>
> >>>>> 1) Does the hardware ever actually write back to the EAS?  I know it
> >>>>> does for the END, but it's not clear why it would need to for the
> >>>>> EAS.  If not, we don't need the setter.
> >>>>
> >>>> Nope, though the PAPR model will via hcalls
> >>>
> >>> Right, bit AIUI the set_eas hook is about abstracting PAPR vs bare
> >>> metal details.  Since the hcall knows it's PAPR it can just update the
> >>> backing information for the EAS directly, and no need for an
> >>> abstracted hook.
> >>
> >> Indeed, the first versions of the XIVE patchset did not use such hooks, 
> >> but when discussed we said we wanted abstract methods for the router 
> >> to validate the overall XIVE model, which is useful for PowerNV. 
> >>
> >> We can change again and have the hcalls get/set directly in the EAT
> >> and ENDT. It would certainly simplify the sPAPR model.
> > 
> > I think that's the better approach.
> 
> ok. let's keep that in mind for  : 
> 
>  [PATCH v5 11/36] spapr/xive: use the VCPU id as a NVT identifier
>  [PATCH v5 16/36] spapr: add hcalls support for the XIVE exploitation
> 
> which are using the XiveRouter methods to access the controller EAT 
> and ENDT. I thought that was good practice to validate the model but 
> we can use direct sPAPR table accessors or none at all.

Ok.  Consistency is good as a general rule, but I don't think it makes
sense to force the EAT and the ENDT into the same model.  The EAT is
pure configuration, whereas the the ENDT has both configuration and
status.  Or to look at it another way, the EAT is purely software
controlled, whereas the ENDT is at least partially hardware
controlled.

(I realize that gets a bit fuzzy when considering PAPR, but I think
from the point of view of the XIVE model it makes sense to treat the
PAPR hypervisor logic as "software", even though it's "hardware" from the
guest point of view).

> 
> 
> I think these prereq patches could be merged now :
> 
>  [PATCH v5 12/36] spapr: initialize VSMT before initializing the IRQ
>  [PATCH v5 13/36] spapr: introduce a spapr_irq_init() routine
>  [PATCH v5 14/36] spapr: modify the irq backend 'init' method
> 
> This one also :
> 
>  [PATCH v5 21/36] spapr: extend the sPAPR IRQ backend for XICS
> 
> Thanks,
> 
> C. 
>
David Gibson Nov. 27, 2018, 1:54 a.m. UTC | #14
On Fri, Nov 23, 2018 at 09:06:07AM +0100, Cédric Le Goater wrote:
> On 11/23/18 4:50 AM, David Gibson wrote:
> > On Thu, Nov 22, 2018 at 08:53:00AM +0100, Cédric Le Goater wrote:
> >> On 11/22/18 5:11 AM, David Gibson wrote:
> >>> On Fri, Nov 16, 2018 at 11:56:57AM +0100, Cédric Le Goater
> wrote:
[snip]
> >>> So as far as I can see so far, the XiveFabric interface will
> >>> essentially have to be implemented on the router object, so I'm not
> >>> seeing much point to having the interface rather than just a direct
> >>> call on the router object.  But I haven't read the whole series yet,
> >>> so maybe I'm missing something.
> >>
> >> The PSIHB and PHB4 models are using it but there are not in the series.
> >>
> >> I can send the PSIHB patch in the next version if you like, it's the 
> >> patch right after PnvXive. It's attached below for the moment. Look at 
> >> pnv_psi_notify().
> > 
> > Hrm, I see.  This seems like a really convoluted way of achieving what
> > you need here.  We want to abstract exactly how the source delivers
> > notifies, 
> 
> on sPAPR, I agree that the forwarding of event notification could be a 
> simple XiveRouter call but the XiveRouter covers both machines :/
> 
> On PowerNV, HW uses MMIOs to forward events and only the device knows 
> about the IRQ number offset in the global IRQ number space and the 
> notification port to use for the MMIO store. A PowerNV XIVE source 
> would forward the event notification to a piece of logic which sends 
> a PowerBUS event notification message. How it reaches the XIVE IC is
> beyong QEMU as it would means modeling the PowerBUS. 
> 
> > but doing it with an interface on some object that's not necessarily
> > either the source or the router seems odd.  
> There is no direct link between the device owing the source and the 
> XIVE controller, they could be on the same Power chip but the routing 
> could be done by some other chips. This scenario is covered btw.
> 
> See it as a connector object.
> 
> > At the very least the names need to change (of both interface and > property for the target object).
> 
> I am fine with renaming it. With the above explanations, if they are 
> clear enough, how do see them ?

TBH, I didn't find the info above particularly illuminating.  However,
I think perusing the code has finally gotten my head around the model
(sorry it's taken so long).  I think two things were confusing me.

1) The name had be thinking in terms of the XicsFabric, but the
function here is totally different.

2) I was thinking of the XiveSource as handling all source-side irq
related logic, but I guess it's real function is a bit more limited.
As I now understand it, it's only really handling the ESB and
immediately surrounding logic - the "owning" device (e.g. PHB or PSI)
is responsible for the connection "up the stack" as it were.

So, I'm ok with the model.  Just to verify that my understanding is
correct, can you confirm my reasoning below:

  * For PowerNV, we'd generally expect the notify function to be
    implemented by the "owning" device.  For the XIVE internal source,
    that would be the XiveRouter itself, immediately triggering the
    right EAS.  For the PHB and PSI irq sources, that will code in the
    PHB/PSI which performs the MMIO to poke a router.

  * For PAPR, for simplicity, we'd expect to wire all sources direct
    to a single system-wide router object.

I definitely think it needs a name change though.  "XiveNotify"
perhaps?  And the property to configure it on the XiveSource, maybe
"notify" or "notify_via".
Cédric Le Goater Nov. 27, 2018, 7:30 a.m. UTC | #15
On 11/27/18 1:11 AM, David Gibson wrote:
> On Mon, Nov 26, 2018 at 10:39:44AM +0100, Cédric Le Goater wrote:
>> On 11/26/18 6:44 AM, David Gibson wrote:
>>> On Fri, Nov 23, 2018 at 11:28:24AM +0100, Cédric Le Goater wrote:
>>>> On 11/23/18 2:10 AM, David Gibson wrote:
>>>>> On Thu, Nov 22, 2018 at 05:50:07PM +1100, Benjamin Herrenschmidt wrote:
>>>>>> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
>>>>>>>
>>>>>>> Sorry, didn't think of this in my first reply.
>>>>>>>
>>>>>>> 1) Does the hardware ever actually write back to the EAS?  I know it
>>>>>>> does for the END, but it's not clear why it would need to for the
>>>>>>> EAS.  If not, we don't need the setter.
>>>>>>
>>>>>> Nope, though the PAPR model will via hcalls
>>>>>
>>>>> Right, bit AIUI the set_eas hook is about abstracting PAPR vs bare
>>>>> metal details.  Since the hcall knows it's PAPR it can just update the
>>>>> backing information for the EAS directly, and no need for an
>>>>> abstracted hook.
>>>>
>>>> Indeed, the first versions of the XIVE patchset did not use such hooks, 
>>>> but when discussed we said we wanted abstract methods for the router 
>>>> to validate the overall XIVE model, which is useful for PowerNV. 
>>>>
>>>> We can change again and have the hcalls get/set directly in the EAT
>>>> and ENDT. It would certainly simplify the sPAPR model.
>>>
>>> I think that's the better approach.
>>
>> ok. let's keep that in mind for  : 
>>
>>  [PATCH v5 11/36] spapr/xive: use the VCPU id as a NVT identifier
>>  [PATCH v5 16/36] spapr: add hcalls support for the XIVE exploitation
>>
>> which are using the XiveRouter methods to access the controller EAT 
>> and ENDT. I thought that was good practice to validate the model but 
>> we can use direct sPAPR table accessors or none at all.
> 
> Ok.  Consistency is good as a general rule, but I don't think it makes
> sense to force the EAT and the ENDT into the same model.  

What do you mean by model ? the QEMU machine IC model ?

> The EAT is
> pure configuration, whereas the the ENDT has both configuration and
> status.  Or to look at it another way, the EAT is purely software
> controlled, whereas the ENDT is at least partially hardware
> controlled.

yes but the EAT and the ENDT are XIVE internal tables of the same XIVE 
sub-engine, the IVRE, Interrupt Virtualization Routing Engine, formely 
known as VC, for Virtualization Controller. the EAS is just an entry 
point to the ENDT. I don't see why we would use different models for 
them.

> (I realize that gets a bit fuzzy when considering PAPR, but I think
> from the point of view of the XIVE model it makes sense to treat the
> PAPR hypervisor logic as "software", even though it's "hardware" from the
> guest point of view).

That I agree but the resulting code is too ugly in the hcalls. Tell me 
when you reach patch 11, which links the machine IC model sPAPRXive to 
the generic XiveRouter and also check patch 16 introducing the hcalls, 
which update the XIVE internal tables.

Thanks,

C. 

 
>>
>>
>> I think these prereq patches could be merged now :
>>
>>  [PATCH v5 12/36] spapr: initialize VSMT before initializing the IRQ
>>  [PATCH v5 13/36] spapr: introduce a spapr_irq_init() routine
>>  [PATCH v5 14/36] spapr: modify the irq backend 'init' method
>>
>> This one also :
>>
>>  [PATCH v5 21/36] spapr: extend the sPAPR IRQ backend for XICS
>>
>> Thanks,
>>
>> C. 
>>
>
Cédric Le Goater Nov. 27, 2018, 8:45 a.m. UTC | #16
On 11/27/18 2:54 AM, David Gibson wrote:
> On Fri, Nov 23, 2018 at 09:06:07AM +0100, Cédric Le Goater wrote:
>> On 11/23/18 4:50 AM, David Gibson wrote:
>>> On Thu, Nov 22, 2018 at 08:53:00AM +0100, Cédric Le Goater wrote:
>>>> On 11/22/18 5:11 AM, David Gibson wrote:
>>>>> On Fri, Nov 16, 2018 at 11:56:57AM +0100, Cédric Le Goater
>> wrote:
> [snip]
>>>>> So as far as I can see so far, the XiveFabric interface will
>>>>> essentially have to be implemented on the router object, so I'm not
>>>>> seeing much point to having the interface rather than just a direct
>>>>> call on the router object.  But I haven't read the whole series yet,
>>>>> so maybe I'm missing something.
>>>>
>>>> The PSIHB and PHB4 models are using it but there are not in the series.
>>>>
>>>> I can send the PSIHB patch in the next version if you like, it's the 
>>>> patch right after PnvXive. It's attached below for the moment. Look at 
>>>> pnv_psi_notify().
>>>
>>> Hrm, I see.  This seems like a really convoluted way of achieving what
>>> you need here.  We want to abstract exactly how the source delivers
>>> notifies, 
>>
>> on sPAPR, I agree that the forwarding of event notification could be a 
>> simple XiveRouter call but the XiveRouter covers both machines :/
>>
>> On PowerNV, HW uses MMIOs to forward events and only the device knows 
>> about the IRQ number offset in the global IRQ number space and the 
>> notification port to use for the MMIO store. A PowerNV XIVE source 
>> would forward the event notification to a piece of logic which sends 
>> a PowerBUS event notification message. How it reaches the XIVE IC is
>> beyong QEMU as it would means modeling the PowerBUS. 
>>
>>> but doing it with an interface on some object that's not necessarily
>>> either the source or the router seems odd.  
>> There is no direct link between the device owing the source and the 
>> XIVE controller, they could be on the same Power chip but the routing 
>> could be done by some other chips. This scenario is covered btw.
>>
>> See it as a connector object.
>>
>>> At the very least the names need to change (of both interface and > property for the target object).
>>
>> I am fine with renaming it. With the above explanations, if they are 
>> clear enough, how do see them ?
> 
> TBH, I didn't find the info above particularly illuminating.

This is really a PowerNV need. So, I can reshuffle the code and make 
a direct link between the XiveSource and the XiveRouter models for sPAPR.
It's a small change. and reintroduce XiveFabric (or whatever name we choose) 
later before the PowerNV Xive and PSIHB model. 

>  However,
> I think perusing the code has finally gotten my head around the model
> (sorry it's taken so long).  I think two things were confusing me.
> 
> 1) The name had be thinking in terms of the XicsFabric, but the
> function here is totally different.

Yes. I agree. It's not the same thing at all.

> 2) I was thinking of the XiveSource as handling all source-side irq
> related logic, but I guess it's real function is a bit more limited.
> As I now understand it, it's only really handling the ESB and
> immediately surrounding logic - the "owning" device (e.g. PHB or PSI)
> is responsible for the connection "up the stack" as it were.

yes.

> So, I'm ok with the model.  Just to verify that my understanding is
> correct, can you confirm my reasoning below:
> 
>   * For PowerNV, we'd generally expect the notify function to be
>     implemented by the "owning" device.  For the XIVE internal source,
>     that would be the XiveRouter itself, immediately triggering the
>     right EAS.  For the PHB and PSI irq sources, that will code in the
>     PHB/PSI which performs the MMIO to poke a router.

exactly.
 
>   * For PAPR, for simplicity, we'd expect to wire all sources direct
>     to a single system-wide router object.

yes. 

> 
> I definitely think it needs a name change though.  "XiveNotify"
> perhaps?  

Yes. XiveNotifier may be ? to use a noun and not a verb.

> And the property to configure it on the XiveSource, maybe "notify" 
> or "notify_via".

What the XIVE engines are doing is forwarding a trigger event to the 
next engine that can possibly do the routing to the final target. 
  
In the specs, the verbs 'trigger', 'forward', 'notify', 'route' are 
commonly used. I think 'notify' is the most frequent.

ok for 'notify'.

Thanks,

C.
David Gibson Nov. 27, 2018, 10:56 p.m. UTC | #17
On Tue, Nov 27, 2018 at 08:30:15AM +0100, Cédric Le Goater wrote:
> On 11/27/18 1:11 AM, David Gibson wrote:
> > On Mon, Nov 26, 2018 at 10:39:44AM +0100, Cédric Le Goater wrote:
> >> On 11/26/18 6:44 AM, David Gibson wrote:
> >>> On Fri, Nov 23, 2018 at 11:28:24AM +0100, Cédric Le Goater wrote:
> >>>> On 11/23/18 2:10 AM, David Gibson wrote:
> >>>>> On Thu, Nov 22, 2018 at 05:50:07PM +1100, Benjamin Herrenschmidt wrote:
> >>>>>> On Thu, 2018-11-22 at 15:44 +1100, David Gibson wrote:
> >>>>>>>
> >>>>>>> Sorry, didn't think of this in my first reply.
> >>>>>>>
> >>>>>>> 1) Does the hardware ever actually write back to the EAS?  I know it
> >>>>>>> does for the END, but it's not clear why it would need to for the
> >>>>>>> EAS.  If not, we don't need the setter.
> >>>>>>
> >>>>>> Nope, though the PAPR model will via hcalls
> >>>>>
> >>>>> Right, bit AIUI the set_eas hook is about abstracting PAPR vs bare
> >>>>> metal details.  Since the hcall knows it's PAPR it can just update the
> >>>>> backing information for the EAS directly, and no need for an
> >>>>> abstracted hook.
> >>>>
> >>>> Indeed, the first versions of the XIVE patchset did not use such hooks, 
> >>>> but when discussed we said we wanted abstract methods for the router 
> >>>> to validate the overall XIVE model, which is useful for PowerNV. 
> >>>>
> >>>> We can change again and have the hcalls get/set directly in the EAT
> >>>> and ENDT. It would certainly simplify the sPAPR model.
> >>>
> >>> I think that's the better approach.
> >>
> >> ok. let's keep that in mind for  : 
> >>
> >>  [PATCH v5 11/36] spapr/xive: use the VCPU id as a NVT identifier
> >>  [PATCH v5 16/36] spapr: add hcalls support for the XIVE exploitation
> >>
> >> which are using the XiveRouter methods to access the controller EAT 
> >> and ENDT. I thought that was good practice to validate the model but 
> >> we can use direct sPAPR table accessors or none at all.
> > 
> > Ok.  Consistency is good as a general rule, but I don't think it makes
> > sense to force the EAT and the ENDT into the same model.  
> 
> What do you mean by model ? the QEMU machine IC model ?

Oh, sorry, nothing that formal.  By "model" I just meant the same
pattern of accessor hooks.  They certainly do belong in the same qemu
object.

> > The EAT is
> > pure configuration, whereas the the ENDT has both configuration and
> > status.  Or to look at it another way, the EAT is purely software
> > controlled, whereas the ENDT is at least partially hardware
> > controlled.
> 
> yes but the EAT and the ENDT are XIVE internal tables of the same XIVE 
> sub-engine, the IVRE, Interrupt Virtualization Routing Engine, formely 
> known as VC, for Virtualization Controller. the EAS is just an entry 
> point to the ENDT. I don't see why we would use different models for 
> them.
> 
> > (I realize that gets a bit fuzzy when considering PAPR, but I think
> > from the point of view of the XIVE model it makes sense to treat the
> > PAPR hypervisor logic as "software", even though it's "hardware" from the
> > guest point of view).
> 
> That I agree but the resulting code is too ugly in the hcalls. Tell me 
> when you reach patch 11, which links the machine IC model sPAPRXive to 
> the generic XiveRouter and also check patch 16 introducing the hcalls, 
> which update the XIVE internal tables.
> 
> Thanks,
> 
> C. 
> 
>  
> >>
> >>
> >> I think these prereq patches could be merged now :
> >>
> >>  [PATCH v5 12/36] spapr: initialize VSMT before initializing the IRQ
> >>  [PATCH v5 13/36] spapr: introduce a spapr_irq_init() routine
> >>  [PATCH v5 14/36] spapr: modify the irq backend 'init' method
> >>
> >> This one also :
> >>
> >>  [PATCH v5 21/36] spapr: extend the sPAPR IRQ backend for XICS
> >>
> >> Thanks,
> >>
> >> C. 
> >>
> > 
>
diff mbox series

Patch

diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index be93fae6317b..5a0696366577 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -11,6 +11,7 @@ 
 #define PPC_XIVE_H
 
 #include "hw/sysbus.h"
+#include "hw/ppc/xive_regs.h"
 
 /*
  * XIVE Fabric (Interface between Source and Router)
@@ -168,4 +169,35 @@  static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
     }
 }
 
+/*
+ * XIVE Router
+ */
+
+typedef struct XiveRouter {
+    SysBusDevice    parent;
+
+    uint32_t        chip_id;
+} XiveRouter;
+
+#define TYPE_XIVE_ROUTER "xive-router"
+#define XIVE_ROUTER(obj)                                \
+    OBJECT_CHECK(XiveRouter, (obj), TYPE_XIVE_ROUTER)
+#define XIVE_ROUTER_CLASS(klass)                                        \
+    OBJECT_CLASS_CHECK(XiveRouterClass, (klass), TYPE_XIVE_ROUTER)
+#define XIVE_ROUTER_GET_CLASS(obj)                              \
+    OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
+
+typedef struct XiveRouterClass {
+    SysBusDeviceClass parent;
+
+    /* XIVE table accessors */
+    int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
+    int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
+} XiveRouterClass;
+
+void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
+
+int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
+int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
+
 #endif /* PPC_XIVE_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
new file mode 100644
index 000000000000..12499b33614c
--- /dev/null
+++ b/include/hw/ppc/xive_regs.h
@@ -0,0 +1,31 @@ 
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2016-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_XIVE_REGS_H
+#define PPC_XIVE_REGS_H
+
+/* EAS (Event Assignment Structure)
+ *
+ * One per interrupt source. Targets an interrupt to a given Event
+ * Notification Descriptor (END) and provides the corresponding
+ * logical interrupt number (END data)
+ */
+typedef struct XiveEAS {
+        /* Use a single 64-bit definition to make it easier to
+         * perform atomic updates
+         */
+        uint64_t        w;
+#define EAS_VALID       PPC_BIT(0)
+#define EAS_END_BLOCK   PPC_BITMASK(4, 7)        /* Destination END block# */
+#define EAS_END_INDEX   PPC_BITMASK(8, 31)       /* Destination END index */
+#define EAS_MASKED      PPC_BIT(32)              /* Masked */
+#define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
+} XiveEAS;
+
+#endif /* PPC_XIVE_REGS_H */
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 014a2e41f71f..c4c90a25758e 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -442,6 +442,91 @@  static const TypeInfo xive_source_info = {
     .class_init    = xive_source_class_init,
 };
 
+/*
+ * XIVE Router (aka. Virtualization Controller or IVRE)
+ */
+
+int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->get_eas(xrtr, lisn, eas);
+}
+
+int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
+{
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    return xrc->set_eas(xrtr, lisn, eas);
+}
+
+static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
+{
+    XiveRouter *xrtr = XIVE_ROUTER(xf);
+    XiveEAS eas;
+
+    /* EAS cache lookup */
+    if (xive_router_get_eas(xrtr, lisn, &eas)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
+        return;
+    }
+
+    /* The IVRE has a State Bit Cache for its internal sources which
+     * is also involed at this point. We skip the SBC lookup because
+     * the state bits of the sources are modeled internally in QEMU.
+     */
+
+    if (!(eas.w & EAS_VALID)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
+        return;
+    }
+
+    if (eas.w & EAS_MASKED) {
+        /* Notification completed */
+        return;
+    }
+}
+
+static Property xive_router_properties[] = {
+    DEFINE_PROP_UINT32("chip-id", XiveRouter, chip_id, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_router_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(klass);
+
+    dc->desc    = "XIVE Router Engine";
+    dc->props   = xive_router_properties;
+    xfc->notify = xive_router_notify;
+}
+
+static const TypeInfo xive_router_info = {
+    .name          = TYPE_XIVE_ROUTER,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .abstract      = true,
+    .class_size    = sizeof(XiveRouterClass),
+    .class_init    = xive_router_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_XIVE_FABRIC },
+        { }
+    }
+};
+
+void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
+{
+    if (!(eas->w & EAS_VALID)) {
+        return;
+    }
+
+    monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
+                   lisn, eas->w & EAS_MASKED ? "M" : " ",
+                   (uint8_t)  GETFIELD(EAS_END_BLOCK, eas->w),
+                   (uint32_t) GETFIELD(EAS_END_INDEX, eas->w),
+                   (uint32_t) GETFIELD(EAS_END_DATA, eas->w));
+}
+
 /*
  * XIVE Fabric
  */
@@ -455,6 +540,7 @@  static void xive_register_types(void)
 {
     type_register_static(&xive_source_info);
     type_register_static(&xive_fabric_info);
+    type_register_static(&xive_router_info);
 }
 
 type_init(xive_register_types)