diff mbox series

[v3,03/35] ppc/xive: introduce the XiveFabric interface

Message ID 20180419124331.3915-4-clg@kaod.org
State New
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater April 19, 2018, 12:42 p.m. UTC
The XiveFabric offers a simple interface, between the XiveSourve
object and the device model owning the interrupt sources, to forward
an event notification to the XIVE interrupt controller of the machine
and if the owner is the controller, to call directly the routing
sub-engine.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
 include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)

Comments

David Gibson April 23, 2018, 6:46 a.m. UTC | #1
On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
> The XiveFabric offers a simple interface, between the XiveSourve
> object and the device model owning the interrupt sources, to forward
> an event notification to the XIVE interrupt controller of the machine
> and if the owner is the controller, to call directly the routing
> sub-engine.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
>  2 files changed, 61 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 060976077dd7..b4c3d06c1219 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -17,6 +17,21 @@
>  #include "hw/ppc/xive.h"
>  
>  /*
> + * XIVE Fabric
> + */
> +
> +static void xive_fabric_route(XiveFabric *xf, int lisn)
> +{
> +
> +}
> +
> +static const TypeInfo xive_fabric_info = {
> +    .name = TYPE_XIVE_FABRIC,
> +    .parent = TYPE_INTERFACE,
> +    .class_size = sizeof(XiveFabricClass),
> +};
> +
> +/*
>   * XIVE Interrupt Source
>   */
>  
> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>  
>  /*
>   * Forward the source event notification to the associated XiveFabric,
> - * the device owning the sources.
> + * the device owning the sources, or perform the routing if the device
> + * is the interrupt controller.
>   */
>  static void xive_source_notify(XiveSource *xsrc, int srcno)
>  {
>  
> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
> +
> +    if (xfc->notify) {
> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
> +    } else {
> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
> +    }

Why 2 cases?  Can't the XiveFabric object just make its notify equal
to xive_fabric_route if that's what it wants?

>  }
>  
>  /*
> @@ -302,6 +325,17 @@ static void xive_source_reset(DeviceState *dev)
>  static void xive_source_realize(DeviceState *dev, Error **errp)
>  {
>      XiveSource *xsrc = XIVE_SOURCE(dev);
> +    Object *obj;
> +    Error *local_err = NULL;
> +
> +    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
> +    if (!obj) {
> +        error_propagate(errp, local_err);
> +        error_prepend(errp, "required link 'xive' not found: ");
> +        return;
> +    }
> +
> +    xsrc->xive = XIVE_FABRIC(obj);
>  
>      if (!xsrc->nr_irqs) {
>          error_setg(errp, "Number of interrupt needs to be greater than 0");
> @@ -376,6 +410,7 @@ static const TypeInfo xive_source_info = {
>  static void xive_register_types(void)
>  {
>      type_register_static(&xive_source_info);
> +    type_register_static(&xive_fabric_info);
>  }
>  
>  type_init(xive_register_types)
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 0b76dd278d9b..4fcae2c763e6 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -12,6 +12,8 @@
>  
>  #include "hw/sysbus.h"
>  
> +typedef struct XiveFabric XiveFabric;
> +
>  /*
>   * XIVE Interrupt Source
>   */
> @@ -46,6 +48,8 @@ typedef struct XiveSource {
>      hwaddr       esb_base;
>      uint32_t     esb_shift;
>      MemoryRegion esb_mmio;
> +
> +    XiveFabric   *xive;
>  } XiveSource;
>  
>  /*
> @@ -143,4 +147,25 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>      xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0;
>  }
>  
> +/*
> + * XIVE Fabric
> + */
> +
> +typedef struct XiveFabric {
> +    Object parent;
> +} XiveFabric;
> +
> +#define TYPE_XIVE_FABRIC "xive-fabric"
> +#define XIVE_FABRIC(obj)                                     \
> +    OBJECT_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
> +#define XIVE_FABRIC_CLASS(klass)                                     \
> +    OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
> +#define XIVE_FABRIC_GET_CLASS(obj)                                   \
> +    OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
> +
> +typedef struct XiveFabricClass {
> +    InterfaceClass parent;
> +    void (*notify)(XiveFabric *xf, uint32_t lisn);
> +} XiveFabricClass;
> +
>  #endif /* PPC_XIVE_H */
Cédric Le Goater April 23, 2018, 7:58 a.m. UTC | #2
On 04/23/2018 08:46 AM, David Gibson wrote:
> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
>> The XiveFabric offers a simple interface, between the XiveSourve
>> object and the device model owning the interrupt sources, to forward
>> an event notification to the XIVE interrupt controller of the machine
>> and if the owner is the controller, to call directly the routing
>> sub-engine.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
>>  2 files changed, 61 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index 060976077dd7..b4c3d06c1219 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -17,6 +17,21 @@
>>  #include "hw/ppc/xive.h"
>>  
>>  /*
>> + * XIVE Fabric
>> + */
>> +
>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
>> +{
>> +
>> +}
>> +
>> +static const TypeInfo xive_fabric_info = {
>> +    .name = TYPE_XIVE_FABRIC,
>> +    .parent = TYPE_INTERFACE,
>> +    .class_size = sizeof(XiveFabricClass),
>> +};
>> +
>> +/*
>>   * XIVE Interrupt Source
>>   */
>>  
>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>>  
>>  /*
>>   * Forward the source event notification to the associated XiveFabric,
>> - * the device owning the sources.
>> + * the device owning the sources, or perform the routing if the device
>> + * is the interrupt controller.
>>   */
>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
>>  {
>>  
>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
>> +
>> +    if (xfc->notify) {
>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
>> +    } else {
>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
>> +    }
> 
> Why 2 cases?  Can't the XiveFabric object just make its notify equal
> to xive_fabric_route if that's what it wants?
Under sPAPR, all the sources, IPIs and virtual device interrupts, 
generate events which are directly routed by xive_fabric_route(). 
There is no need of an extra hop. Indeed. 

Under PowerNV, some sources forward the notification to the routing 
engine using a specific MMIO load on a notify address which is stored 
in one of the controller registers. So we need a hop to reach the 
device model, owning the sources, and do that load : 

	static void pnv_psi_notify(XiveFabric *xf, uint32_t lisn)
	{
	    PnvPsi *psi = PNV_PSI(xf);
	    uint64_t notif_port =
	        psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
	    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
	    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
	    uint32_t data = cpu_to_be32(lisn);
	
	    if (valid) {
	        cpu_physical_memory_write(notify_addr, &data, sizeof(data));
	    }
	}

The PnvXive model handles the load and forwards to the fabric again.  

The IPIs under PowerNV do not need an extra hop so they reach the 
routing routine directly without the extra notify() hop. 

However, PowerNV at the end should be using xive_fabric_route() 
but there are some differences on how the NVT registers are 
updated (HV vs. OS mode) and it's not handled yet so it uses a 
notify() handler. But is should disappear and call directly 
xive_fabric_route() in a near future.


May be, XiveFabricNotifier would be a better name for this feature ?
I am adding a few ops later which are more related to routing.

Thanks,

C.


> 
>>  }
>>  
>>  /*
>> @@ -302,6 +325,17 @@ static void xive_source_reset(DeviceState *dev)
>>  static void xive_source_realize(DeviceState *dev, Error **errp)
>>  {
>>      XiveSource *xsrc = XIVE_SOURCE(dev);
>> +    Object *obj;
>> +    Error *local_err = NULL;
>> +
>> +    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
>> +    if (!obj) {
>> +        error_propagate(errp, local_err);
>> +        error_prepend(errp, "required link 'xive' not found: ");
>> +        return;
>> +    }
>> +
>> +    xsrc->xive = XIVE_FABRIC(obj);
>>  
>>      if (!xsrc->nr_irqs) {
>>          error_setg(errp, "Number of interrupt needs to be greater than 0");
>> @@ -376,6 +410,7 @@ static const TypeInfo xive_source_info = {
>>  static void xive_register_types(void)
>>  {
>>      type_register_static(&xive_source_info);
>> +    type_register_static(&xive_fabric_info);
>>  }
>>  
>>  type_init(xive_register_types)
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index 0b76dd278d9b..4fcae2c763e6 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -12,6 +12,8 @@
>>  
>>  #include "hw/sysbus.h"
>>  
>> +typedef struct XiveFabric XiveFabric;
>> +
>>  /*
>>   * XIVE Interrupt Source
>>   */
>> @@ -46,6 +48,8 @@ typedef struct XiveSource {
>>      hwaddr       esb_base;
>>      uint32_t     esb_shift;
>>      MemoryRegion esb_mmio;
>> +
>> +    XiveFabric   *xive;
>>  } XiveSource;
>>  
>>  /*
>> @@ -143,4 +147,25 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>>      xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0;
>>  }
>>  
>> +/*
>> + * XIVE Fabric
>> + */
>> +
>> +typedef struct XiveFabric {
>> +    Object parent;
>> +} XiveFabric;
>> +
>> +#define TYPE_XIVE_FABRIC "xive-fabric"
>> +#define XIVE_FABRIC(obj)                                     \
>> +    OBJECT_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
>> +#define XIVE_FABRIC_CLASS(klass)                                     \
>> +    OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
>> +#define XIVE_FABRIC_GET_CLASS(obj)                                   \
>> +    OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
>> +
>> +typedef struct XiveFabricClass {
>> +    InterfaceClass parent;
>> +    void (*notify)(XiveFabric *xf, uint32_t lisn);
>> +} XiveFabricClass;
>> +
>>  #endif /* PPC_XIVE_H */
>
David Gibson April 24, 2018, 6:46 a.m. UTC | #3
On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
> On 04/23/2018 08:46 AM, David Gibson wrote:
> > On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
> >> The XiveFabric offers a simple interface, between the XiveSourve
> >> object and the device model owning the interrupt sources, to forward
> >> an event notification to the XIVE interrupt controller of the machine
> >> and if the owner is the controller, to call directly the routing
> >> sub-engine.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
> >>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
> >>  2 files changed, 61 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index 060976077dd7..b4c3d06c1219 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -17,6 +17,21 @@
> >>  #include "hw/ppc/xive.h"
> >>  
> >>  /*
> >> + * XIVE Fabric
> >> + */
> >> +
> >> +static void xive_fabric_route(XiveFabric *xf, int lisn)
> >> +{
> >> +
> >> +}
> >> +
> >> +static const TypeInfo xive_fabric_info = {
> >> +    .name = TYPE_XIVE_FABRIC,
> >> +    .parent = TYPE_INTERFACE,
> >> +    .class_size = sizeof(XiveFabricClass),
> >> +};
> >> +
> >> +/*
> >>   * XIVE Interrupt Source
> >>   */
> >>  
> >> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
> >>  
> >>  /*
> >>   * Forward the source event notification to the associated XiveFabric,
> >> - * the device owning the sources.
> >> + * the device owning the sources, or perform the routing if the device
> >> + * is the interrupt controller.
> >>   */
> >>  static void xive_source_notify(XiveSource *xsrc, int srcno)
> >>  {
> >>  
> >> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
> >> +
> >> +    if (xfc->notify) {
> >> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
> >> +    } else {
> >> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
> >> +    }
> > 
> > Why 2 cases?  Can't the XiveFabric object just make its notify equal
> > to xive_fabric_route if that's what it wants?
> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
> generate events which are directly routed by xive_fabric_route(). 
> There is no need of an extra hop. Indeed. 

Ok.

> Under PowerNV, some sources forward the notification to the routing 
> engine using a specific MMIO load on a notify address which is stored 
> in one of the controller registers. So we need a hop to reach the 
> device model, owning the sources, and do that load :

Hm.  So you're saying that in pnv some sources send their notification
to some other unit, that would then (after possible masking) forward
on to the overall xive fabric?

That seems like a property of the source object, rather than a
property of the fabric.  Indeed varying this by source object would
require the objects have a different xive pointer, when I thought the
idea was that the XiveFabric was global.

> 	static void pnv_psi_notify(XiveFabric *xf, uint32_t lisn)
> 	{
> 	    PnvPsi *psi = PNV_PSI(xf);
> 	    uint64_t notif_port =
> 	        psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
> 	    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
> 	    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
> 	    uint32_t data = cpu_to_be32(lisn);
> 	
> 	    if (valid) {
> 	        cpu_physical_memory_write(notify_addr, &data, sizeof(data));
> 	    }
> 	}
> 
> The PnvXive model handles the load and forwards to the fabric again.  
> 
> The IPIs under PowerNV do not need an extra hop so they reach the 
> routing routine directly without the extra notify() hop. 
> 
> However, PowerNV at the end should be using xive_fabric_route() 
> but there are some differences on how the NVT registers are 
> updated (HV vs. OS mode) and it's not handled yet so it uses a 
> notify() handler. But is should disappear and call directly 
> xive_fabric_route() in a near future.
> 
> 
> May be, XiveFabricNotifier would be a better name for this feature ?
> I am adding a few ops later which are more related to routing.
> 
> Thanks,
> 
> C.
> 
> 
> > 
> >>  }
> >>  
> >>  /*
> >> @@ -302,6 +325,17 @@ static void xive_source_reset(DeviceState *dev)
> >>  static void xive_source_realize(DeviceState *dev, Error **errp)
> >>  {
> >>      XiveSource *xsrc = XIVE_SOURCE(dev);
> >> +    Object *obj;
> >> +    Error *local_err = NULL;
> >> +
> >> +    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
> >> +    if (!obj) {
> >> +        error_propagate(errp, local_err);
> >> +        error_prepend(errp, "required link 'xive' not found: ");
> >> +        return;
> >> +    }
> >> +
> >> +    xsrc->xive = XIVE_FABRIC(obj);
> >>  
> >>      if (!xsrc->nr_irqs) {
> >>          error_setg(errp, "Number of interrupt needs to be greater than 0");
> >> @@ -376,6 +410,7 @@ static const TypeInfo xive_source_info = {
> >>  static void xive_register_types(void)
> >>  {
> >>      type_register_static(&xive_source_info);
> >> +    type_register_static(&xive_fabric_info);
> >>  }
> >>  
> >>  type_init(xive_register_types)
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index 0b76dd278d9b..4fcae2c763e6 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -12,6 +12,8 @@
> >>  
> >>  #include "hw/sysbus.h"
> >>  
> >> +typedef struct XiveFabric XiveFabric;
> >> +
> >>  /*
> >>   * XIVE Interrupt Source
> >>   */
> >> @@ -46,6 +48,8 @@ typedef struct XiveSource {
> >>      hwaddr       esb_base;
> >>      uint32_t     esb_shift;
> >>      MemoryRegion esb_mmio;
> >> +
> >> +    XiveFabric   *xive;
> >>  } XiveSource;
> >>  
> >>  /*
> >> @@ -143,4 +147,25 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
> >>      xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0;
> >>  }
> >>  
> >> +/*
> >> + * XIVE Fabric
> >> + */
> >> +
> >> +typedef struct XiveFabric {
> >> +    Object parent;
> >> +} XiveFabric;
> >> +
> >> +#define TYPE_XIVE_FABRIC "xive-fabric"
> >> +#define XIVE_FABRIC(obj)                                     \
> >> +    OBJECT_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
> >> +#define XIVE_FABRIC_CLASS(klass)                                     \
> >> +    OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
> >> +#define XIVE_FABRIC_GET_CLASS(obj)                                   \
> >> +    OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
> >> +
> >> +typedef struct XiveFabricClass {
> >> +    InterfaceClass parent;
> >> +    void (*notify)(XiveFabric *xf, uint32_t lisn);
> >> +} XiveFabricClass;
> >> +
> >>  #endif /* PPC_XIVE_H */
> > 
>
Cédric Le Goater April 24, 2018, 9:33 a.m. UTC | #4
On 04/24/2018 08:46 AM, David Gibson wrote:
> On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
>> On 04/23/2018 08:46 AM, David Gibson wrote:
>>> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
>>>> The XiveFabric offers a simple interface, between the XiveSourve
>>>> object and the device model owning the interrupt sources, to forward
>>>> an event notification to the XIVE interrupt controller of the machine
>>>> and if the owner is the controller, to call directly the routing
>>>> sub-engine.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
>>>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
>>>>  2 files changed, 61 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>> index 060976077dd7..b4c3d06c1219 100644
>>>> --- a/hw/intc/xive.c
>>>> +++ b/hw/intc/xive.c
>>>> @@ -17,6 +17,21 @@
>>>>  #include "hw/ppc/xive.h"
>>>>  
>>>>  /*
>>>> + * XIVE Fabric
>>>> + */
>>>> +
>>>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
>>>> +{
>>>> +
>>>> +}
>>>> +
>>>> +static const TypeInfo xive_fabric_info = {
>>>> +    .name = TYPE_XIVE_FABRIC,
>>>> +    .parent = TYPE_INTERFACE,
>>>> +    .class_size = sizeof(XiveFabricClass),
>>>> +};
>>>> +
>>>> +/*
>>>>   * XIVE Interrupt Source
>>>>   */
>>>>  
>>>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>>>>  
>>>>  /*
>>>>   * Forward the source event notification to the associated XiveFabric,
>>>> - * the device owning the sources.
>>>> + * the device owning the sources, or perform the routing if the device
>>>> + * is the interrupt controller.
>>>>   */
>>>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
>>>>  {
>>>>  
>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
>>>> +
>>>> +    if (xfc->notify) {
>>>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
>>>> +    } else {
>>>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
>>>> +    }
>>>
>>> Why 2 cases?  Can't the XiveFabric object just make its notify equal
>>> to xive_fabric_route if that's what it wants?
>> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
>> generate events which are directly routed by xive_fabric_route(). 
>> There is no need of an extra hop. Indeed. 
> 
> Ok.
> 
>> Under PowerNV, some sources forward the notification to the routing 
>> engine using a specific MMIO load on a notify address which is stored 
>> in one of the controller registers. So we need a hop to reach the 
>> device model, owning the sources, and do that load :
> 
> Hm.  So you're saying that in pnv some sources send their notification
> to some other unit, 

Not to any unit/device, to the device owning the sources.

For the XiveSource object under PSI, the XIVEFabric interface is the 
PSI device object it self, which knows how to forward the notification 
on the XIVE Power "bus". To be more precise, the PSI HB device has 
14 interrupt sources, which notifications are forwarded using a MMIO 
load to some address. The load address is configured (by skiboot) in 
one of the PSI device registers, and points to a MMIO region of the 
main XIVE interrupt controller. 

The PHB4 sources should be the same.

For the XiveSource object (all interrupts) under sPAPRXive, the 
XIVEFabric is the main interrupt controller sPAPRXive.

For the XiveSource object (IPIs) under PnvXive, the XIVEFabric is 
also the main interrupt controller PnvXive.

> that would then (after possible masking) forward on to the overall> xive fabric ? 

yes. May be XIVEFabric is a confusing name. What about XIVEForwarder ? 

> That seems like a property of the source object, 

The source object is generic. It's a bunch of PQ bits that can be 
controlled by MMIOs. Nothing more.  

> rather than a
> property of the fabric.  Indeed varying this by source object would
> require the objects have a different xive pointer, when I thought the
> idea was that the XiveFabric was global.

When a notification is forwarded, the sources needs to call an 
interface which generally is implemented by the source owner, 
which is not necessarily the main IC. 

>> 	static void pnv_psi_notify(XiveFabric *xf, uint32_t lisn)
>> 	{
>> 	    PnvPsi *psi = PNV_PSI(xf);
>> 	    uint64_t notif_port =
>> 	        psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
>> 	    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
>> 	    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
>> 	    uint32_t data = cpu_to_be32(lisn);
>> 	
>> 	    if (valid) {
>> 	        cpu_physical_memory_write(notify_addr, &data, sizeof(data));
>> 	    }
>> 	}
>>
>> The PnvXive model handles the load and forwards to the fabric again.  
>>
>> The IPIs under PowerNV do not need an extra hop so they reach the 
>> routing routine directly without the extra notify() hop. 
>>
>> However, PowerNV at the end should be using xive_fabric_route() 
>> but there are some differences on how the NVT registers are 
>> updated (HV vs. OS mode) and it's not handled yet so it uses a 
>> notify() handler. But is should disappear and call directly 
>> xive_fabric_route() in a near future.
>>
>>
>> May be, XiveFabricNotifier would be a better name for this feature ?
>> I am adding a few ops later which are more related to routing.
>>
>> Thanks,
>>
>> C.
>>
>>
>>>
>>>>  }
>>>>  
>>>>  /*
>>>> @@ -302,6 +325,17 @@ static void xive_source_reset(DeviceState *dev)
>>>>  static void xive_source_realize(DeviceState *dev, Error **errp)
>>>>  {
>>>>      XiveSource *xsrc = XIVE_SOURCE(dev);
>>>> +    Object *obj;
>>>> +    Error *local_err = NULL;
>>>> +
>>>> +    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
>>>> +    if (!obj) {
>>>> +        error_propagate(errp, local_err);
>>>> +        error_prepend(errp, "required link 'xive' not found: ");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    xsrc->xive = XIVE_FABRIC(obj);
>>>>  
>>>>      if (!xsrc->nr_irqs) {
>>>>          error_setg(errp, "Number of interrupt needs to be greater than 0");
>>>> @@ -376,6 +410,7 @@ static const TypeInfo xive_source_info = {
>>>>  static void xive_register_types(void)
>>>>  {
>>>>      type_register_static(&xive_source_info);
>>>> +    type_register_static(&xive_fabric_info);
>>>>  }
>>>>  
>>>>  type_init(xive_register_types)
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index 0b76dd278d9b..4fcae2c763e6 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -12,6 +12,8 @@
>>>>  
>>>>  #include "hw/sysbus.h"
>>>>  
>>>> +typedef struct XiveFabric XiveFabric;
>>>> +
>>>>  /*
>>>>   * XIVE Interrupt Source
>>>>   */
>>>> @@ -46,6 +48,8 @@ typedef struct XiveSource {
>>>>      hwaddr       esb_base;
>>>>      uint32_t     esb_shift;
>>>>      MemoryRegion esb_mmio;
>>>> +
>>>> +    XiveFabric   *xive;
>>>>  } XiveSource;
>>>>  
>>>>  /*
>>>> @@ -143,4 +147,25 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>>>>      xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0;
>>>>  }
>>>>  
>>>> +/*
>>>> + * XIVE Fabric
>>>> + */
>>>> +
>>>> +typedef struct XiveFabric {
>>>> +    Object parent;
>>>> +} XiveFabric;
>>>> +
>>>> +#define TYPE_XIVE_FABRIC "xive-fabric"
>>>> +#define XIVE_FABRIC(obj)                                     \
>>>> +    OBJECT_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
>>>> +#define XIVE_FABRIC_CLASS(klass)                                     \
>>>> +    OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
>>>> +#define XIVE_FABRIC_GET_CLASS(obj)                                   \
>>>> +    OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
>>>> +
>>>> +typedef struct XiveFabricClass {
>>>> +    InterfaceClass parent;
>>>> +    void (*notify)(XiveFabric *xf, uint32_t lisn);
>>>> +} XiveFabricClass;
>>>> +
>>>>  #endif /* PPC_XIVE_H */
>>>
>>
>
David Gibson April 26, 2018, 3:54 a.m. UTC | #5
On Tue, Apr 24, 2018 at 11:33:11AM +0200, Cédric Le Goater wrote:
> On 04/24/2018 08:46 AM, David Gibson wrote:
> > On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
> >> On 04/23/2018 08:46 AM, David Gibson wrote:
> >>> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
> >>>> The XiveFabric offers a simple interface, between the XiveSourve
> >>>> object and the device model owning the interrupt sources, to forward
> >>>> an event notification to the XIVE interrupt controller of the machine
> >>>> and if the owner is the controller, to call directly the routing
> >>>> sub-engine.
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> ---
> >>>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
> >>>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
> >>>>  2 files changed, 61 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >>>> index 060976077dd7..b4c3d06c1219 100644
> >>>> --- a/hw/intc/xive.c
> >>>> +++ b/hw/intc/xive.c
> >>>> @@ -17,6 +17,21 @@
> >>>>  #include "hw/ppc/xive.h"
> >>>>  
> >>>>  /*
> >>>> + * XIVE Fabric
> >>>> + */
> >>>> +
> >>>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
> >>>> +{
> >>>> +
> >>>> +}
> >>>> +
> >>>> +static const TypeInfo xive_fabric_info = {
> >>>> +    .name = TYPE_XIVE_FABRIC,
> >>>> +    .parent = TYPE_INTERFACE,
> >>>> +    .class_size = sizeof(XiveFabricClass),
> >>>> +};
> >>>> +
> >>>> +/*
> >>>>   * XIVE Interrupt Source
> >>>>   */
> >>>>  
> >>>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
> >>>>  
> >>>>  /*
> >>>>   * Forward the source event notification to the associated XiveFabric,
> >>>> - * the device owning the sources.
> >>>> + * the device owning the sources, or perform the routing if the device
> >>>> + * is the interrupt controller.
> >>>>   */
> >>>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
> >>>>  {
> >>>>  
> >>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
> >>>> +
> >>>> +    if (xfc->notify) {
> >>>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
> >>>> +    } else {
> >>>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
> >>>> +    }
> >>>
> >>> Why 2 cases?  Can't the XiveFabric object just make its notify equal
> >>> to xive_fabric_route if that's what it wants?
> >> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
> >> generate events which are directly routed by xive_fabric_route(). 
> >> There is no need of an extra hop. Indeed. 
> > 
> > Ok.
> > 
> >> Under PowerNV, some sources forward the notification to the routing 
> >> engine using a specific MMIO load on a notify address which is stored 
> >> in one of the controller registers. So we need a hop to reach the 
> >> device model, owning the sources, and do that load :
> > 
> > Hm.  So you're saying that in pnv some sources send their notification
> > to some other unit, 
> 
> Not to any unit/device, to the device owning the sources.
> 
> For the XiveSource object under PSI, the XIVEFabric interface is the 
> PSI device object it self, which knows how to forward the notification 
> on the XIVE Power "bus". To be more precise, the PSI HB device has 
> 14 interrupt sources, which notifications are forwarded using a MMIO 
> load to some address. The load address is configured (by skiboot) in 
> one of the PSI device registers, and points to a MMIO region of the 
> main XIVE interrupt controller. 
> 
> The PHB4 sources should be the same.
> 
> For the XiveSource object (all interrupts) under sPAPRXive, the 
> XIVEFabric is the main interrupt controller sPAPRXive.
> 
> For the XiveSource object (IPIs) under PnvXive, the XIVEFabric is 
> also the main interrupt controller PnvXive.

Hrm.  Apparently I'm missing something, I'm really not getting what
you're trying to explain here.

> > that would then (after possible masking) forward on to the overall> xive fabric ? 
> 
> yes. May be XIVEFabric is a confusing name. What about XIVEForwarder ? 

Maybe..?

> > That seems like a property of the source object, 
> 
> The source object is generic. It's a bunch of PQ bits that can be 
> controlled by MMIOs. Nothing more.

Hmm.  Isn't the source object also responsible for forwarding the
interrupt to something up the chain (whatever that is)?

> 
> > rather than a
> > property of the fabric.  Indeed varying this by source object would
> > require the objects have a different xive pointer, when I thought the
> > idea was that the XiveFabric was global.
> 
> When a notification is forwarded, the sources needs to call an 
> interface which generally is implemented by the source owner,

I'm not quite sure what you mean by "source owner".

> which is not necessarily the main IC. 
> 
> >> 	static void pnv_psi_notify(XiveFabric *xf, uint32_t lisn)
> >> 	{
> >> 	    PnvPsi *psi = PNV_PSI(xf);
> >> 	    uint64_t notif_port =
> >> 	        psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
> >> 	    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
> >> 	    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
> >> 	    uint32_t data = cpu_to_be32(lisn);
> >> 	
> >> 	    if (valid) {
> >> 	        cpu_physical_memory_write(notify_addr, &data, sizeof(data));
> >> 	    }
> >> 	}
> >>
> >> The PnvXive model handles the load and forwards to the fabric again.  
> >>
> >> The IPIs under PowerNV do not need an extra hop so they reach the 
> >> routing routine directly without the extra notify() hop. 
> >>
> >> However, PowerNV at the end should be using xive_fabric_route() 
> >> but there are some differences on how the NVT registers are 
> >> updated (HV vs. OS mode) and it's not handled yet so it uses a 
> >> notify() handler. But is should disappear and call directly 
> >> xive_fabric_route() in a near future.
> >>
> >>
> >> May be, XiveFabricNotifier would be a better name for this feature ?
> >> I am adding a few ops later which are more related to routing.
> >>
> >> Thanks,
> >>
> >> C.
> >>
> >>
> >>>
> >>>>  }
> >>>>  
> >>>>  /*
> >>>> @@ -302,6 +325,17 @@ static void xive_source_reset(DeviceState *dev)
> >>>>  static void xive_source_realize(DeviceState *dev, Error **errp)
> >>>>  {
> >>>>      XiveSource *xsrc = XIVE_SOURCE(dev);
> >>>> +    Object *obj;
> >>>> +    Error *local_err = NULL;
> >>>> +
> >>>> +    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
> >>>> +    if (!obj) {
> >>>> +        error_propagate(errp, local_err);
> >>>> +        error_prepend(errp, "required link 'xive' not found: ");
> >>>> +        return;
> >>>> +    }
> >>>> +
> >>>> +    xsrc->xive = XIVE_FABRIC(obj);
> >>>>  
> >>>>      if (!xsrc->nr_irqs) {
> >>>>          error_setg(errp, "Number of interrupt needs to be greater than 0");
> >>>> @@ -376,6 +410,7 @@ static const TypeInfo xive_source_info = {
> >>>>  static void xive_register_types(void)
> >>>>  {
> >>>>      type_register_static(&xive_source_info);
> >>>> +    type_register_static(&xive_fabric_info);
> >>>>  }
> >>>>  
> >>>>  type_init(xive_register_types)
> >>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >>>> index 0b76dd278d9b..4fcae2c763e6 100644
> >>>> --- a/include/hw/ppc/xive.h
> >>>> +++ b/include/hw/ppc/xive.h
> >>>> @@ -12,6 +12,8 @@
> >>>>  
> >>>>  #include "hw/sysbus.h"
> >>>>  
> >>>> +typedef struct XiveFabric XiveFabric;
> >>>> +
> >>>>  /*
> >>>>   * XIVE Interrupt Source
> >>>>   */
> >>>> @@ -46,6 +48,8 @@ typedef struct XiveSource {
> >>>>      hwaddr       esb_base;
> >>>>      uint32_t     esb_shift;
> >>>>      MemoryRegion esb_mmio;
> >>>> +
> >>>> +    XiveFabric   *xive;
> >>>>  } XiveSource;
> >>>>  
> >>>>  /*
> >>>> @@ -143,4 +147,25 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
> >>>>      xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0;
> >>>>  }
> >>>>  
> >>>> +/*
> >>>> + * XIVE Fabric
> >>>> + */
> >>>> +
> >>>> +typedef struct XiveFabric {
> >>>> +    Object parent;
> >>>> +} XiveFabric;
> >>>> +
> >>>> +#define TYPE_XIVE_FABRIC "xive-fabric"
> >>>> +#define XIVE_FABRIC(obj)                                     \
> >>>> +    OBJECT_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
> >>>> +#define XIVE_FABRIC_CLASS(klass)                                     \
> >>>> +    OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
> >>>> +#define XIVE_FABRIC_GET_CLASS(obj)                                   \
> >>>> +    OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
> >>>> +
> >>>> +typedef struct XiveFabricClass {
> >>>> +    InterfaceClass parent;
> >>>> +    void (*notify)(XiveFabric *xf, uint32_t lisn);
> >>>> +} XiveFabricClass;
> >>>> +
> >>>>  #endif /* PPC_XIVE_H */
> >>>
> >>
> > 
>
Cédric Le Goater April 26, 2018, 10:30 a.m. UTC | #6
On 04/26/2018 05:54 AM, David Gibson wrote:
> On Tue, Apr 24, 2018 at 11:33:11AM +0200, Cédric Le Goater wrote:
>> On 04/24/2018 08:46 AM, David Gibson wrote:
>>> On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
>>>> On 04/23/2018 08:46 AM, David Gibson wrote:
>>>>> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
>>>>>> The XiveFabric offers a simple interface, between the XiveSourve
>>>>>> object and the device model owning the interrupt sources, to forward
>>>>>> an event notification to the XIVE interrupt controller of the machine
>>>>>> and if the owner is the controller, to call directly the routing
>>>>>> sub-engine.
>>>>>>
>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>> ---
>>>>>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
>>>>>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
>>>>>>  2 files changed, 61 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>>>> index 060976077dd7..b4c3d06c1219 100644
>>>>>> --- a/hw/intc/xive.c
>>>>>> +++ b/hw/intc/xive.c
>>>>>> @@ -17,6 +17,21 @@
>>>>>>  #include "hw/ppc/xive.h"
>>>>>>  
>>>>>>  /*
>>>>>> + * XIVE Fabric
>>>>>> + */
>>>>>> +
>>>>>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
>>>>>> +{
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>> +static const TypeInfo xive_fabric_info = {
>>>>>> +    .name = TYPE_XIVE_FABRIC,
>>>>>> +    .parent = TYPE_INTERFACE,
>>>>>> +    .class_size = sizeof(XiveFabricClass),
>>>>>> +};
>>>>>> +
>>>>>> +/*
>>>>>>   * XIVE Interrupt Source
>>>>>>   */
>>>>>>  
>>>>>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>>>>>>  
>>>>>>  /*
>>>>>>   * Forward the source event notification to the associated XiveFabric,
>>>>>> - * the device owning the sources.
>>>>>> + * the device owning the sources, or perform the routing if the device
>>>>>> + * is the interrupt controller.
>>>>>>   */
>>>>>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
>>>>>>  {
>>>>>>  
>>>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
>>>>>> +
>>>>>> +    if (xfc->notify) {
>>>>>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
>>>>>> +    } else {
>>>>>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
>>>>>> +    }
>>>>>
>>>>> Why 2 cases?  Can't the XiveFabric object just make its notify equal
>>>>> to xive_fabric_route if that's what it wants?
>>>> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
>>>> generate events which are directly routed by xive_fabric_route(). 
>>>> There is no need of an extra hop. Indeed. 
>>>
>>> Ok.
>>>
>>>> Under PowerNV, some sources forward the notification to the routing 
>>>> engine using a specific MMIO load on a notify address which is stored 
>>>> in one of the controller registers. So we need a hop to reach the 
>>>> device model, owning the sources, and do that load :
>>>
>>> Hm.  So you're saying that in pnv some sources send their notification
>>> to some other unit, 
>>
>> Not to any unit/device, to the device owning the sources.
>>
>> For the XiveSource object under PSI, the XIVEFabric interface is the 
>> PSI device object it self, which knows how to forward the notification 
>> on the XIVE Power "bus". To be more precise, the PSI HB device has 
>> 14 interrupt sources, which notifications are forwarded using a MMIO 
>> load to some address. The load address is configured (by skiboot) in 
>> one of the PSI device registers, and points to a MMIO region of the 
>> main XIVE interrupt controller. 
>>
>> The PHB4 sources should be the same.
>>
>> For the XiveSource object (all interrupts) under sPAPRXive, the 
>> XIVEFabric is the main interrupt controller sPAPRXive.
>>
>> For the XiveSource object (IPIs) under PnvXive, the XIVEFabric is 
>> also the main interrupt controller PnvXive.
> 
> Hrm.  Apparently I'm missing something, I'm really not getting what
> you're trying to explain here.

I see that. Let's try again.

>>> that would then (after possible masking) forward on to the overall> xive fabric ? 
>>
>> yes. May be XIVEFabric is a confusing name. What about XIVEForwarder ? 
> 
> Maybe..?
> 
>>> That seems like a property of the source object, 
>>
>> The source object is generic. It's a bunch of PQ bits that can be 
>> controlled by MMIOs. Nothing more.
> 
> Hmm.  Isn't the source object also responsible for forwarding the
> interrupt to something up the chain (whatever that is)?

Yes but it can not forward directly. The XiveSource is generic and 
can only call a handler :

	xfc->notify(xsrc->xive, srcno + xsrc->offset);

The device model owner, the parent of the XiveSource object, would 
do the real forward. 

It's very similar to what we have today with XICS :

	- The sPAPR model has an ICSState  
	- The PnvPSI model has an ICSState 
	- The PnvPHB3 model has two ICSStates

and the 'xics' pointer in ICSState points to the 'interrupt unit' of 
the machine to do resends and to grab ICPs. So it used for routing 
essentially.

in Xive 

	- sPAPRXive model has a XiveSource
	- PnvXive model has a XiveSource
	- PnvPSI model has a XiveSource
	- PnvPHB4 model should have also.

and the 'xive' pointer in XiveSource points to the parent object,
which will handle the event notification forwarding or routing.
 
C.

>>> rather than a
>>> property of the fabric.  Indeed varying this by source object would
>>> require the objects have a different xive pointer, when I thought the
>>> idea was that the XiveFabric was global.
>>
>> When a notification is forwarded, the sources needs to call an 
>> interface which generally is implemented by the source owner,
> 
> I'm not quite sure what you mean by "source owner".

The parent object.
 
>> which is not necessarily the main IC. 
>>
>>>> 	static void pnv_psi_notify(XiveFabric *xf, uint32_t lisn)
>>>> 	{
>>>> 	    PnvPsi *psi = PNV_PSI(xf);
>>>> 	    uint64_t notif_port =
>>>> 	        psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
>>>> 	    bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID;
>>>> 	    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
>>>> 	    uint32_t data = cpu_to_be32(lisn);
>>>> 	
>>>> 	    if (valid) {
>>>> 	        cpu_physical_memory_write(notify_addr, &data, sizeof(data));
>>>> 	    }
>>>> 	}
>>>>
>>>> The PnvXive model handles the load and forwards to the fabric again.  
>>>>
>>>> The IPIs under PowerNV do not need an extra hop so they reach the 
>>>> routing routine directly without the extra notify() hop. 
>>>>
>>>> However, PowerNV at the end should be using xive_fabric_route() 
>>>> but there are some differences on how the NVT registers are 
>>>> updated (HV vs. OS mode) and it's not handled yet so it uses a 
>>>> notify() handler. But is should disappear and call directly 
>>>> xive_fabric_route() in a near future.
>>>>
>>>>
>>>> May be, XiveFabricNotifier would be a better name for this feature ?
>>>> I am adding a few ops later which are more related to routing.
>>>>
>>>> Thanks,
>>>>
>>>> C.
>>>>
>>>>
>>>>>
>>>>>>  }
>>>>>>  
>>>>>>  /*
>>>>>> @@ -302,6 +325,17 @@ static void xive_source_reset(DeviceState *dev)
>>>>>>  static void xive_source_realize(DeviceState *dev, Error **errp)
>>>>>>  {
>>>>>>      XiveSource *xsrc = XIVE_SOURCE(dev);
>>>>>> +    Object *obj;
>>>>>> +    Error *local_err = NULL;
>>>>>> +
>>>>>> +    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
>>>>>> +    if (!obj) {
>>>>>> +        error_propagate(errp, local_err);
>>>>>> +        error_prepend(errp, "required link 'xive' not found: ");
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    xsrc->xive = XIVE_FABRIC(obj);
>>>>>>  
>>>>>>      if (!xsrc->nr_irqs) {
>>>>>>          error_setg(errp, "Number of interrupt needs to be greater than 0");
>>>>>> @@ -376,6 +410,7 @@ static const TypeInfo xive_source_info = {
>>>>>>  static void xive_register_types(void)
>>>>>>  {
>>>>>>      type_register_static(&xive_source_info);
>>>>>> +    type_register_static(&xive_fabric_info);
>>>>>>  }
>>>>>>  
>>>>>>  type_init(xive_register_types)
>>>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>>>> index 0b76dd278d9b..4fcae2c763e6 100644
>>>>>> --- a/include/hw/ppc/xive.h
>>>>>> +++ b/include/hw/ppc/xive.h
>>>>>> @@ -12,6 +12,8 @@
>>>>>>  
>>>>>>  #include "hw/sysbus.h"
>>>>>>  
>>>>>> +typedef struct XiveFabric XiveFabric;
>>>>>> +
>>>>>>  /*
>>>>>>   * XIVE Interrupt Source
>>>>>>   */
>>>>>> @@ -46,6 +48,8 @@ typedef struct XiveSource {
>>>>>>      hwaddr       esb_base;
>>>>>>      uint32_t     esb_shift;
>>>>>>      MemoryRegion esb_mmio;
>>>>>> +
>>>>>> +    XiveFabric   *xive;
>>>>>>  } XiveSource;
>>>>>>  
>>>>>>  /*
>>>>>> @@ -143,4 +147,25 @@ static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
>>>>>>      xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0;
>>>>>>  }
>>>>>>  
>>>>>> +/*
>>>>>> + * XIVE Fabric
>>>>>> + */
>>>>>> +
>>>>>> +typedef struct XiveFabric {
>>>>>> +    Object parent;
>>>>>> +} XiveFabric;
>>>>>> +
>>>>>> +#define TYPE_XIVE_FABRIC "xive-fabric"
>>>>>> +#define XIVE_FABRIC(obj)                                     \
>>>>>> +    OBJECT_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
>>>>>> +#define XIVE_FABRIC_CLASS(klass)                                     \
>>>>>> +    OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
>>>>>> +#define XIVE_FABRIC_GET_CLASS(obj)                                   \
>>>>>> +    OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
>>>>>> +
>>>>>> +typedef struct XiveFabricClass {
>>>>>> +    InterfaceClass parent;
>>>>>> +    void (*notify)(XiveFabric *xf, uint32_t lisn);
>>>>>> +} XiveFabricClass;
>>>>>> +
>>>>>>  #endif /* PPC_XIVE_H */
>>>>>
>>>>
>>>
>>
>
David Gibson April 27, 2018, 6:32 a.m. UTC | #7
On Thu, Apr 26, 2018 at 12:30:42PM +0200, Cédric Le Goater wrote:
> On 04/26/2018 05:54 AM, David Gibson wrote:
> > On Tue, Apr 24, 2018 at 11:33:11AM +0200, Cédric Le Goater wrote:
> >> On 04/24/2018 08:46 AM, David Gibson wrote:
> >>> On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
> >>>> On 04/23/2018 08:46 AM, David Gibson wrote:
> >>>>> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
> >>>>>> The XiveFabric offers a simple interface, between the XiveSourve
> >>>>>> object and the device model owning the interrupt sources, to forward
> >>>>>> an event notification to the XIVE interrupt controller of the machine
> >>>>>> and if the owner is the controller, to call directly the routing
> >>>>>> sub-engine.
> >>>>>>
> >>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>> ---
> >>>>>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
> >>>>>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
> >>>>>>  2 files changed, 61 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >>>>>> index 060976077dd7..b4c3d06c1219 100644
> >>>>>> --- a/hw/intc/xive.c
> >>>>>> +++ b/hw/intc/xive.c
> >>>>>> @@ -17,6 +17,21 @@
> >>>>>>  #include "hw/ppc/xive.h"
> >>>>>>  
> >>>>>>  /*
> >>>>>> + * XIVE Fabric
> >>>>>> + */
> >>>>>> +
> >>>>>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
> >>>>>> +{
> >>>>>> +
> >>>>>> +}
> >>>>>> +
> >>>>>> +static const TypeInfo xive_fabric_info = {
> >>>>>> +    .name = TYPE_XIVE_FABRIC,
> >>>>>> +    .parent = TYPE_INTERFACE,
> >>>>>> +    .class_size = sizeof(XiveFabricClass),
> >>>>>> +};
> >>>>>> +
> >>>>>> +/*
> >>>>>>   * XIVE Interrupt Source
> >>>>>>   */
> >>>>>>  
> >>>>>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
> >>>>>>  
> >>>>>>  /*
> >>>>>>   * Forward the source event notification to the associated XiveFabric,
> >>>>>> - * the device owning the sources.
> >>>>>> + * the device owning the sources, or perform the routing if the device
> >>>>>> + * is the interrupt controller.
> >>>>>>   */
> >>>>>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
> >>>>>>  {
> >>>>>>  
> >>>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
> >>>>>> +
> >>>>>> +    if (xfc->notify) {
> >>>>>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
> >>>>>> +    } else {
> >>>>>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
> >>>>>> +    }
> >>>>>
> >>>>> Why 2 cases?  Can't the XiveFabric object just make its notify equal
> >>>>> to xive_fabric_route if that's what it wants?
> >>>> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
> >>>> generate events which are directly routed by xive_fabric_route(). 
> >>>> There is no need of an extra hop. Indeed. 
> >>>
> >>> Ok.
> >>>
> >>>> Under PowerNV, some sources forward the notification to the routing 
> >>>> engine using a specific MMIO load on a notify address which is stored 
> >>>> in one of the controller registers. So we need a hop to reach the 
> >>>> device model, owning the sources, and do that load :
> >>>
> >>> Hm.  So you're saying that in pnv some sources send their notification
> >>> to some other unit, 
> >>
> >> Not to any unit/device, to the device owning the sources.
> >>
> >> For the XiveSource object under PSI, the XIVEFabric interface is the 
> >> PSI device object it self, which knows how to forward the notification 
> >> on the XIVE Power "bus". To be more precise, the PSI HB device has 
> >> 14 interrupt sources, which notifications are forwarded using a MMIO 
> >> load to some address. The load address is configured (by skiboot) in 
> >> one of the PSI device registers, and points to a MMIO region of the 
> >> main XIVE interrupt controller. 
> >>
> >> The PHB4 sources should be the same.
> >>
> >> For the XiveSource object (all interrupts) under sPAPRXive, the 
> >> XIVEFabric is the main interrupt controller sPAPRXive.
> >>
> >> For the XiveSource object (IPIs) under PnvXive, the XIVEFabric is 
> >> also the main interrupt controller PnvXive.
> > 
> > Hrm.  Apparently I'm missing something, I'm really not getting what
> > you're trying to explain here.
> 
> I see that. Let's try again.
> 
> >>> that would then (after possible masking) forward on to the overall> xive fabric ? 
> >>
> >> yes. May be XIVEFabric is a confusing name. What about XIVEForwarder ? 
> > 
> > Maybe..?
> > 
> >>> That seems like a property of the source object, 
> >>
> >> The source object is generic. It's a bunch of PQ bits that can be 
> >> controlled by MMIOs. Nothing more.
> > 
> > Hmm.  Isn't the source object also responsible for forwarding the
> > interrupt to something up the chain (whatever that is)?
> 
> Yes but it can not forward directly. The XiveSource is generic and 
> can only call a handler :
> 
> 	xfc->notify(xsrc->xive, srcno + xsrc->offset);

But.. your patch doesn't do that always, it's conditional which I
still don't understand.

> The device model owner, the parent of the XiveSource object, would 
> do the real forward.

Why?  I mean the XiveSource basically represents the xive irq related
logic of the PHB or whatever, why would it not represent *all* of
that, rather than just the ESB bits, meaning the owner has to have
some more xive logic for the forwarding.

Note that I don't think the fact that some sources notify via mmio and
some are internal really matters.  It's not like we're modelling the
power bus down to the wire-transaction level.

> It's very similar to what we have today with XICS :
> 
> 	- The sPAPR model has an ICSState  
> 	- The PnvPSI model has an ICSState 
> 	- The PnvPHB3 model has two ICSStates
> 
> and the 'xics' pointer in ICSState points to the 'interrupt unit' of 
> the machine to do resends and to grab ICPs. So it used for routing 
> essentially.

Hmm.  I think you and I are looking at XICSFabric kind of
differently.  As I see it, it's not really an active component at
all.  Rather it's basically a global "map" of the xics components so
that they can find each other.

> in Xive 
> 
> 	- sPAPRXive model has a XiveSource
> 	- PnvXive model has a XiveSource
> 	- PnvPSI model has a XiveSource
> 	- PnvPHB4 model should have also.
> 
> and the 'xive' pointer in XiveSource points to the parent object,

Uh.. yeah.. the xics pointer in ICS units doesn't point to the parent
object, except maybe by accident.  It's absolutely intended to be
global, and so points to the machine.

> which will handle the event notification forwarding or routing.

Ok, how about this for a partial model.  We have:

XiveSource objects:
	* Owns an ESB table
	* Knows the mapping of its local irq offsets to global irq
	  numbers
	* Provides the mmio interface for ESB manipulation
	* When neccessary, notifies a new interrupt to a XiveRouter

XiveRouter objects:
	* Responsible for a fixed range of global irq numbers
	* Owns an IVT (but what that means can vary, see below)
	* When notified of an irq, routes it to the appropriate EQ
	  (haven't thought about this part yet)
	* Abstract class - needs subclasses to define how to get IVEs

XiveFabric interface:
	* Lets XIVE components locate each other
	* get_router() method: maps a global irq number to XiveRouter
	  object
	* Always global (implemented on the machine)

On pseries we have:

	? XiveSource objects.  We probably only need one, but 1 for
	LSI and one for MSI might be convenient.  More wouldn't break
	the model

	1 sPAPRXiveRouter.  This is a subclass of XiveRouter that
	holds the IVT internall (and migrates it).

	the XiveFabric implementation always returns the single global
	router for get_router()

On powernv we have:
	N XiveSource objects.  Some in PHBs, some extra ones on each
	chip

	(#chips) PowerXiveRouter objects.  This subclass of XiveRouter
	stores the register giving the IVT base address and migrates
	that, but the IVT contents are in RAM

	the XiveFabric get_router() implementation returns the right
	chip's router based on the irq number

Obviously the router->EQ sides still needs a bunch of thought.
Cédric Le Goater May 2, 2018, 3:28 p.m. UTC | #8
On 04/27/2018 08:32 AM, David Gibson wrote:
> On Thu, Apr 26, 2018 at 12:30:42PM +0200, Cédric Le Goater wrote:
>> On 04/26/2018 05:54 AM, David Gibson wrote:
>>> On Tue, Apr 24, 2018 at 11:33:11AM +0200, Cédric Le Goater wrote:
>>>> On 04/24/2018 08:46 AM, David Gibson wrote:
>>>>> On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
>>>>>> On 04/23/2018 08:46 AM, David Gibson wrote:
>>>>>>> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
>>>>>>>> The XiveFabric offers a simple interface, between the XiveSourve
>>>>>>>> object and the device model owning the interrupt sources, to forward
>>>>>>>> an event notification to the XIVE interrupt controller of the machine
>>>>>>>> and if the owner is the controller, to call directly the routing
>>>>>>>> sub-engine.
>>>>>>>>
>>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>>>> ---
>>>>>>>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
>>>>>>>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
>>>>>>>>  2 files changed, 61 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>>>>>> index 060976077dd7..b4c3d06c1219 100644
>>>>>>>> --- a/hw/intc/xive.c
>>>>>>>> +++ b/hw/intc/xive.c
>>>>>>>> @@ -17,6 +17,21 @@
>>>>>>>>  #include "hw/ppc/xive.h"
>>>>>>>>  
>>>>>>>>  /*
>>>>>>>> + * XIVE Fabric
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
>>>>>>>> +{
>>>>>>>> +
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static const TypeInfo xive_fabric_info = {
>>>>>>>> +    .name = TYPE_XIVE_FABRIC,
>>>>>>>> +    .parent = TYPE_INTERFACE,
>>>>>>>> +    .class_size = sizeof(XiveFabricClass),
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +/*
>>>>>>>>   * XIVE Interrupt Source
>>>>>>>>   */
>>>>>>>>  
>>>>>>>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>>>>>>>>  
>>>>>>>>  /*
>>>>>>>>   * Forward the source event notification to the associated XiveFabric,
>>>>>>>> - * the device owning the sources.
>>>>>>>> + * the device owning the sources, or perform the routing if the device
>>>>>>>> + * is the interrupt controller.
>>>>>>>>   */
>>>>>>>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
>>>>>>>>  {
>>>>>>>>  
>>>>>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
>>>>>>>> +
>>>>>>>> +    if (xfc->notify) {
>>>>>>>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
>>>>>>>> +    } else {
>>>>>>>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
>>>>>>>> +    }
>>>>>>>
>>>>>>> Why 2 cases?  Can't the XiveFabric object just make its notify equal
>>>>>>> to xive_fabric_route if that's what it wants?
>>>>>> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
>>>>>> generate events which are directly routed by xive_fabric_route(). 
>>>>>> There is no need of an extra hop. Indeed. 
>>>>>
>>>>> Ok.
>>>>>
>>>>>> Under PowerNV, some sources forward the notification to the routing 
>>>>>> engine using a specific MMIO load on a notify address which is stored 
>>>>>> in one of the controller registers. So we need a hop to reach the 
>>>>>> device model, owning the sources, and do that load :
>>>>>
>>>>> Hm.  So you're saying that in pnv some sources send their notification
>>>>> to some other unit, 
>>>>
>>>> Not to any unit/device, to the device owning the sources.
>>>>
>>>> For the XiveSource object under PSI, the XIVEFabric interface is the 
>>>> PSI device object it self, which knows how to forward the notification 
>>>> on the XIVE Power "bus". To be more precise, the PSI HB device has 
>>>> 14 interrupt sources, which notifications are forwarded using a MMIO 
>>>> load to some address. The load address is configured (by skiboot) in 
>>>> one of the PSI device registers, and points to a MMIO region of the 
>>>> main XIVE interrupt controller. 
>>>>
>>>> The PHB4 sources should be the same.
>>>>
>>>> For the XiveSource object (all interrupts) under sPAPRXive, the 
>>>> XIVEFabric is the main interrupt controller sPAPRXive.
>>>>
>>>> For the XiveSource object (IPIs) under PnvXive, the XIVEFabric is 
>>>> also the main interrupt controller PnvXive.
>>>
>>> Hrm.  Apparently I'm missing something, I'm really not getting what
>>> you're trying to explain here.
>>
>> I see that. Let's try again.
>>
>>>>> that would then (after possible masking) forward on to the overall> xive fabric ? 
>>>>
>>>> yes. May be XIVEFabric is a confusing name. What about XIVEForwarder ? 
>>>
>>> Maybe..?
>>>
>>>>> That seems like a property of the source object, 
>>>>
>>>> The source object is generic. It's a bunch of PQ bits that can be 
>>>> controlled by MMIOs. Nothing more.
>>>
>>> Hmm.  Isn't the source object also responsible for forwarding the
>>> interrupt to something up the chain (whatever that is)?
>>
>> Yes but it can not forward directly. The XiveSource is generic and 
>> can only call a handler :
>>
>> 	xfc->notify(xsrc->xive, srcno + xsrc->offset);
> 
> But.. your patch doesn't do that always, it's conditional which I
> still don't understand.

Because at the end of the notify/forward chain, you route.

>> The device model owner, the parent of the XiveSource object, would 
>> do the real forward.
> 
> Why?  

because, in my idea of the XiveSource concept, it does not have 
the logic to do so: the register for the MMIO address to use, 
another one for the IVT offset, etc

> I mean the XiveSource basically represents the xive irq related
> logic of the PHB or whatever, why would it not represent *all* of
> that, rather than just the ESB bits, meaning the owner has to have
> some more xive logic for the forwarding.

ok. This is where we diverge in the concept. 

The PQ bits, the ESB MMIO region handlers can be easily shared 
between the different device models. They are the common part
of devices with XIVE interrupt sources.  

> Note that I don't think the fact that some sources notify via mmio and
> some are internal really matters.  It's not like we're modelling the
> power bus down to the wire-transaction level.

yes but the configuration of the devices are different. pnv devices 
will have registers accessible through MMIO or XSCOM and configured
by the firmware. spapr is all set up by QEMU.

>> It's very similar to what we have today with XICS :
>>
>> 	- The sPAPR model has an ICSState  
>> 	- The PnvPSI model has an ICSState 
>> 	- The PnvPHB3 model has two ICSStates
>>
>> and the 'xics' pointer in ICSState points to the 'interrupt unit' of 
>> the machine to do resends and to grab ICPs. So it used for routing 
>> essentially.
> 
> Hmm.  I think you and I are looking at XICSFabric kind of
> differently.  As I see it, it's not really an active component at
> all.  Rather it's basically a global "map" of the xics components so
> that they can find each other.

ok. I am not that far either. 
 
>> in Xive 
>>
>> 	- sPAPRXive model has a XiveSource
>> 	- PnvXive model has a XiveSource
>> 	- PnvPSI model has a XiveSource
>> 	- PnvPHB4 model should have also.
>>
>> and the 'xive' pointer in XiveSource points to the parent object,
> 
> Uh.. yeah.. the xics pointer in ICS units doesn't point to the parent
> object, except maybe by accident.  It's absolutely intended to be
> global, and so points to the machine.

yes. I agree. 

XIVE has more layers and visible components due to the internal 
tables used for routing.

>> which will handle the event notification forwarding or routing.
> 
> Ok, how about this for a partial model.  We have:
> 
> XiveSource objects:
> 	* Owns an ESB table
> 	* Knows the mapping of its local irq offsets to global irq
> 	  numbers

That is the 'offset' attribute I suppose. This is set at runtime 
for the powernv devices. For pseries, we should need it for 
passthrough. I think. I haven't looked at that part yet.

> 	* Provides the mmio interface for ESB manipulation
> 	* When neccessary, notifies a new interrupt to a XiveRouter

ok. I think that what we have today fits the idea then. 

> XiveRouter objects:
> 	* Responsible for a fixed range of global irq numbers
> 	* Owns an IVT (but what that means can vary, see below)

the size of the IVT table is determined at runtime for powernv.

> 	* When notified of an irq, routes it to the appropriate EQ
> 	  (haven't thought about this part yet)

we will need a class handler to get EQs

> 	* Abstract class - needs subclasses to define how to get IVEs

OK. We will need a few other ops. The router needs to :

	- get IVEs
	- get EQ descriptors
	- update OS EQs (write event data in OS RAM)
	- update EQ descriptors (to set EQ descriptor index & toggle)
	- get an NVT/VP (to notify CPUs)

For powernv, we should also consider updating the NVT/VP. It can be 
done later. 

> XiveFabric interface:
> 	* Lets XIVE components locate each other

hmm, it should be a chain : 

	source -> router -> presenter -> cpu

So the components should not have to locate each other. The presenter
does not know about the source for instance. Only the sources need
to forward events to the main controller logic doing the routing.

> 	* get_router() method: maps a global irq number to XiveRouter
> 	  object

Ah. you are thinking about the multichip case under powernv. I need
to look at that more closely. But XIVE has a concept of block which 
is used by skiboot to map a chip to a block and the XIVE tables have 
a block field.

> 	* Always global (implemented on the machine)

OK. this is more or less the object modeling the main interrupt 
controller ? What I called sPAPRXive in the current patchset.

 
> On pseries we have:
> 
> 	? XiveSource objects.  We probably only need one, but 1 for
> 	LSI and one for MSI might be convenient.  

It's not too ugly for the moment. If we create a source object 
for LSIs only that might be more complex for passthrough devices 
and their associated ESB MMIO region.

side note :

LSIs work under TCG, and used to work under KVM until we removed
the StoreEOI support. Since the EOI is now different, we need 
to find a way to handle EOI for guest virtual LSIs which are 
not LSIs for the host. We can still change the Linux spapr 
backend or have QEMU handle the EOI for virtual LSIs. This is 
on my TODO list.

>       More wouldn't break the model

It should not. Initially the pachset had two source objects : 
one for IPIs and one for the virtual devices interrupts. 

> 	1 sPAPRXiveRouter.  This is a subclass of XiveRouter that
> 	holds the IVT internall (and migrates it).

OK.

> 	the XiveFabric implementation always returns the single global
> 	router for get_router()

we can do that. 

Do we gather all XIVE components objects under an object 'sPAPRXive' 
modeling that way the main interrupt controller of the machine ? 
It should not have any state but it will hold the TIMA region 
most certainly, and the addresses where to map the ESB and 
TIMA regions.

KVM support will bring extra needs.
 
> On powernv we have:
> 	N XiveSource objects.  Some in PHBs, some extra ones on each
> 	chip

yes and PSI to start with.  

> 	(#chips) PowerXiveRouter objects.  This subclass of XiveRouter
> 	stores the register giving the IVT base address and migrates
> 	that, but the IVT contents are in RAM
> 
> 	the XiveFabric get_router() implementation returns the right
> 	chip's router based on the irq number
> 
> Obviously the router->EQ sides still needs a bunch of thought.

These are all routing tables : 

	- IVT
	- EQDT
	- VPDT  

Thanks,

C.
David Gibson May 3, 2018, 5:13 a.m. UTC | #9
On Wed, May 02, 2018 at 05:28:23PM +0200, Cédric Le Goater wrote:
> On 04/27/2018 08:32 AM, David Gibson wrote:
> > On Thu, Apr 26, 2018 at 12:30:42PM +0200, Cédric Le Goater wrote:
> >> On 04/26/2018 05:54 AM, David Gibson wrote:
> >>> On Tue, Apr 24, 2018 at 11:33:11AM +0200, Cédric Le Goater wrote:
> >>>> On 04/24/2018 08:46 AM, David Gibson wrote:
> >>>>> On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
> >>>>>> On 04/23/2018 08:46 AM, David Gibson wrote:
> >>>>>>> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
> >>>>>>>> The XiveFabric offers a simple interface, between the XiveSourve
> >>>>>>>> object and the device model owning the interrupt sources, to forward
> >>>>>>>> an event notification to the XIVE interrupt controller of the machine
> >>>>>>>> and if the owner is the controller, to call directly the routing
> >>>>>>>> sub-engine.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>>>>>> ---
> >>>>>>>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
> >>>>>>>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
> >>>>>>>>  2 files changed, 61 insertions(+), 1 deletion(-)
> >>>>>>>>
> >>>>>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >>>>>>>> index 060976077dd7..b4c3d06c1219 100644
> >>>>>>>> --- a/hw/intc/xive.c
> >>>>>>>> +++ b/hw/intc/xive.c
> >>>>>>>> @@ -17,6 +17,21 @@
> >>>>>>>>  #include "hw/ppc/xive.h"
> >>>>>>>>  
> >>>>>>>>  /*
> >>>>>>>> + * XIVE Fabric
> >>>>>>>> + */
> >>>>>>>> +
> >>>>>>>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
> >>>>>>>> +{
> >>>>>>>> +
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static const TypeInfo xive_fabric_info = {
> >>>>>>>> +    .name = TYPE_XIVE_FABRIC,
> >>>>>>>> +    .parent = TYPE_INTERFACE,
> >>>>>>>> +    .class_size = sizeof(XiveFabricClass),
> >>>>>>>> +};
> >>>>>>>> +
> >>>>>>>> +/*
> >>>>>>>>   * XIVE Interrupt Source
> >>>>>>>>   */
> >>>>>>>>  
> >>>>>>>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
> >>>>>>>>  
> >>>>>>>>  /*
> >>>>>>>>   * Forward the source event notification to the associated XiveFabric,
> >>>>>>>> - * the device owning the sources.
> >>>>>>>> + * the device owning the sources, or perform the routing if the device
> >>>>>>>> + * is the interrupt controller.
> >>>>>>>>   */
> >>>>>>>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
> >>>>>>>>  {
> >>>>>>>>  
> >>>>>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
> >>>>>>>> +
> >>>>>>>> +    if (xfc->notify) {
> >>>>>>>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
> >>>>>>>> +    } else {
> >>>>>>>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
> >>>>>>>> +    }
> >>>>>>>
> >>>>>>> Why 2 cases?  Can't the XiveFabric object just make its notify equal
> >>>>>>> to xive_fabric_route if that's what it wants?
> >>>>>> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
> >>>>>> generate events which are directly routed by xive_fabric_route(). 
> >>>>>> There is no need of an extra hop. Indeed. 
> >>>>>
> >>>>> Ok.
> >>>>>
> >>>>>> Under PowerNV, some sources forward the notification to the routing 
> >>>>>> engine using a specific MMIO load on a notify address which is stored 
> >>>>>> in one of the controller registers. So we need a hop to reach the 
> >>>>>> device model, owning the sources, and do that load :
> >>>>>
> >>>>> Hm.  So you're saying that in pnv some sources send their notification
> >>>>> to some other unit, 
> >>>>
> >>>> Not to any unit/device, to the device owning the sources.
> >>>>
> >>>> For the XiveSource object under PSI, the XIVEFabric interface is the 
> >>>> PSI device object it self, which knows how to forward the notification 
> >>>> on the XIVE Power "bus". To be more precise, the PSI HB device has 
> >>>> 14 interrupt sources, which notifications are forwarded using a MMIO 
> >>>> load to some address. The load address is configured (by skiboot) in 
> >>>> one of the PSI device registers, and points to a MMIO region of the 
> >>>> main XIVE interrupt controller. 
> >>>>
> >>>> The PHB4 sources should be the same.
> >>>>
> >>>> For the XiveSource object (all interrupts) under sPAPRXive, the 
> >>>> XIVEFabric is the main interrupt controller sPAPRXive.
> >>>>
> >>>> For the XiveSource object (IPIs) under PnvXive, the XIVEFabric is 
> >>>> also the main interrupt controller PnvXive.
> >>>
> >>> Hrm.  Apparently I'm missing something, I'm really not getting what
> >>> you're trying to explain here.
> >>
> >> I see that. Let's try again.
> >>
> >>>>> that would then (after possible masking) forward on to the overall> xive fabric ? 
> >>>>
> >>>> yes. May be XIVEFabric is a confusing name. What about XIVEForwarder ? 
> >>>
> >>> Maybe..?
> >>>
> >>>>> That seems like a property of the source object, 
> >>>>
> >>>> The source object is generic. It's a bunch of PQ bits that can be 
> >>>> controlled by MMIOs. Nothing more.
> >>>
> >>> Hmm.  Isn't the source object also responsible for forwarding the
> >>> interrupt to something up the chain (whatever that is)?
> >>
> >> Yes but it can not forward directly. The XiveSource is generic and 
> >> can only call a handler :
> >>
> >> 	xfc->notify(xsrc->xive, srcno + xsrc->offset);
> > 
> > But.. your patch doesn't do that always, it's conditional which I
> > still don't understand.
> 
> Because at the end of the notify/forward chain, you route.

Hrm.  I'm really not understanding this notify/forward thing as
distinct from routing.  I mean, from the description here it sounds
kind of like cascaded interrupt controllers, which qemu already has
mechanisms to handle, but I didn't think (most) POWER9 devices worked
like that.

> >> The device model owner, the parent of the XiveSource object, would 
> >> do the real forward.
> > 
> > Why?  
> 
> because, in my idea of the XiveSource concept, it does not have 
> the logic to do so: the register for the MMIO address to use, 
> another one for the IVT offset, etc
> 
> > I mean the XiveSource basically represents the xive irq related
> > logic of the PHB or whatever, why would it not represent *all* of
> > that, rather than just the ESB bits, meaning the owner has to have
> > some more xive logic for the forwarding.
> 
> ok. This is where we diverge in the concept. 
> 
> The PQ bits, the ESB MMIO region handlers can be easily shared 
> between the different device models. They are the common part
> of devices with XIVE interrupt sources.

Sure, but QOM subclasses are a thing, which sounds like a better way
of doing this than having to have extra XIVE logic in all the parent
devices.

> > Note that I don't think the fact that some sources notify via mmio and
> > some are internal really matters.  It's not like we're modelling the
> > power bus down to the wire-transaction level.
> 
> yes but the configuration of the devices are different. pnv devices 
> will have registers accessible through MMIO or XSCOM and configured
> by the firmware. spapr is all set up by QEMU.

Ah.. ok.  Actually modelling the mmio forwards probably makes sense
then.  I still think it makes more sense in a pnv XiveSource subclass
rather than putting it in the containing device.

> >> It's very similar to what we have today with XICS :
> >>
> >> 	- The sPAPR model has an ICSState  
> >> 	- The PnvPSI model has an ICSState 
> >> 	- The PnvPHB3 model has two ICSStates
> >>
> >> and the 'xics' pointer in ICSState points to the 'interrupt unit' of 
> >> the machine to do resends and to grab ICPs. So it used for routing 
> >> essentially.
> > 
> > Hmm.  I think you and I are looking at XICSFabric kind of
> > differently.  As I see it, it's not really an active component at
> > all.  Rather it's basically a global "map" of the xics components so
> > that they can find each other.
> 
> ok. I am not that far either. 
>  
> >> in Xive 
> >>
> >> 	- sPAPRXive model has a XiveSource
> >> 	- PnvXive model has a XiveSource
> >> 	- PnvPSI model has a XiveSource
> >> 	- PnvPHB4 model should have also.
> >>
> >> and the 'xive' pointer in XiveSource points to the parent object,
> > 
> > Uh.. yeah.. the xics pointer in ICS units doesn't point to the parent
> > object, except maybe by accident.  It's absolutely intended to be
> > global, and so points to the machine.
> 
> yes. I agree. 
> 
> XIVE has more layers and visible components due to the internal 
> tables used for routing.

Right.

> >> which will handle the event notification forwarding or routing.
> > 
> > Ok, how about this for a partial model.  We have:
> > 
> > XiveSource objects:
> > 	* Owns an ESB table
> > 	* Knows the mapping of its local irq offsets to global irq
> > 	  numbers
> 
> That is the 'offset' attribute I suppose. This is set at runtime 
> for the powernv devices.

Ok, so that offset is effectively a register (which will have to be
migrated), rather than a device property.

> For pseries, we should need it for 
> passthrough. I think. I haven't looked at that part yet.

Uh.. I really hope not.  AIUI the offsets are decided by the platform
rather than the guest in this case, yes?  In which case if we can't
count on a fixed offset even in passthrough mode, then migration is
basically impossible.

> > 	* Provides the mmio interface for ESB manipulation
> > 	* When neccessary, notifies a new interrupt to a XiveRouter
> 
> ok. I think that what we have today fits the idea then. 

Yes, I think so to - just clarifying in the context of the rest of
this proposal.

> > XiveRouter objects:
> > 	* Responsible for a fixed range of global irq numbers
> > 	* Owns an IVT (but what that means can vary, see below)
> 
> the size of the IVT table is determined at runtime for powernv.

That's fine - both the size and base of the IVT will be registers of
the powernv variant of the device.

> > 	* When notified of an irq, routes it to the appropriate EQ
> > 	  (haven't thought about this part yet)
> 
> we will need a class handler to get EQs

Sure.

> > 	* Abstract class - needs subclasses to define how to get IVEs
> 
> OK. We will need a few other ops. The router needs to :
> 
> 	- get IVEs
> 	- get EQ descriptors
> 	- update OS EQs (write event data in OS RAM)

IIUC the actual EQ (as opposed to its descriptor) will be in guest RAM
for both powernv and spapr, so this can be common (utiliizing a
subclass hook to locate the EQ base address).

> 	- update EQ descriptors (to set EQ descriptor index & toggle)
> 	- get an NVT/VP (to notify CPUs)

Getting NVT/VP sounds more like it would belong in the XiveFabric than
the router, but I haven't looked at this in detail yet.

> For powernv, we should also consider updating the NVT/VP. It can be 
> done later. 
> 
> > XiveFabric interface:
> > 	* Lets XIVE components locate each other
> 
> hmm, it should be a chain : 
> 
> 	source -> router -> presenter -> cpu
> 
> So the components should not have to locate each other. The presenter
> does not know about the source for instance. Only the sources need
> to forward events to the main controller logic doing the routing.

source -> router

From the above sounds like we can maybe make this just a router
property in the source, rather than a lookup through a fabric.

router -> presenter

Here we might still need a "fabric".  By defintion the router can
direct to a bunch of different presenters, so it needs some kind of
map to find them, no?

presenter -> cpu

This one's trivial; just the cpu->intc pointer or similar.

> > 	* get_router() method: maps a global irq number to XiveRouter
> > 	  object
> 
> Ah. you are thinking about the multichip case under powernv. I need
> to look at that more closely. But XIVE has a concept of block which 
> is used by skiboot to map a chip to a block and the XIVE tables have 
> a block field.

Hmm.. I think we need to understand this to make a real model.  I'd
thought the block number would just be the chip number somewhere in
the high bits of the global irq, but sounds like there might be yet
another indirection here.

> > 	* Always global (implemented on the machine)
> 
> OK. this is more or less the object modeling the main interrupt 
> controller ? What I called sPAPRXive in the current patchset.

No.  This is *always* global - assuming we need it at all - even on
multichip powernv.

> > On pseries we have:
> > 
> > 	? XiveSource objects.  We probably only need one, but 1 for
> > 	LSI and one for MSI might be convenient.  
> 
> It's not too ugly for the moment. If we create a source object 
> for LSIs only that might be more complex for passthrough devices 
> and their associated ESB MMIO region.
> 
> side note :
> 
> LSIs work under TCG, and used to work under KVM until we removed
> the StoreEOI support. Since the EOI is now different, we need 
> to find a way to handle EOI for guest virtual LSIs which are 
> not LSIs for the host.

I can't quite picture that case - can you give a concrete example?

> We can still change the Linux spapr 
> backend or have QEMU handle the EOI for virtual LSIs. This is 
> on my TODO list.
> 
> >       More wouldn't break the model
> 
> It should not. Initially the pachset had two source objects : 
> one for IPIs and one for the virtual devices interrupts. 
> 
> > 	1 sPAPRXiveRouter.  This is a subclass of XiveRouter that
> > 	holds the IVT internall (and migrates it).
> 
> OK.
> 
> > 	the XiveFabric implementation always returns the single global
> > 	router for get_router()
> 
> we can do that. 

Again, if it's true that each source object always forwards to just
one router, then we don't need a get_router(), we can just use a link
property.

> Do we gather all XIVE components objects under an object 'sPAPRXive' 
> modeling that way the main interrupt controller of the machine ? 

No, I don't think so.

> It should not have any state but it will hold the TIMA region 
> most certainly, and the addresses where to map the ESB and 
> TIMA regions.

Uh.. I'd expect the TIMAs to be held by the NVT objects, which we
haven't covered yet.

> KVM support will bring extra needs.
>  
> > On powernv we have:
> > 	N XiveSource objects.  Some in PHBs, some extra ones on each
> > 	chip
> 
> yes and PSI to start with.  
> 
> > 	(#chips) PowerXiveRouter objects.  This subclass of XiveRouter
> > 	stores the register giving the IVT base address and migrates
> > 	that, but the IVT contents are in RAM
> > 
> > 	the XiveFabric get_router() implementation returns the right
> > 	chip's router based on the irq number
> > 
> > Obviously the router->EQ sides still needs a bunch of thought.
> 
> These are all routing tables : 
> 
> 	- IVT
> 	- EQDT
> 	- VPDT  

Thinking about what you've said, I'm thinking maybe what we need on
the source side is two versions which don't exactly correspond to
spapr vs powernv:

InBandXiveSource: this one's notify behaviour is to issue an mmio read
to a configured address.  It's expected that mmio address belongs to a
XiveRouter, but the source itself doesn't know anything routers.

OutOfBandXiveSource: this one's notify behaviour is to explicitly poke
a XiveRouter.  A link to the router and offset (which range of router
irqs are used by this source) would be object properties.

powernv would use the first for "external" irq sources and the second
for internal ones.  spapr would use the second for everything.
Cédric Le Goater May 23, 2018, 10:12 a.m. UTC | #10
On 05/03/2018 07:13 AM, David Gibson wrote:
> On Wed, May 02, 2018 at 05:28:23PM +0200, Cédric Le Goater wrote:
>> On 04/27/2018 08:32 AM, David Gibson wrote:
>>> On Thu, Apr 26, 2018 at 12:30:42PM +0200, Cédric Le Goater wrote:
>>>> On 04/26/2018 05:54 AM, David Gibson wrote:
>>>>> On Tue, Apr 24, 2018 at 11:33:11AM +0200, Cédric Le Goater wrote:
>>>>>> On 04/24/2018 08:46 AM, David Gibson wrote:
>>>>>>> On Mon, Apr 23, 2018 at 09:58:43AM +0200, Cédric Le Goater wrote:
>>>>>>>> On 04/23/2018 08:46 AM, David Gibson wrote:
>>>>>>>>> On Thu, Apr 19, 2018 at 02:42:59PM +0200, Cédric Le Goater wrote:
>>>>>>>>>> The XiveFabric offers a simple interface, between the XiveSourve
>>>>>>>>>> object and the device model owning the interrupt sources, to forward
>>>>>>>>>> an event notification to the XIVE interrupt controller of the machine
>>>>>>>>>> and if the owner is the controller, to call directly the routing
>>>>>>>>>> sub-engine.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>>>>>>>> ---
>>>>>>>>>>  hw/intc/xive.c        | 37 ++++++++++++++++++++++++++++++++++++-
>>>>>>>>>>  include/hw/ppc/xive.h | 25 +++++++++++++++++++++++++
>>>>>>>>>>  2 files changed, 61 insertions(+), 1 deletion(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>>>>>>>> index 060976077dd7..b4c3d06c1219 100644
>>>>>>>>>> --- a/hw/intc/xive.c
>>>>>>>>>> +++ b/hw/intc/xive.c
>>>>>>>>>> @@ -17,6 +17,21 @@
>>>>>>>>>>  #include "hw/ppc/xive.h"
>>>>>>>>>>  
>>>>>>>>>>  /*
>>>>>>>>>> + * XIVE Fabric
>>>>>>>>>> + */
>>>>>>>>>> +
>>>>>>>>>> +static void xive_fabric_route(XiveFabric *xf, int lisn)
>>>>>>>>>> +{
>>>>>>>>>> +
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static const TypeInfo xive_fabric_info = {
>>>>>>>>>> +    .name = TYPE_XIVE_FABRIC,
>>>>>>>>>> +    .parent = TYPE_INTERFACE,
>>>>>>>>>> +    .class_size = sizeof(XiveFabricClass),
>>>>>>>>>> +};
>>>>>>>>>> +
>>>>>>>>>> +/*
>>>>>>>>>>   * XIVE Interrupt Source
>>>>>>>>>>   */
>>>>>>>>>>  
>>>>>>>>>> @@ -97,11 +112,19 @@ static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
>>>>>>>>>>  
>>>>>>>>>>  /*
>>>>>>>>>>   * Forward the source event notification to the associated XiveFabric,
>>>>>>>>>> - * the device owning the sources.
>>>>>>>>>> + * the device owning the sources, or perform the routing if the device
>>>>>>>>>> + * is the interrupt controller.
>>>>>>>>>>   */
>>>>>>>>>>  static void xive_source_notify(XiveSource *xsrc, int srcno)
>>>>>>>>>>  {
>>>>>>>>>>  
>>>>>>>>>> +    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
>>>>>>>>>> +
>>>>>>>>>> +    if (xfc->notify) {
>>>>>>>>>> +        xfc->notify(xsrc->xive, srcno + xsrc->offset);
>>>>>>>>>> +    } else {
>>>>>>>>>> +        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
>>>>>>>>>> +    }
>>>>>>>>>
>>>>>>>>> Why 2 cases?  Can't the XiveFabric object just make its notify equal
>>>>>>>>> to xive_fabric_route if that's what it wants?
>>>>>>>> Under sPAPR, all the sources, IPIs and virtual device interrupts, 
>>>>>>>> generate events which are directly routed by xive_fabric_route(). 
>>>>>>>> There is no need of an extra hop. Indeed. 
>>>>>>>
>>>>>>> Ok.
>>>>>>>
>>>>>>>> Under PowerNV, some sources forward the notification to the routing 
>>>>>>>> engine using a specific MMIO load on a notify address which is stored 
>>>>>>>> in one of the controller registers. So we need a hop to reach the 
>>>>>>>> device model, owning the sources, and do that load :
>>>>>>>
>>>>>>> Hm.  So you're saying that in pnv some sources send their notification
>>>>>>> to some other unit, 
>>>>>>
>>>>>> Not to any unit/device, to the device owning the sources.
>>>>>>
>>>>>> For the XiveSource object under PSI, the XIVEFabric interface is the 
>>>>>> PSI device object it self, which knows how to forward the notification 
>>>>>> on the XIVE Power "bus". To be more precise, the PSI HB device has 
>>>>>> 14 interrupt sources, which notifications are forwarded using a MMIO 
>>>>>> load to some address. The load address is configured (by skiboot) in 
>>>>>> one of the PSI device registers, and points to a MMIO region of the 
>>>>>> main XIVE interrupt controller. 
>>>>>>
>>>>>> The PHB4 sources should be the same.
>>>>>>
>>>>>> For the XiveSource object (all interrupts) under sPAPRXive, the 
>>>>>> XIVEFabric is the main interrupt controller sPAPRXive.
>>>>>>
>>>>>> For the XiveSource object (IPIs) under PnvXive, the XIVEFabric is 
>>>>>> also the main interrupt controller PnvXive.
>>>>>
>>>>> Hrm.  Apparently I'm missing something, I'm really not getting what
>>>>> you're trying to explain here.
>>>>
>>>> I see that. Let's try again.
>>>>
>>>>>>> that would then (after possible masking) forward on to the overall> xive fabric ? 
>>>>>>
>>>>>> yes. May be XIVEFabric is a confusing name. What about XIVEForwarder ? 
>>>>>
>>>>> Maybe..?
>>>>>
>>>>>>> That seems like a property of the source object, 
>>>>>>
>>>>>> The source object is generic. It's a bunch of PQ bits that can be 
>>>>>> controlled by MMIOs. Nothing more.
>>>>>
>>>>> Hmm.  Isn't the source object also responsible for forwarding the
>>>>> interrupt to something up the chain (whatever that is)?
>>>>
>>>> Yes but it can not forward directly. The XiveSource is generic and 
>>>> can only call a handler :
>>>>
>>>> 	xfc->notify(xsrc->xive, srcno + xsrc->offset);
>>>
>>> But.. your patch doesn't do that always, it's conditional which I
>>> still don't understand.
>>
>> Because at the end of the notify/forward chain, you route.
> 
> Hrm.  I'm really not understanding this notify/forward thing as
> distinct from routing.  I mean, from the description here it sounds
> kind of like cascaded interrupt controllers, which qemu already has
> mechanisms to handle, but I didn't think (most) POWER9 devices worked
> like that.

they don't. 

The concept I have in mind for the XIVEFabric QOM interface is 
something close to the XIVE logic unit in HW which links the 
device interrupt sources and the XIVE interrupt controller of the 
chip to the PowerBUS. The interrupt controller also has a special 
CQ (common queue) unit acting as a proxy for the PowerBus for the 
virtualization/router and the presenter units.  

It's a convenient model for QEMU because the XiveSource model only 
has to call an interface handler when a notification event is let 
through. It's like doing a MMIO to send on the PowerBUS the ISN of 
the source that just triggered.  

>>>> The device model owner, the parent of the XiveSource object, would 
>>>> do the real forward.
>>>
>>> Why?  
>>
>> because, in my idea of the XiveSource concept, it does not have 
>> the logic to do so: the register for the MMIO address to use, 
>> another one for the IVT offset, etc
>>
>>> I mean the XiveSource basically represents the xive irq related
>>> logic of the PHB or whatever, why would it not represent *all* of
>>> that, rather than just the ESB bits, meaning the owner has to have
>>> some more xive logic for the forwarding.
>>
>> ok. This is where we diverge in the concept. 
>>
>> The PQ bits, the ESB MMIO region handlers can be easily shared 
>> between the different device models. They are the common part
>> of devices with XIVE interrupt sources.
> 
> Sure, but QOM subclasses are a thing, which sounds like a better way
> of doing this than having to have extra XIVE logic in all the parent
> devices.

I agree It can be done with subclasses also but QOM interfaces are 
quickly defined and stateless. 

On sPAPR, the extra XIVE logic would only be the sPAPRXive model 
acting as the machine interrupt controller model. It owns all the 
guest interrupt sources.

In PowerNV, we would have the PnvXive interrupt controller model
for the chip, which also owns the IPI sources, and the PSIHB model 
and the PHB4 model. 

This is very much like XICS. Each relevant device has one or more 
ICSstate objects. ICSstate has a pointer to the XICSfabric.


>>> Note that I don't think the fact that some sources notify via mmio and
>>> some are internal really matters.  It's not like we're modelling the
>>> power bus down to the wire-transaction level.
>>
>> yes but the configuration of the devices are different. pnv devices 
>> will have registers accessible through MMIO or XSCOM and configured
>> by the firmware. spapr is all set up by QEMU.
> 
> Ah.. ok.  Actually modelling the mmio forwards probably makes sense
> then.  I still think it makes more sense in a pnv XiveSource subclass
> rather than putting it in the containing device.

Hmm, we will then need to define classes for the sPAPRXiveSource, 
PnvXiveSource, PSIXiveSource, which works fine for sure. But with 
a QOM interface, we could use directly the XiveSource class. 

I will take a look. 

>>>> It's very similar to what we have today with XICS :
>>>>
>>>> 	- The sPAPR model has an ICSState  
>>>> 	- The PnvPSI model has an ICSState 
>>>> 	- The PnvPHB3 model has two ICSStates
>>>>
>>>> and the 'xics' pointer in ICSState points to the 'interrupt unit' of 
>>>> the machine to do resends and to grab ICPs. So it used for routing 
>>>> essentially.
>>>
>>> Hmm.  I think you and I are looking at XICSFabric kind of
>>> differently.  As I see it, it's not really an active component at
>>> all.  Rather it's basically a global "map" of the xics components so
>>> that they can find each other.
>>
>> ok. I am not that far either. 
>>  
>>>> in Xive 
>>>>
>>>> 	- sPAPRXive model has a XiveSource
>>>> 	- PnvXive model has a XiveSource
>>>> 	- PnvPSI model has a XiveSource
>>>> 	- PnvPHB4 model should have also.
>>>>
>>>> and the 'xive' pointer in XiveSource points to the parent object,
>>>
>>> Uh.. yeah.. the xics pointer in ICS units doesn't point to the parent
>>> object, except maybe by accident.  It's absolutely intended to be
>>> global, and so points to the machine.
>>
>> yes. I agree. 
>>
>> XIVE has more layers and visible components due to the internal 
>> tables used for routing.
> 
> Right.
> 
>>>> which will handle the event notification forwarding or routing.
>>>
>>> Ok, how about this for a partial model.  We have:
>>>
>>> XiveSource objects:
>>> 	* Owns an ESB table
>>> 	* Knows the mapping of its local irq offsets to global irq
>>> 	  numbers
>>
>> That is the 'offset' attribute I suppose. This is set at runtime 
>> for the powernv devices.
> 
> Ok, so that offset is effectively a register (which will have to be
> migrated), rather than a device property.

powernv migration is a not on my radar yet :)

>> For pseries, we should need it for 
>> passthrough. I think. I haven't looked at that part yet.
> 
> Uh.. I really hope not.  AIUI the offsets are decided by the platform
> rather than the guest in this case, yes?  

A mapping is done in the guest. I still need to check how the ESB 
pages are populated in the guest ESB MMIO region. We should be fine 
I think.

> In which case if we can't
> count on a fixed offset even in passthrough mode, then migration is
> basically impossible.
>>>> 	* Provides the mmio interface for ESB manipulation
>>> 	* When neccessary, notifies a new interrupt to a XiveRouter
>>
>> ok. I think that what we have today fits the idea then. 
> 
> Yes, I think so to - just clarifying in the context of the rest of
> this proposal.

yes. I think we should be adding a XiveRouter class like you propose 
below.
 
>>> XiveRouter objects:
>>> 	* Responsible for a fixed range of global irq numbers
>>> 	* Owns an IVT (but what that means can vary, see below)
>>
>> the size of the IVT table is determined at runtime for powernv.
> 
> That's fine - both the size and base of the IVT will be registers of
> the powernv variant of the device.
> 
>>> 	* When notified of an irq, routes it to the appropriate EQ
>>> 	  (haven't thought about this part yet)
>>
>> we will need a class handler to get EQs
> 
> Sure.
> 
>>> 	* Abstract class - needs subclasses to define how to get IVEs
>>
>> OK. We will need a few other ops. The router needs to :
>>
>> 	- get IVEs
>> 	- get EQ descriptors
>> 	- update OS EQs (write event data in OS RAM)
> 
> IIUC the actual EQ (as opposed to its descriptor) will be in guest RAM
> for both powernv and spapr,

yes. 

> so this can be common (utiliizing a
> subclass hook to locate the EQ base address).

yes.
 
>> 	- update EQ descriptors (to set EQ descriptor index & toggle)
>> 	- get an NVT/VP (to notify CPUs)
> 
> Getting NVT/VP sounds more like it would belong in the XiveFabric than
> the router, but I haven't looked at this in detail yet.

The VP table is owned by the presenter unit. It is notified by the router
unit when an event is let through. The presenter then looks for the VP to
notify among the VPs dispatched on the HW threads. If none is found, 
the event is escalated. 

The router units owns the IVE and the EQ tables, but still can do some
remote access to the VPDT, to update the backlog and IBP I think. 
I don't think we need to introduce a presenter model, accessors for 
the VPs should enough. 

>> For powernv, we should also consider updating the NVT/VP. It can be 
>> done later. 
>>
>>> XiveFabric interface:
>>> 	* Lets XIVE components locate each other
>>
>> hmm, it should be a chain : 
>>
>> 	source -> router -> presenter -> cpu
>>
>> So the components should not have to locate each other. The presenter
>> does not know about the source for instance. Only the sources need
>> to forward events to the main controller logic doing the routing.
> 
> source -> router
> 
> From the above sounds like we can maybe make this just a router
> property in the source, rather than a lookup through a fabric.

If we use a direct link from the source to the router, how do we handle 
the powernv case which does event notification with a MMIO load ? 
I think a call through an interface is better (the XiveFabric)

> router -> presenter
> 
> Here we might still need a "fabric".  By defintion the router can
> direct to a bunch of different presenters, so it needs some kind of
> map to find them, no?

yes. The vp_block identifies the chip. There are different possible 
scenarios for the block configuration on a system but OPAL uses a 
simple one : one chip <-> one block. We can use that to start with 
on PowerNV.

And on pseries, just use block 0 for all, we don't really care.
The concept of block does not reach the spapr specs. 

As for the presenter it self, it loops on the CPUs to check the CAM 
line register of the thread interrupt management context to see if 
one matches vp_block+vp_index. This is one routine. I don't think 
we need a model for that.

> presenter -> cpu
> 
> This one's trivial; just the cpu->intc pointer or similar.

yes. We store under the cpu the thread interrupt management context,
which is a set of registers that the OS accesses through the TIMA,
one per machine.

>>> 	* get_router() method: maps a global irq number to XiveRouter
>>> 	  object
>>
>> Ah. you are thinking about the multichip case under powernv. I need
>> to look at that more closely. But XIVE has a concept of block which 
>> is used by skiboot to map a chip to a block and the XIVE tables have 
>> a block field.
> 
> Hmm.. I think we need to understand this to make a real model.  I'd
> thought the block number would just be the chip number somewhere in
> the high bits of the global irq, but sounds like there might be yet
> another indirection here.

We have a one-to-one mapping today. I doubt it will change. We can
start with that.

>>> 	* Always global (implemented on the machine)
>>
>> OK. this is more or less the object modeling the main interrupt 
>> controller ? What I called sPAPRXive in the current patchset.
> 
> No.  This is *always* global - assuming we need it at all - even on
> multichip powernv.
>
>>> On pseries we have:
>>>
>>> 	? XiveSource objects.  We probably only need one, but 1 for
>>> 	LSI and one for MSI might be convenient.  
>>
>> It's not too ugly for the moment. If we create a source object 
>> for LSIs only that might be more complex for passthrough devices 
>> and their associated ESB MMIO region.
>>
>> side note :
>>
>> LSIs work under TCG, and used to work under KVM until we removed
>> the StoreEOI support. Since the EOI is now different, we need 
>> to find a way to handle EOI for guest virtual LSIs which are 
>> not LSIs for the host.
> 
> I can't quite picture that case - can you give a concrete example?

A guest won't be able to use a rtl8139 nic under KVM.

As it is the guest that does the EOI, it should use the appropriate 
sequence depending on the interrupt type, LSI vs. MSI. The store EOI
was used for both types until we removed the support bc of ordering 
issues. So now, the guest uses a specific EOI sequence for LSI but 
the hypervisor implements virtual LSIs on top of IPIs interrupts which
are MSIs ... we are doomed. We need to fix the guest or to find a 
way to reroute such interrupts to QEMU.  


> 
>> We can still change the Linux spapr 
>> backend or have QEMU handle the EOI for virtual LSIs. This is 
>> on my TODO list.
>>
>>>       More wouldn't break the model
>>
>> It should not. Initially the pachset had two source objects : 
>> one for IPIs and one for the virtual devices interrupts. 
>>
>>> 	1 sPAPRXiveRouter.  This is a subclass of XiveRouter that
>>> 	holds the IVT internall (and migrates it).
>>
>> OK.
>>
>>> 	the XiveFabric implementation always returns the single global
>>> 	router for get_router()
>>
>> we can do that. 
> 
> Again, if it's true that each source object always forwards to just
> one router, then we don't need a get_router(), we can just use a link
> property.

that is the goal of the architecture, yes. powernv sources use MMIOs 
to forward events to the router and source event notifications are 
usually forwarded to the local chip. 

There are some case in which the IVE lookups are be remote. We can
use the block to scan the system chips for that. we don't have to
implement all the protocol.

> 
>> Do we gather all XIVE components objects under an object 'sPAPRXive' 
>> modeling that way the main interrupt controller of the machine ? 
> 
> No, I don't think so.
Hmm, 

sPAPRXive would inherit from the XiveRouter and define the IVT
and the EQDT. It is also practical to set up the TIMA memory region
which is unique per machine. KVM support will need custom version 
of this model.  

>> It should not have any state but it will hold the TIMA region 
>> most certainly, and the addresses where to map the ESB and 
>> TIMA regions.
> 
> Uh.. I'd expect the TIMAs to be held by the NVT objects, which we
> haven't covered yet.

There is a set of registers per CPU (thread interrupt management context),
but there is one TIMA (thread interrupt management areao) memory region 
for the whole machine.
 
We should change the NVT name, it is confusing. NVT is an old name for the 
backing store struct in RAM, which was replaced by VP. 

XiveTCTX (Xive Thread Context) is better IMO and its reflects the names 
of the IC registers.
 
>> KVM support will bring extra needs.
>>  
>>> On powernv we have:
>>> 	N XiveSource objects.  Some in PHBs, some extra ones on each
>>> 	chip
>>
>> yes and PSI to start with.  
>>
>>> 	(#chips) PowerXiveRouter objects.  This subclass of XiveRouter
>>> 	stores the register giving the IVT base address and migrates
>>> 	that, but the IVT contents are in RAM
>>>
>>> 	the XiveFabric get_router() implementation returns the right
>>> 	chip's router based on the irq number
>>>
>>> Obviously the router->EQ sides still needs a bunch of thought.
>>
>> These are all routing tables : 
>>
>> 	- IVT
>> 	- EQDT
>> 	- VPDT  
> 
> Thinking about what you've said, I'm thinking maybe what we need on
> the source side is two versions which don't exactly correspond to
> spapr vs powernv:
> 
> InBandXiveSource: this one's notify behaviour is to issue an mmio read
> to a configured address.  It's expected that mmio address belongs to a
> XiveRouter, but the source itself doesn't know anything routers.
> 
> OutOfBandXiveSource: this one's notify behaviour is to explicitly poke
> a XiveRouter.  A link to the router and offset (which range of router
> irqs are used by this source) would be object properties.
> 
> powernv would use the first for "external" irq sources and the second
> for internal ones.  spapr would use the second for everything.
> 

OK. 

I think I will stick to my idea for the next version because there are 
quite a lot of changes in the models already : a new spapr irq backend,
a XiveRouter, a EQD table for sPAPR, a complete TIMA support for all 
privileges, a renamed XiveTCTX, a simple Presenter scanning CAM lines, 
support for EQ ESBs (that no system uses), a pseries-2.13-xive machine,
etc.

I will share the powernv and PSIHB models to validate the common
XIVE model. We can change to the above if it is really not satisfying.

Thanks,

C.
diff mbox series

Patch

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 060976077dd7..b4c3d06c1219 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -17,6 +17,21 @@ 
 #include "hw/ppc/xive.h"
 
 /*
+ * XIVE Fabric
+ */
+
+static void xive_fabric_route(XiveFabric *xf, int lisn)
+{
+
+}
+
+static const TypeInfo xive_fabric_info = {
+    .name = TYPE_XIVE_FABRIC,
+    .parent = TYPE_INTERFACE,
+    .class_size = sizeof(XiveFabricClass),
+};
+
+/*
  * XIVE Interrupt Source
  */
 
@@ -97,11 +112,19 @@  static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno)
 
 /*
  * Forward the source event notification to the associated XiveFabric,
- * the device owning the sources.
+ * the device owning the sources, or perform the routing if the device
+ * is the interrupt controller.
  */
 static void xive_source_notify(XiveSource *xsrc, int srcno)
 {
 
+    XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xsrc->xive);
+
+    if (xfc->notify) {
+        xfc->notify(xsrc->xive, srcno + xsrc->offset);
+    } else {
+        xive_fabric_route(xsrc->xive, srcno + xsrc->offset);
+    }
 }
 
 /*
@@ -302,6 +325,17 @@  static void xive_source_reset(DeviceState *dev)
 static void xive_source_realize(DeviceState *dev, Error **errp)
 {
     XiveSource *xsrc = XIVE_SOURCE(dev);
+    Object *obj;
+    Error *local_err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
+    if (!obj) {
+        error_propagate(errp, local_err);
+        error_prepend(errp, "required link 'xive' not found: ");
+        return;
+    }
+
+    xsrc->xive = XIVE_FABRIC(obj);
 
     if (!xsrc->nr_irqs) {
         error_setg(errp, "Number of interrupt needs to be greater than 0");
@@ -376,6 +410,7 @@  static const TypeInfo xive_source_info = {
 static void xive_register_types(void)
 {
     type_register_static(&xive_source_info);
+    type_register_static(&xive_fabric_info);
 }
 
 type_init(xive_register_types)
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 0b76dd278d9b..4fcae2c763e6 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -12,6 +12,8 @@ 
 
 #include "hw/sysbus.h"
 
+typedef struct XiveFabric XiveFabric;
+
 /*
  * XIVE Interrupt Source
  */
@@ -46,6 +48,8 @@  typedef struct XiveSource {
     hwaddr       esb_base;
     uint32_t     esb_shift;
     MemoryRegion esb_mmio;
+
+    XiveFabric   *xive;
 } XiveSource;
 
 /*
@@ -143,4 +147,25 @@  static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno,
     xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0;
 }
 
+/*
+ * XIVE Fabric
+ */
+
+typedef struct XiveFabric {
+    Object parent;
+} XiveFabric;
+
+#define TYPE_XIVE_FABRIC "xive-fabric"
+#define XIVE_FABRIC(obj)                                     \
+    OBJECT_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
+#define XIVE_FABRIC_CLASS(klass)                                     \
+    OBJECT_CLASS_CHECK(XiveFabricClass, (klass), TYPE_XIVE_FABRIC)
+#define XIVE_FABRIC_GET_CLASS(obj)                                   \
+    OBJECT_GET_CLASS(XiveFabricClass, (obj), TYPE_XIVE_FABRIC)
+
+typedef struct XiveFabricClass {
+    InterfaceClass parent;
+    void (*notify)(XiveFabric *xf, uint32_t lisn);
+} XiveFabricClass;
+
 #endif /* PPC_XIVE_H */