diff mbox

[02/17] pseries: rework XICS

Message ID 1372315560-5478-3-git-send-email-aik@ozlabs.ru
State New
Headers show

Commit Message

Alexey Kardashevskiy June 27, 2013, 6:45 a.m. UTC
Currently XICS interrupt controller is not a QEMU device. As we are going
to support in-kernel emulated XICS which is a part of KVM, it make
sense not to extend the existing XICS and have multiple KVM stub functions
but to create yet another device and share pieces between fully emulated
XICS and in-kernel XICS.

The rework includes:
* port to QOM
* made few functions public to use from in-kernel XICS implementation
* made VMStateDescription public to be used for in-kernel XICS migration
* move xics_system_init() to spapr.c, it tries creating fully-emulated
XICS now and will try in-kernel XICS in upcoming patches.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/intc/xics.c        |  109 ++++++++++++++++++++++++++-----------------------
 hw/ppc/spapr.c        |   28 +++++++++++++
 include/hw/ppc/xics.h |   59 ++++++++++++++++++++++++--
 3 files changed, 141 insertions(+), 55 deletions(-)

Comments

David Gibson June 27, 2013, 11:47 a.m. UTC | #1
On Thu, Jun 27, 2013 at 04:45:45PM +1000, Alexey Kardashevskiy wrote:
> Currently XICS interrupt controller is not a QEMU device. As we are going
> to support in-kernel emulated XICS which is a part of KVM, it make
> sense not to extend the existing XICS and have multiple KVM stub functions
> but to create yet another device and share pieces between fully emulated
> XICS and in-kernel XICS.

Hmm.  So, I think changing the xics to the qdev/qom framework is a
generally good idea.  But I'm not convinced its a good idea to have
different devices for the kernel and non-kernel xics.  Won't that
prevent migrating from a system with a kernel xics to one without, or
vice versa?

> 
> The rework includes:
> * port to QOM
> * made few functions public to use from in-kernel XICS implementation
> * made VMStateDescription public to be used for in-kernel XICS migration
> * move xics_system_init() to spapr.c, it tries creating fully-emulated
> XICS now and will try in-kernel XICS in upcoming patches.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  hw/intc/xics.c        |  109 ++++++++++++++++++++++++++-----------------------
>  hw/ppc/spapr.c        |   28 +++++++++++++
>  include/hw/ppc/xics.h |   59 ++++++++++++++++++++++++--
>  3 files changed, 141 insertions(+), 55 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index 091912e..0e374c8 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -34,13 +34,6 @@
>   * ICP: Presentation layer
>   */
>  
> -struct icp_server_state {
> -    uint32_t xirr;
> -    uint8_t pending_priority;
> -    uint8_t mfrr;
> -    qemu_irq output;
> -};
> -
>  #define XISR_MASK  0x00ffffff
>  #define CPPR_MASK  0xff000000
>  
> @@ -49,12 +42,6 @@ struct icp_server_state {
>  
>  struct ics_state;
>  
> -struct icp_state {
> -    long nr_servers;
> -    struct icp_server_state *ss;
> -    struct ics_state *ics;
> -};
> -
>  static void ics_reject(struct ics_state *ics, int nr);
>  static void ics_resend(struct ics_state *ics);
>  static void ics_eoi(struct ics_state *ics, int nr);
> @@ -171,27 +158,6 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
>  /*
>   * ICS: Source layer
>   */
> -
> -struct ics_irq_state {
> -    int server;
> -    uint8_t priority;
> -    uint8_t saved_priority;
> -#define XICS_STATUS_ASSERTED           0x1
> -#define XICS_STATUS_SENT               0x2
> -#define XICS_STATUS_REJECTED           0x4
> -#define XICS_STATUS_MASKED_PENDING     0x8
> -    uint8_t status;
> -};
> -
> -struct ics_state {
> -    int nr_irqs;
> -    int offset;
> -    qemu_irq *qirqs;
> -    bool *islsi;
> -    struct ics_irq_state *irqs;
> -    struct icp_state *icp;
> -};
> -
>  static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
>  {
>      return (nr >= ics->offset)
> @@ -506,9 +472,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>      rtas_st(rets, 0, 0); /* Success */
>  }
>  
> -static void xics_reset(void *opaque)
> +void xics_common_reset(struct icp_state *icp)

Why do you need to expose this interface?  Couldn't the caller use
qdev_reset(xics) just as easily?

>  {
> -    struct icp_state *icp = (struct icp_state *)opaque;
>      struct ics_state *ics = icp->ics;
>      int i;
>  
> @@ -527,7 +492,12 @@ static void xics_reset(void *opaque)
>      }
>  }
>  
> -void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> +static void xics_reset(DeviceState *d)
> +{
> +    xics_common_reset(XICS(d));
> +}
> +
> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>  {
>      CPUState *cs = CPU(cpu);
>      CPUPPCState *env = &cpu->env;
> @@ -551,37 +521,72 @@ void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>      }
>  }
>  
> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> +{
> +    xics_common_cpu_setup(icp, cpu);
> +}
> +
> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler)
>  {
> -    struct icp_state *icp;
> -    struct ics_state *ics;
> +    struct ics_state *ics = icp->ics;
>  
> -    icp = g_malloc0(sizeof(*icp));
> -    icp->nr_servers = nr_servers;
>      icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
>  
>      ics = g_malloc0(sizeof(*ics));
> -    ics->nr_irqs = nr_irqs;
> +    ics->nr_irqs = icp->nr_irqs;
>      ics->offset = XICS_IRQ_BASE;
> -    ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
> -    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
> +    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(struct ics_irq_state));
> +    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
>  
>      icp->ics = ics;
>      ics->icp = icp;
>  
> -    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
> +    ics->qirqs = qemu_allocate_irqs(handler, ics, ics->nr_irqs);
> +}
>  
> -    spapr_register_hypercall(H_CPPR, h_cppr);
> -    spapr_register_hypercall(H_IPI, h_ipi);
> -    spapr_register_hypercall(H_XIRR, h_xirr);
> -    spapr_register_hypercall(H_EOI, h_eoi);
> +static void xics_realize(DeviceState *dev, Error **errp)
> +{
> +    struct icp_state *icp = XICS(dev);
> +
> +    xics_common_init(icp, ics_set_irq);
>  
>      spapr_rtas_register("ibm,set-xive", rtas_set_xive);
>      spapr_rtas_register("ibm,get-xive", rtas_get_xive);
>      spapr_rtas_register("ibm,int-off", rtas_int_off);
>      spapr_rtas_register("ibm,int-on", rtas_int_on);
>  
> -    qemu_register_reset(xics_reset, icp);
> +}
> +
> +static Property xics_properties[] = {
> +    DEFINE_PROP_UINT32("nr_servers", struct icp_state, nr_servers, -1),
> +    DEFINE_PROP_UINT32("nr_irqs", struct icp_state, nr_irqs, -1),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void xics_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +
> +    dc->realize = xics_realize;
> +    dc->props = xics_properties;
> +    dc->reset = xics_reset;
> +}
> +
> +static const TypeInfo xics_info = {
> +    .name          = TYPE_XICS,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(struct icp_state),
> +    .class_init    = xics_class_init,
> +};
> +
> +static void xics_register_types(void)
> +{
> +    spapr_register_hypercall(H_CPPR, h_cppr);
> +    spapr_register_hypercall(H_IPI, h_ipi);
> +    spapr_register_hypercall(H_XIRR, h_xirr);
> +    spapr_register_hypercall(H_EOI, h_eoi);
>  
> -    return icp;
> +    type_register_static(&xics_info);
>  }
> +
> +type_init(xics_register_types)
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 38c29b7..def3505 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -719,6 +719,34 @@ static int spapr_vga_init(PCIBus *pci_bus)
>      }
>  }
>  
> +static struct icp_state *try_create_xics(const char *type, int nr_servers,
> +                                         int nr_irqs)
> +{
> +    DeviceState *dev;
> +
> +    dev = qdev_create(NULL, type);
> +    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
> +    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
> +    if (qdev_init(dev) < 0) {
> +        return NULL;

You could just use qdev_init_nofail() here to avoid the manual
handling of failures.

> +    }
> +
> +    return XICS(dev);
> +}
> +
> +static struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
> +{
> +    struct icp_state *icp = NULL;
> +
> +    icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
> +    if (!icp) {
> +        perror("Failed to create XICS\n");
> +        abort();
> +    }
> +
> +    return icp;
> +}
> +
>  /* pSeries LPAR / sPAPR hardware init */
>  static void ppc_spapr_init(QEMUMachineInitArgs *args)
>  {
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 6bce042..3f72806 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -27,15 +27,68 @@
>  #if !defined(__XICS_H__)
>  #define __XICS_H__
>  
> +#include "hw/sysbus.h"
> +
> +#define TYPE_XICS "xics"
> +#define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
> +
>  #define XICS_IPI        0x2
> -#define XICS_IRQ_BASE   0x10
> +#define XICS_BUID       0x1
> +#define XICS_IRQ_BASE   (XICS_BUID << 12)
> +
> +/*
> + * We currently only support one BUID which is our interrupt base
> + * (the kernel implementation supports more but we don't exploit
> + *  that yet)
> + */
>  
> -struct icp_state;
> +struct icp_state {
> +    /*< private >*/
> +    SysBusDevice parent_obj;
> +    /*< public >*/
> +    uint32_t nr_servers;
> +    uint32_t nr_irqs;
> +    struct icp_server_state *ss;
> +    struct ics_state *ics;
> +};
> +
> +struct icp_server_state {
> +    uint32_t xirr;
> +    uint8_t pending_priority;
> +    uint8_t mfrr;
> +    qemu_irq output;
> +};

The indivudual server_state and irq_state structures probably
shouldn't be exported.

> +struct ics_state {
> +    uint32_t nr_irqs;
> +    uint32_t offset;
> +    qemu_irq *qirqs;
> +    bool *islsi;
> +    struct ics_irq_state *irqs;
> +    struct icp_state *icp;
> +};
> +
> +struct ics_irq_state {
> +    uint32_t server;
> +    uint8_t priority;
> +    uint8_t saved_priority;
> +#define XICS_STATUS_ASSERTED           0x1
> +#define XICS_STATUS_SENT               0x2
> +#define XICS_STATUS_REJECTED           0x4
> +#define XICS_STATUS_MASKED_PENDING     0x8
> +    uint8_t status;
> +};
>  
>  qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
>  void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
>  
> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler);
> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
> +void xics_common_reset(struct icp_state *icp);
> +
>  void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>  
> +extern const VMStateDescription vmstate_icp_server;
> +extern const VMStateDescription vmstate_ics;
> +
>  #endif /* __XICS_H__ */
Alexey Kardashevskiy June 27, 2013, 12:17 p.m. UTC | #2
On 06/27/2013 09:47 PM, David Gibson wrote:
> On Thu, Jun 27, 2013 at 04:45:45PM +1000, Alexey Kardashevskiy wrote:
>> Currently XICS interrupt controller is not a QEMU device. As we are going
>> to support in-kernel emulated XICS which is a part of KVM, it make
>> sense not to extend the existing XICS and have multiple KVM stub functions
>> but to create yet another device and share pieces between fully emulated
>> XICS and in-kernel XICS.
> 
> Hmm.  So, I think changing the xics to the qdev/qom framework is a
> generally good idea.  But I'm not convinced its a good idea to have
> different devices for the kernel and non-kernel xics.

The idea came from Alex Graf, this is already done for openpic/openpic-kvm.
The normal practice is to move ioctls to KVM to KVM code and provide empty
stubs for non-KVM case. There were too many so having a separate xics-kvm
is kind of help.


> Won't that
> prevent migrating from a system with a kernel xics to one without, or
> vice versa?

Mmm. Do we care much about that?...
At the moment it is not supported that as VMStateDescription have different
.name for xics and xics-kvm but easy to fix. And we do not pass a device to
vmstate_register so that must be it.


> 
>>
>> The rework includes:
>> * port to QOM
>> * made few functions public to use from in-kernel XICS implementation
>> * made VMStateDescription public to be used for in-kernel XICS migration
>> * move xics_system_init() to spapr.c, it tries creating fully-emulated
>> XICS now and will try in-kernel XICS in upcoming patches.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  hw/intc/xics.c        |  109 ++++++++++++++++++++++++++-----------------------
>>  hw/ppc/spapr.c        |   28 +++++++++++++
>>  include/hw/ppc/xics.h |   59 ++++++++++++++++++++++++--
>>  3 files changed, 141 insertions(+), 55 deletions(-)
>>
>> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
>> index 091912e..0e374c8 100644
>> --- a/hw/intc/xics.c
>> +++ b/hw/intc/xics.c
>> @@ -34,13 +34,6 @@
>>   * ICP: Presentation layer
>>   */
>>  
>> -struct icp_server_state {
>> -    uint32_t xirr;
>> -    uint8_t pending_priority;
>> -    uint8_t mfrr;
>> -    qemu_irq output;
>> -};
>> -
>>  #define XISR_MASK  0x00ffffff
>>  #define CPPR_MASK  0xff000000
>>  
>> @@ -49,12 +42,6 @@ struct icp_server_state {
>>  
>>  struct ics_state;
>>  
>> -struct icp_state {
>> -    long nr_servers;
>> -    struct icp_server_state *ss;
>> -    struct ics_state *ics;
>> -};
>> -
>>  static void ics_reject(struct ics_state *ics, int nr);
>>  static void ics_resend(struct ics_state *ics);
>>  static void ics_eoi(struct ics_state *ics, int nr);
>> @@ -171,27 +158,6 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
>>  /*
>>   * ICS: Source layer
>>   */
>> -
>> -struct ics_irq_state {
>> -    int server;
>> -    uint8_t priority;
>> -    uint8_t saved_priority;
>> -#define XICS_STATUS_ASSERTED           0x1
>> -#define XICS_STATUS_SENT               0x2
>> -#define XICS_STATUS_REJECTED           0x4
>> -#define XICS_STATUS_MASKED_PENDING     0x8
>> -    uint8_t status;
>> -};
>> -
>> -struct ics_state {
>> -    int nr_irqs;
>> -    int offset;
>> -    qemu_irq *qirqs;
>> -    bool *islsi;
>> -    struct ics_irq_state *irqs;
>> -    struct icp_state *icp;
>> -};
>> -
>>  static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
>>  {
>>      return (nr >= ics->offset)
>> @@ -506,9 +472,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>>      rtas_st(rets, 0, 0); /* Success */
>>  }
>>  
>> -static void xics_reset(void *opaque)
>> +void xics_common_reset(struct icp_state *icp)
> 
> Why do you need to expose this interface?  Couldn't the caller use
> qdev_reset(xics) just as easily?
> 
>>  {
>> -    struct icp_state *icp = (struct icp_state *)opaque;
>>      struct ics_state *ics = icp->ics;
>>      int i;
>>  
>> @@ -527,7 +492,12 @@ static void xics_reset(void *opaque)
>>      }
>>  }
>>  
>> -void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>> +static void xics_reset(DeviceState *d)
>> +{
>> +    xics_common_reset(XICS(d));
>> +}
>> +
>> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>  {
>>      CPUState *cs = CPU(cpu);
>>      CPUPPCState *env = &cpu->env;
>> @@ -551,37 +521,72 @@ void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>      }
>>  }
>>  
>> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
>> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>> +{
>> +    xics_common_cpu_setup(icp, cpu);
>> +}
>> +
>> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler)
>>  {
>> -    struct icp_state *icp;
>> -    struct ics_state *ics;
>> +    struct ics_state *ics = icp->ics;
>>  
>> -    icp = g_malloc0(sizeof(*icp));
>> -    icp->nr_servers = nr_servers;
>>      icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
>>  
>>      ics = g_malloc0(sizeof(*ics));
>> -    ics->nr_irqs = nr_irqs;
>> +    ics->nr_irqs = icp->nr_irqs;
>>      ics->offset = XICS_IRQ_BASE;
>> -    ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
>> -    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
>> +    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(struct ics_irq_state));
>> +    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
>>  
>>      icp->ics = ics;
>>      ics->icp = icp;
>>  
>> -    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
>> +    ics->qirqs = qemu_allocate_irqs(handler, ics, ics->nr_irqs);
>> +}
>>  
>> -    spapr_register_hypercall(H_CPPR, h_cppr);
>> -    spapr_register_hypercall(H_IPI, h_ipi);
>> -    spapr_register_hypercall(H_XIRR, h_xirr);
>> -    spapr_register_hypercall(H_EOI, h_eoi);
>> +static void xics_realize(DeviceState *dev, Error **errp)
>> +{
>> +    struct icp_state *icp = XICS(dev);
>> +
>> +    xics_common_init(icp, ics_set_irq);
>>  
>>      spapr_rtas_register("ibm,set-xive", rtas_set_xive);
>>      spapr_rtas_register("ibm,get-xive", rtas_get_xive);
>>      spapr_rtas_register("ibm,int-off", rtas_int_off);
>>      spapr_rtas_register("ibm,int-on", rtas_int_on);
>>  
>> -    qemu_register_reset(xics_reset, icp);
>> +}
>> +
>> +static Property xics_properties[] = {
>> +    DEFINE_PROP_UINT32("nr_servers", struct icp_state, nr_servers, -1),
>> +    DEFINE_PROP_UINT32("nr_irqs", struct icp_state, nr_irqs, -1),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void xics_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +
>> +    dc->realize = xics_realize;
>> +    dc->props = xics_properties;
>> +    dc->reset = xics_reset;
>> +}
>> +
>> +static const TypeInfo xics_info = {
>> +    .name          = TYPE_XICS,
>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>> +    .instance_size = sizeof(struct icp_state),
>> +    .class_init    = xics_class_init,
>> +};
>> +
>> +static void xics_register_types(void)
>> +{
>> +    spapr_register_hypercall(H_CPPR, h_cppr);
>> +    spapr_register_hypercall(H_IPI, h_ipi);
>> +    spapr_register_hypercall(H_XIRR, h_xirr);
>> +    spapr_register_hypercall(H_EOI, h_eoi);
>>  
>> -    return icp;
>> +    type_register_static(&xics_info);
>>  }
>> +
>> +type_init(xics_register_types)
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 38c29b7..def3505 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -719,6 +719,34 @@ static int spapr_vga_init(PCIBus *pci_bus)
>>      }
>>  }
>>  
>> +static struct icp_state *try_create_xics(const char *type, int nr_servers,
>> +                                         int nr_irqs)
>> +{
>> +    DeviceState *dev;
>> +
>> +    dev = qdev_create(NULL, type);
>> +    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
>> +    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
>> +    if (qdev_init(dev) < 0) {
>> +        return NULL;
> 
> You could just use qdev_init_nofail() here to avoid the manual
> handling of failures.
> 
>> +    }
>> +
>> +    return XICS(dev);
>> +}
>> +
>> +static struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
>> +{
>> +    struct icp_state *icp = NULL;
>> +
>> +    icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
>> +    if (!icp) {
>> +        perror("Failed to create XICS\n");
>> +        abort();
>> +    }
>> +
>> +    return icp;
>> +}
>> +
>>  /* pSeries LPAR / sPAPR hardware init */
>>  static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>  {
>> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
>> index 6bce042..3f72806 100644
>> --- a/include/hw/ppc/xics.h
>> +++ b/include/hw/ppc/xics.h
>> @@ -27,15 +27,68 @@
>>  #if !defined(__XICS_H__)
>>  #define __XICS_H__
>>  
>> +#include "hw/sysbus.h"
>> +
>> +#define TYPE_XICS "xics"
>> +#define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
>> +
>>  #define XICS_IPI        0x2
>> -#define XICS_IRQ_BASE   0x10
>> +#define XICS_BUID       0x1
>> +#define XICS_IRQ_BASE   (XICS_BUID << 12)
>> +
>> +/*
>> + * We currently only support one BUID which is our interrupt base
>> + * (the kernel implementation supports more but we don't exploit
>> + *  that yet)
>> + */
>>  
>> -struct icp_state;
>> +struct icp_state {
>> +    /*< private >*/
>> +    SysBusDevice parent_obj;
>> +    /*< public >*/
>> +    uint32_t nr_servers;
>> +    uint32_t nr_irqs;
>> +    struct icp_server_state *ss;
>> +    struct ics_state *ics;
>> +};
>> +
>> +struct icp_server_state {
>> +    uint32_t xirr;
>> +    uint8_t pending_priority;
>> +    uint8_t mfrr;
>> +    qemu_irq output;
>> +};
> 
> The indivudual server_state and irq_state structures probably
> shouldn't be exported.
> 
>> +struct ics_state {
>> +    uint32_t nr_irqs;
>> +    uint32_t offset;
>> +    qemu_irq *qirqs;
>> +    bool *islsi;
>> +    struct ics_irq_state *irqs;
>> +    struct icp_state *icp;
>> +};
>> +
>> +struct ics_irq_state {
>> +    uint32_t server;
>> +    uint8_t priority;
>> +    uint8_t saved_priority;
>> +#define XICS_STATUS_ASSERTED           0x1
>> +#define XICS_STATUS_SENT               0x2
>> +#define XICS_STATUS_REJECTED           0x4
>> +#define XICS_STATUS_MASKED_PENDING     0x8
>> +    uint8_t status;
>> +};
>>  
>>  qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
>>  void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
>>  
>> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
>> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler);
>> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>> +void xics_common_reset(struct icp_state *icp);
>> +
>>  void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>>  
>> +extern const VMStateDescription vmstate_icp_server;
>> +extern const VMStateDescription vmstate_ics;
>> +
>>  #endif /* __XICS_H__ */
>
David Gibson July 2, 2013, 12:06 a.m. UTC | #3
On Thu, Jun 27, 2013 at 10:17:19PM +1000, Alexey Kardashevskiy wrote:
> On 06/27/2013 09:47 PM, David Gibson wrote:
> > On Thu, Jun 27, 2013 at 04:45:45PM +1000, Alexey Kardashevskiy wrote:
> >> Currently XICS interrupt controller is not a QEMU device. As we are going
> >> to support in-kernel emulated XICS which is a part of KVM, it make
> >> sense not to extend the existing XICS and have multiple KVM stub functions
> >> but to create yet another device and share pieces between fully emulated
> >> XICS and in-kernel XICS.
> > 
> > Hmm.  So, I think changing the xics to the qdev/qom framework is a
> > generally good idea.  But I'm not convinced its a good idea to have
> > different devices for the kernel and non-kernel xics.
> 
> The idea came from Alex Graf, this is already done for openpic/openpic-kvm.
> The normal practice is to move ioctls to KVM to KVM code and provide empty
> stubs for non-KVM case. There were too many so having a separate xics-kvm
> is kind of help.
> 
> 
> > Won't that
> > prevent migrating from a system with a kernel xics to one without, or
> > vice versa?
> 
> Mmm. Do we care much about that?...

Enough to avoid making it impossible by design.

> At the moment it is not supported that as VMStateDescription have different
> .name for xics and xics-kvm but easy to fix. And we do not pass a device to
> vmstate_register so that must be it.

Ok, if you can make the ids in the vmsd match, then that should be ok.
Alexander Graf July 2, 2013, 12:21 a.m. UTC | #4
On 02.07.2013, at 02:06, David Gibson wrote:

> On Thu, Jun 27, 2013 at 10:17:19PM +1000, Alexey Kardashevskiy wrote:
>> On 06/27/2013 09:47 PM, David Gibson wrote:
>>> On Thu, Jun 27, 2013 at 04:45:45PM +1000, Alexey Kardashevskiy wrote:
>>>> Currently XICS interrupt controller is not a QEMU device. As we are going
>>>> to support in-kernel emulated XICS which is a part of KVM, it make
>>>> sense not to extend the existing XICS and have multiple KVM stub functions
>>>> but to create yet another device and share pieces between fully emulated
>>>> XICS and in-kernel XICS.
>>> 
>>> Hmm.  So, I think changing the xics to the qdev/qom framework is a
>>> generally good idea.  But I'm not convinced its a good idea to have
>>> different devices for the kernel and non-kernel xics.
>> 
>> The idea came from Alex Graf, this is already done for openpic/openpic-kvm.
>> The normal practice is to move ioctls to KVM to KVM code and provide empty
>> stubs for non-KVM case. There were too many so having a separate xics-kvm
>> is kind of help.
>> 
>> 
>>> Won't that
>>> prevent migrating from a system with a kernel xics to one without, or
>>> vice versa?
>> 
>> Mmm. Do we care much about that?...
> 
> Enough to avoid making it impossible by design.

We went that route with x86 too after lots of hassle trying to shoehorn the in-kernel APIC into the emulation device. It's more hassle than gain.

> 
>> At the moment it is not supported that as VMStateDescription have different
>> .name for xics and xics-kvm but easy to fix. And we do not pass a device to
>> vmstate_register so that must be it.
> 
> Ok, if you can make the ids in the vmsd match, then that should be ok.

I really just wouldn't bother too much about it. Sooner or later QEMU-XICS is going to be a legacy and debug only option.


Alex
Alexey Kardashevskiy July 2, 2013, 2:08 a.m. UTC | #5
On 07/02/2013 10:21 AM, Alexander Graf wrote:
> 
> On 02.07.2013, at 02:06, David Gibson wrote:
> 
>> On Thu, Jun 27, 2013 at 10:17:19PM +1000, Alexey Kardashevskiy wrote:
>>> On 06/27/2013 09:47 PM, David Gibson wrote:
>>>> On Thu, Jun 27, 2013 at 04:45:45PM +1000, Alexey Kardashevskiy wrote:
>>>>> Currently XICS interrupt controller is not a QEMU device. As we are going
>>>>> to support in-kernel emulated XICS which is a part of KVM, it make
>>>>> sense not to extend the existing XICS and have multiple KVM stub functions
>>>>> but to create yet another device and share pieces between fully emulated
>>>>> XICS and in-kernel XICS.
>>>>
>>>> Hmm.  So, I think changing the xics to the qdev/qom framework is a
>>>> generally good idea.  But I'm not convinced its a good idea to have
>>>> different devices for the kernel and non-kernel xics.
>>>
>>> The idea came from Alex Graf, this is already done for openpic/openpic-kvm.
>>> The normal practice is to move ioctls to KVM to KVM code and provide empty
>>> stubs for non-KVM case. There were too many so having a separate xics-kvm
>>> is kind of help.
>>>
>>>
>>>> Won't that
>>>> prevent migrating from a system with a kernel xics to one without, or
>>>> vice versa?
>>>
>>> Mmm. Do we care much about that?...
>>
>> Enough to avoid making it impossible by design.
> 
> We went that route with x86 too after lots of hassle trying to shoehorn the in-kernel APIC into the emulation device. It's more hassle than gain.

At the moment it can be supported at no cost so next time I'll post it with
matched vmsd.



>>> At the moment it is not supported that as VMStateDescription have different
>>> .name for xics and xics-kvm but easy to fix. And we do not pass a device to
>>> vmstate_register so that must be it.
>>
>> Ok, if you can make the ids in the vmsd match, then that should be ok.
> 
> I really just wouldn't bother too much about it. Sooner or later QEMU-XICS is going to be a legacy and debug only option.
Anthony Liguori July 8, 2013, 6:22 p.m. UTC | #6
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> Currently XICS interrupt controller is not a QEMU device. As we are going
> to support in-kernel emulated XICS which is a part of KVM, it make
> sense not to extend the existing XICS and have multiple KVM stub functions
> but to create yet another device and share pieces between fully emulated
> XICS and in-kernel XICS.
>
> The rework includes:
> * port to QOM
> * made few functions public to use from in-kernel XICS implementation
> * made VMStateDescription public to be used for in-kernel XICS migration
> * move xics_system_init() to spapr.c, it tries creating fully-emulated
> XICS now and will try in-kernel XICS in upcoming patches.
>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  hw/intc/xics.c        |  109 ++++++++++++++++++++++++++-----------------------
>  hw/ppc/spapr.c        |   28 +++++++++++++
>  include/hw/ppc/xics.h |   59 ++++++++++++++++++++++++--
>  3 files changed, 141 insertions(+), 55 deletions(-)
>
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index 091912e..0e374c8 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -34,13 +34,6 @@
>   * ICP: Presentation layer
>   */
>  
> -struct icp_server_state {
> -    uint32_t xirr;
> -    uint8_t pending_priority;
> -    uint8_t mfrr;
> -    qemu_irq output;
> -};
> -
>  #define XISR_MASK  0x00ffffff
>  #define CPPR_MASK  0xff000000
>  
> @@ -49,12 +42,6 @@ struct icp_server_state {
>  
>  struct ics_state;
>  
> -struct icp_state {
> -    long nr_servers;
> -    struct icp_server_state *ss;
> -    struct ics_state *ics;
> -};
> -
>  static void ics_reject(struct ics_state *ics, int nr);
>  static void ics_resend(struct ics_state *ics);
>  static void ics_eoi(struct ics_state *ics, int nr);
> @@ -171,27 +158,6 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
>  /*
>   * ICS: Source layer
>   */
> -
> -struct ics_irq_state {
> -    int server;
> -    uint8_t priority;
> -    uint8_t saved_priority;
> -#define XICS_STATUS_ASSERTED           0x1
> -#define XICS_STATUS_SENT               0x2
> -#define XICS_STATUS_REJECTED           0x4
> -#define XICS_STATUS_MASKED_PENDING     0x8
> -    uint8_t status;
> -};
> -
> -struct ics_state {
> -    int nr_irqs;
> -    int offset;
> -    qemu_irq *qirqs;
> -    bool *islsi;
> -    struct ics_irq_state *irqs;
> -    struct icp_state *icp;
> -};
> -
>  static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
>  {
>      return (nr >= ics->offset)
> @@ -506,9 +472,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>      rtas_st(rets, 0, 0); /* Success */
>  }
>  
> -static void xics_reset(void *opaque)
> +void xics_common_reset(struct icp_state *icp)
>  {
> -    struct icp_state *icp = (struct icp_state *)opaque;
>      struct ics_state *ics = icp->ics;
>      int i;
>  
> @@ -527,7 +492,12 @@ static void xics_reset(void *opaque)
>      }
>  }
>  
> -void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> +static void xics_reset(DeviceState *d)
> +{
> +    xics_common_reset(XICS(d));
> +}
> +
> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>  {
>      CPUState *cs = CPU(cpu);
>      CPUPPCState *env = &cpu->env;
> @@ -551,37 +521,72 @@ void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>      }
>  }
>  
> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> +{
> +    xics_common_cpu_setup(icp, cpu);
> +}
> +
> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler)
>  {
> -    struct icp_state *icp;
> -    struct ics_state *ics;
> +    struct ics_state *ics = icp->ics;
>  
> -    icp = g_malloc0(sizeof(*icp));
> -    icp->nr_servers = nr_servers;
>      icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
>  
>      ics = g_malloc0(sizeof(*ics));
> -    ics->nr_irqs = nr_irqs;
> +    ics->nr_irqs = icp->nr_irqs;
>      ics->offset = XICS_IRQ_BASE;
> -    ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
> -    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
> +    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(struct ics_irq_state));
> +    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
>  
>      icp->ics = ics;
>      ics->icp = icp;
>  
> -    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
> +    ics->qirqs = qemu_allocate_irqs(handler, ics, ics->nr_irqs);
> +}
>  
> -    spapr_register_hypercall(H_CPPR, h_cppr);
> -    spapr_register_hypercall(H_IPI, h_ipi);
> -    spapr_register_hypercall(H_XIRR, h_xirr);
> -    spapr_register_hypercall(H_EOI, h_eoi);
> +static void xics_realize(DeviceState *dev, Error **errp)
> +{
> +    struct icp_state *icp = XICS(dev);
> +
> +    xics_common_init(icp, ics_set_irq);
>  
>      spapr_rtas_register("ibm,set-xive", rtas_set_xive);
>      spapr_rtas_register("ibm,get-xive", rtas_get_xive);
>      spapr_rtas_register("ibm,int-off", rtas_int_off);
>      spapr_rtas_register("ibm,int-on", rtas_int_on);
>  
> -    qemu_register_reset(xics_reset, icp);
> +}
> +
> +static Property xics_properties[] = {
> +    DEFINE_PROP_UINT32("nr_servers", struct icp_state, nr_servers, -1),
> +    DEFINE_PROP_UINT32("nr_irqs", struct icp_state, nr_irqs, -1),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void xics_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +
> +    dc->realize = xics_realize;
> +    dc->props = xics_properties;
> +    dc->reset = xics_reset;
> +}
> +
> +static const TypeInfo xics_info = {
> +    .name          = TYPE_XICS,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(struct icp_state),
> +    .class_init    = xics_class_init,
> +};
> +
> +static void xics_register_types(void)
> +{
> +    spapr_register_hypercall(H_CPPR, h_cppr);
> +    spapr_register_hypercall(H_IPI, h_ipi);
> +    spapr_register_hypercall(H_XIRR, h_xirr);
> +    spapr_register_hypercall(H_EOI, h_eoi);
>  
> -    return icp;
> +    type_register_static(&xics_info);
>  }
> +
> +type_init(xics_register_types)
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 38c29b7..def3505 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -719,6 +719,34 @@ static int spapr_vga_init(PCIBus *pci_bus)
>      }
>  }
>  
> +static struct icp_state *try_create_xics(const char *type, int nr_servers,
> +                                         int nr_irqs)
> +{
> +    DeviceState *dev;
> +
> +    dev = qdev_create(NULL, type);
> +    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
> +    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
> +    if (qdev_init(dev) < 0) {
> +        return NULL;
> +    }
> +
> +    return XICS(dev);
> +}
> +
> +static struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
> +{
> +    struct icp_state *icp = NULL;
> +
> +    icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
> +    if (!icp) {
> +        perror("Failed to create XICS\n");
> +        abort();
> +    }
> +
> +    return icp;
> +}
> +
>  /* pSeries LPAR / sPAPR hardware init */
>  static void ppc_spapr_init(QEMUMachineInitArgs *args)
>  {
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 6bce042..3f72806 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -27,15 +27,68 @@
>  #if !defined(__XICS_H__)
>  #define __XICS_H__
>  
> +#include "hw/sysbus.h"
> +
> +#define TYPE_XICS "xics"
> +#define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
> +
>  #define XICS_IPI        0x2
> -#define XICS_IRQ_BASE   0x10
> +#define XICS_BUID       0x1
> +#define XICS_IRQ_BASE   (XICS_BUID << 12)
> +
> +/*
> + * We currently only support one BUID which is our interrupt base
> + * (the kernel implementation supports more but we don't exploit
> + *  that yet)
> + */
>  
> -struct icp_state;
> +struct icp_state {
> +    /*< private >*/
> +    SysBusDevice parent_obj;
> +    /*< public >*/
> +    uint32_t nr_servers;
> +    uint32_t nr_irqs;
> +    struct icp_server_state *ss;
> +    struct ics_state *ics;
> +};
> +
> +struct icp_server_state {
> +    uint32_t xirr;
> +    uint8_t pending_priority;
> +    uint8_t mfrr;
> +    qemu_irq output;
> +};

If you're exposing all of this, please fix coding style while you're at
it.

> +
> +struct ics_state {
> +    uint32_t nr_irqs;
> +    uint32_t offset;
> +    qemu_irq *qirqs;
> +    bool *islsi;
> +    struct ics_irq_state *irqs;
> +    struct icp_state *icp;
> +};

Shouldn't this be a device too?

> +
> +struct ics_irq_state {
> +    uint32_t server;
> +    uint8_t priority;
> +    uint8_t saved_priority;
> +#define XICS_STATUS_ASSERTED           0x1
> +#define XICS_STATUS_SENT               0x2
> +#define XICS_STATUS_REJECTED           0x4
> +#define XICS_STATUS_MASKED_PENDING     0x8
> +    uint8_t status;
> +};
>  
>  qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
>  void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
>  
> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler);
> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
> +void xics_common_reset(struct icp_state *icp);
> +
>  void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>  
> +extern const VMStateDescription vmstate_icp_server;
> +extern const VMStateDescription vmstate_ics;

This is the wrong way of doing whatever you're trying to do.

Regards,

Anthony Liguori

> +
>  #endif /* __XICS_H__ */
> -- 
> 1.7.10.4
Anthony Liguori July 8, 2013, 6:24 p.m. UTC | #7
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> On 06/27/2013 09:47 PM, David Gibson wrote:
>> On Thu, Jun 27, 2013 at 04:45:45PM +1000, Alexey Kardashevskiy wrote:
>>> Currently XICS interrupt controller is not a QEMU device. As we are going
>>> to support in-kernel emulated XICS which is a part of KVM, it make
>>> sense not to extend the existing XICS and have multiple KVM stub functions
>>> but to create yet another device and share pieces between fully emulated
>>> XICS and in-kernel XICS.
>> 
>> Hmm.  So, I think changing the xics to the qdev/qom framework is a
>> generally good idea.  But I'm not convinced its a good idea to have
>> different devices for the kernel and non-kernel xics.
>
> The idea came from Alex Graf, this is already done for openpic/openpic-kvm.
> The normal practice is to move ioctls to KVM to KVM code and provide empty
> stubs for non-KVM case. There were too many so having a separate xics-kvm
> is kind of help.

The way this should be modelled is:

XICSCommon
 -> XICS
 -> XICSKVM

With vmstate et al being part of XICSCommon.  See how the i8259 and
i8254 are modelled.

Regards,

Anthony Liguori


>
>
>> Won't that
>> prevent migrating from a system with a kernel xics to one without, or
>> vice versa?
>
> Mmm. Do we care much about that?...
> At the moment it is not supported that as VMStateDescription have different
> .name for xics and xics-kvm but easy to fix. And we do not pass a device to
> vmstate_register so that must be it.
>
>
>> 
>>>
>>> The rework includes:
>>> * port to QOM
>>> * made few functions public to use from in-kernel XICS implementation
>>> * made VMStateDescription public to be used for in-kernel XICS migration
>>> * move xics_system_init() to spapr.c, it tries creating fully-emulated
>>> XICS now and will try in-kernel XICS in upcoming patches.
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>>  hw/intc/xics.c        |  109 ++++++++++++++++++++++++++-----------------------
>>>  hw/ppc/spapr.c        |   28 +++++++++++++
>>>  include/hw/ppc/xics.h |   59 ++++++++++++++++++++++++--
>>>  3 files changed, 141 insertions(+), 55 deletions(-)
>>>
>>> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
>>> index 091912e..0e374c8 100644
>>> --- a/hw/intc/xics.c
>>> +++ b/hw/intc/xics.c
>>> @@ -34,13 +34,6 @@
>>>   * ICP: Presentation layer
>>>   */
>>>  
>>> -struct icp_server_state {
>>> -    uint32_t xirr;
>>> -    uint8_t pending_priority;
>>> -    uint8_t mfrr;
>>> -    qemu_irq output;
>>> -};
>>> -
>>>  #define XISR_MASK  0x00ffffff
>>>  #define CPPR_MASK  0xff000000
>>>  
>>> @@ -49,12 +42,6 @@ struct icp_server_state {
>>>  
>>>  struct ics_state;
>>>  
>>> -struct icp_state {
>>> -    long nr_servers;
>>> -    struct icp_server_state *ss;
>>> -    struct ics_state *ics;
>>> -};
>>> -
>>>  static void ics_reject(struct ics_state *ics, int nr);
>>>  static void ics_resend(struct ics_state *ics);
>>>  static void ics_eoi(struct ics_state *ics, int nr);
>>> @@ -171,27 +158,6 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
>>>  /*
>>>   * ICS: Source layer
>>>   */
>>> -
>>> -struct ics_irq_state {
>>> -    int server;
>>> -    uint8_t priority;
>>> -    uint8_t saved_priority;
>>> -#define XICS_STATUS_ASSERTED           0x1
>>> -#define XICS_STATUS_SENT               0x2
>>> -#define XICS_STATUS_REJECTED           0x4
>>> -#define XICS_STATUS_MASKED_PENDING     0x8
>>> -    uint8_t status;
>>> -};
>>> -
>>> -struct ics_state {
>>> -    int nr_irqs;
>>> -    int offset;
>>> -    qemu_irq *qirqs;
>>> -    bool *islsi;
>>> -    struct ics_irq_state *irqs;
>>> -    struct icp_state *icp;
>>> -};
>>> -
>>>  static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
>>>  {
>>>      return (nr >= ics->offset)
>>> @@ -506,9 +472,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>>>      rtas_st(rets, 0, 0); /* Success */
>>>  }
>>>  
>>> -static void xics_reset(void *opaque)
>>> +void xics_common_reset(struct icp_state *icp)
>> 
>> Why do you need to expose this interface?  Couldn't the caller use
>> qdev_reset(xics) just as easily?
>> 
>>>  {
>>> -    struct icp_state *icp = (struct icp_state *)opaque;
>>>      struct ics_state *ics = icp->ics;
>>>      int i;
>>>  
>>> @@ -527,7 +492,12 @@ static void xics_reset(void *opaque)
>>>      }
>>>  }
>>>  
>>> -void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>> +static void xics_reset(DeviceState *d)
>>> +{
>>> +    xics_common_reset(XICS(d));
>>> +}
>>> +
>>> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>>  {
>>>      CPUState *cs = CPU(cpu);
>>>      CPUPPCState *env = &cpu->env;
>>> @@ -551,37 +521,72 @@ void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>>      }
>>>  }
>>>  
>>> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
>>> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>> +{
>>> +    xics_common_cpu_setup(icp, cpu);
>>> +}
>>> +
>>> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler)
>>>  {
>>> -    struct icp_state *icp;
>>> -    struct ics_state *ics;
>>> +    struct ics_state *ics = icp->ics;
>>>  
>>> -    icp = g_malloc0(sizeof(*icp));
>>> -    icp->nr_servers = nr_servers;
>>>      icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
>>>  
>>>      ics = g_malloc0(sizeof(*ics));
>>> -    ics->nr_irqs = nr_irqs;
>>> +    ics->nr_irqs = icp->nr_irqs;
>>>      ics->offset = XICS_IRQ_BASE;
>>> -    ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
>>> -    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
>>> +    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(struct ics_irq_state));
>>> +    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
>>>  
>>>      icp->ics = ics;
>>>      ics->icp = icp;
>>>  
>>> -    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
>>> +    ics->qirqs = qemu_allocate_irqs(handler, ics, ics->nr_irqs);
>>> +}
>>>  
>>> -    spapr_register_hypercall(H_CPPR, h_cppr);
>>> -    spapr_register_hypercall(H_IPI, h_ipi);
>>> -    spapr_register_hypercall(H_XIRR, h_xirr);
>>> -    spapr_register_hypercall(H_EOI, h_eoi);
>>> +static void xics_realize(DeviceState *dev, Error **errp)
>>> +{
>>> +    struct icp_state *icp = XICS(dev);
>>> +
>>> +    xics_common_init(icp, ics_set_irq);
>>>  
>>>      spapr_rtas_register("ibm,set-xive", rtas_set_xive);
>>>      spapr_rtas_register("ibm,get-xive", rtas_get_xive);
>>>      spapr_rtas_register("ibm,int-off", rtas_int_off);
>>>      spapr_rtas_register("ibm,int-on", rtas_int_on);
>>>  
>>> -    qemu_register_reset(xics_reset, icp);
>>> +}
>>> +
>>> +static Property xics_properties[] = {
>>> +    DEFINE_PROP_UINT32("nr_servers", struct icp_state, nr_servers, -1),
>>> +    DEFINE_PROP_UINT32("nr_irqs", struct icp_state, nr_irqs, -1),
>>> +    DEFINE_PROP_END_OF_LIST(),
>>> +};
>>> +
>>> +static void xics_class_init(ObjectClass *oc, void *data)
>>> +{
>>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>>> +
>>> +    dc->realize = xics_realize;
>>> +    dc->props = xics_properties;
>>> +    dc->reset = xics_reset;
>>> +}
>>> +
>>> +static const TypeInfo xics_info = {
>>> +    .name          = TYPE_XICS,
>>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>>> +    .instance_size = sizeof(struct icp_state),
>>> +    .class_init    = xics_class_init,
>>> +};
>>> +
>>> +static void xics_register_types(void)
>>> +{
>>> +    spapr_register_hypercall(H_CPPR, h_cppr);
>>> +    spapr_register_hypercall(H_IPI, h_ipi);
>>> +    spapr_register_hypercall(H_XIRR, h_xirr);
>>> +    spapr_register_hypercall(H_EOI, h_eoi);
>>>  
>>> -    return icp;
>>> +    type_register_static(&xics_info);
>>>  }
>>> +
>>> +type_init(xics_register_types)
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 38c29b7..def3505 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -719,6 +719,34 @@ static int spapr_vga_init(PCIBus *pci_bus)
>>>      }
>>>  }
>>>  
>>> +static struct icp_state *try_create_xics(const char *type, int nr_servers,
>>> +                                         int nr_irqs)
>>> +{
>>> +    DeviceState *dev;
>>> +
>>> +    dev = qdev_create(NULL, type);
>>> +    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
>>> +    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
>>> +    if (qdev_init(dev) < 0) {
>>> +        return NULL;
>> 
>> You could just use qdev_init_nofail() here to avoid the manual
>> handling of failures.
>> 
>>> +    }
>>> +
>>> +    return XICS(dev);
>>> +}
>>> +
>>> +static struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
>>> +{
>>> +    struct icp_state *icp = NULL;
>>> +
>>> +    icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
>>> +    if (!icp) {
>>> +        perror("Failed to create XICS\n");
>>> +        abort();
>>> +    }
>>> +
>>> +    return icp;
>>> +}
>>> +
>>>  /* pSeries LPAR / sPAPR hardware init */
>>>  static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>>  {
>>> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
>>> index 6bce042..3f72806 100644
>>> --- a/include/hw/ppc/xics.h
>>> +++ b/include/hw/ppc/xics.h
>>> @@ -27,15 +27,68 @@
>>>  #if !defined(__XICS_H__)
>>>  #define __XICS_H__
>>>  
>>> +#include "hw/sysbus.h"
>>> +
>>> +#define TYPE_XICS "xics"
>>> +#define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
>>> +
>>>  #define XICS_IPI        0x2
>>> -#define XICS_IRQ_BASE   0x10
>>> +#define XICS_BUID       0x1
>>> +#define XICS_IRQ_BASE   (XICS_BUID << 12)
>>> +
>>> +/*
>>> + * We currently only support one BUID which is our interrupt base
>>> + * (the kernel implementation supports more but we don't exploit
>>> + *  that yet)
>>> + */
>>>  
>>> -struct icp_state;
>>> +struct icp_state {
>>> +    /*< private >*/
>>> +    SysBusDevice parent_obj;
>>> +    /*< public >*/
>>> +    uint32_t nr_servers;
>>> +    uint32_t nr_irqs;
>>> +    struct icp_server_state *ss;
>>> +    struct ics_state *ics;
>>> +};
>>> +
>>> +struct icp_server_state {
>>> +    uint32_t xirr;
>>> +    uint8_t pending_priority;
>>> +    uint8_t mfrr;
>>> +    qemu_irq output;
>>> +};
>> 
>> The indivudual server_state and irq_state structures probably
>> shouldn't be exported.
>> 
>>> +struct ics_state {
>>> +    uint32_t nr_irqs;
>>> +    uint32_t offset;
>>> +    qemu_irq *qirqs;
>>> +    bool *islsi;
>>> +    struct ics_irq_state *irqs;
>>> +    struct icp_state *icp;
>>> +};
>>> +
>>> +struct ics_irq_state {
>>> +    uint32_t server;
>>> +    uint8_t priority;
>>> +    uint8_t saved_priority;
>>> +#define XICS_STATUS_ASSERTED           0x1
>>> +#define XICS_STATUS_SENT               0x2
>>> +#define XICS_STATUS_REJECTED           0x4
>>> +#define XICS_STATUS_MASKED_PENDING     0x8
>>> +    uint8_t status;
>>> +};
>>>  
>>>  qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
>>>  void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
>>>  
>>> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
>>> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler);
>>> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>>> +void xics_common_reset(struct icp_state *icp);
>>> +
>>>  void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>>>  
>>> +extern const VMStateDescription vmstate_icp_server;
>>> +extern const VMStateDescription vmstate_ics;
>>> +
>>>  #endif /* __XICS_H__ */
>> 
>
>
> -- 
> Alexey
Alexey Kardashevskiy July 9, 2013, 3:40 a.m. UTC | #8
On 07/09/2013 04:22 AM, Anthony Liguori wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> 
>> Currently XICS interrupt controller is not a QEMU device. As we are going
>> to support in-kernel emulated XICS which is a part of KVM, it make
>> sense not to extend the existing XICS and have multiple KVM stub functions
>> but to create yet another device and share pieces between fully emulated
>> XICS and in-kernel XICS.
>>
>> The rework includes:
>> * port to QOM
>> * made few functions public to use from in-kernel XICS implementation
>> * made VMStateDescription public to be used for in-kernel XICS migration
>> * move xics_system_init() to spapr.c, it tries creating fully-emulated
>> XICS now and will try in-kernel XICS in upcoming patches.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  hw/intc/xics.c        |  109 ++++++++++++++++++++++++++-----------------------
>>  hw/ppc/spapr.c        |   28 +++++++++++++
>>  include/hw/ppc/xics.h |   59 ++++++++++++++++++++++++--
>>  3 files changed, 141 insertions(+), 55 deletions(-)
>>
>> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
>> index 091912e..0e374c8 100644
>> --- a/hw/intc/xics.c
>> +++ b/hw/intc/xics.c
>> @@ -34,13 +34,6 @@
>>   * ICP: Presentation layer
>>   */
>>  
>> -struct icp_server_state {
>> -    uint32_t xirr;
>> -    uint8_t pending_priority;
>> -    uint8_t mfrr;
>> -    qemu_irq output;
>> -};
>> -
>>  #define XISR_MASK  0x00ffffff
>>  #define CPPR_MASK  0xff000000
>>  
>> @@ -49,12 +42,6 @@ struct icp_server_state {
>>  
>>  struct ics_state;
>>  
>> -struct icp_state {
>> -    long nr_servers;
>> -    struct icp_server_state *ss;
>> -    struct ics_state *ics;
>> -};
>> -
>>  static void ics_reject(struct ics_state *ics, int nr);
>>  static void ics_resend(struct ics_state *ics);
>>  static void ics_eoi(struct ics_state *ics, int nr);
>> @@ -171,27 +158,6 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
>>  /*
>>   * ICS: Source layer
>>   */
>> -
>> -struct ics_irq_state {
>> -    int server;
>> -    uint8_t priority;
>> -    uint8_t saved_priority;
>> -#define XICS_STATUS_ASSERTED           0x1
>> -#define XICS_STATUS_SENT               0x2
>> -#define XICS_STATUS_REJECTED           0x4
>> -#define XICS_STATUS_MASKED_PENDING     0x8
>> -    uint8_t status;
>> -};
>> -
>> -struct ics_state {
>> -    int nr_irqs;
>> -    int offset;
>> -    qemu_irq *qirqs;
>> -    bool *islsi;
>> -    struct ics_irq_state *irqs;
>> -    struct icp_state *icp;
>> -};
>> -
>>  static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
>>  {
>>      return (nr >= ics->offset)
>> @@ -506,9 +472,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>>      rtas_st(rets, 0, 0); /* Success */
>>  }
>>  
>> -static void xics_reset(void *opaque)
>> +void xics_common_reset(struct icp_state *icp)
>>  {
>> -    struct icp_state *icp = (struct icp_state *)opaque;
>>      struct ics_state *ics = icp->ics;
>>      int i;
>>  
>> @@ -527,7 +492,12 @@ static void xics_reset(void *opaque)
>>      }
>>  }
>>  
>> -void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>> +static void xics_reset(DeviceState *d)
>> +{
>> +    xics_common_reset(XICS(d));
>> +}
>> +
>> +void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>  {
>>      CPUState *cs = CPU(cpu);
>>      CPUPPCState *env = &cpu->env;
>> @@ -551,37 +521,72 @@ void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>>      }
>>  }
>>  
>> -struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
>> +void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
>> +{
>> +    xics_common_cpu_setup(icp, cpu);
>> +}
>> +
>> +void xics_common_init(struct icp_state *icp, qemu_irq_handler handler)
>>  {
>> -    struct icp_state *icp;
>> -    struct ics_state *ics;
>> +    struct ics_state *ics = icp->ics;
>>  
>> -    icp = g_malloc0(sizeof(*icp));
>> -    icp->nr_servers = nr_servers;
>>      icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
>>  
>>      ics = g_malloc0(sizeof(*ics));
>> -    ics->nr_irqs = nr_irqs;
>> +    ics->nr_irqs = icp->nr_irqs;
>>      ics->offset = XICS_IRQ_BASE;
>> -    ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
>> -    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
>> +    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(struct ics_irq_state));
>> +    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
>>  
>>      icp->ics = ics;
>>      ics->icp = icp;
>>  
>> -    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
>> +    ics->qirqs = qemu_allocate_irqs(handler, ics, ics->nr_irqs);
>> +}
>>  
>> -    spapr_register_hypercall(H_CPPR, h_cppr);
>> -    spapr_register_hypercall(H_IPI, h_ipi);
>> -    spapr_register_hypercall(H_XIRR, h_xirr);
>> -    spapr_register_hypercall(H_EOI, h_eoi);
>> +static void xics_realize(DeviceState *dev, Error **errp)
>> +{
>> +    struct icp_state *icp = XICS(dev);
>> +
>> +    xics_common_init(icp, ics_set_irq);
>>  
>>      spapr_rtas_register("ibm,set-xive", rtas_set_xive);
>>      spapr_rtas_register("ibm,get-xive", rtas_get_xive);
>>      spapr_rtas_register("ibm,int-off", rtas_int_off);
>>      spapr_rtas_register("ibm,int-on", rtas_int_on);
>>  
>> -    qemu_register_reset(xics_reset, icp);
>> +}
>> +
>> +static Property xics_properties[] = {
>> +    DEFINE_PROP_UINT32("nr_servers", struct icp_state, nr_servers, -1),
>> +    DEFINE_PROP_UINT32("nr_irqs", struct icp_state, nr_irqs, -1),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void xics_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +
>> +    dc->realize = xics_realize;
>> +    dc->props = xics_properties;
>> +    dc->reset = xics_reset;
>> +}
>> +
>> +static const TypeInfo xics_info = {
>> +    .name          = TYPE_XICS,
>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>> +    .instance_size = sizeof(struct icp_state),
>> +    .class_init    = xics_class_init,
>> +};
>> +
>> +static void xics_register_types(void)
>> +{
>> +    spapr_register_hypercall(H_CPPR, h_cppr);
>> +    spapr_register_hypercall(H_IPI, h_ipi);
>> +    spapr_register_hypercall(H_XIRR, h_xirr);
>> +    spapr_register_hypercall(H_EOI, h_eoi);
>>  
>> -    return icp;
>> +    type_register_static(&xics_info);
>>  }
>> +
>> +type_init(xics_register_types)
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 38c29b7..def3505 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -719,6 +719,34 @@ static int spapr_vga_init(PCIBus *pci_bus)
>>      }
>>  }
>>  
>> +static struct icp_state *try_create_xics(const char *type, int nr_servers,
>> +                                         int nr_irqs)
>> +{
>> +    DeviceState *dev;
>> +
>> +    dev = qdev_create(NULL, type);
>> +    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
>> +    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
>> +    if (qdev_init(dev) < 0) {
>> +        return NULL;
>> +    }
>> +
>> +    return XICS(dev);
>> +}
>> +
>> +static struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
>> +{
>> +    struct icp_state *icp = NULL;
>> +
>> +    icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
>> +    if (!icp) {
>> +        perror("Failed to create XICS\n");
>> +        abort();
>> +    }
>> +
>> +    return icp;
>> +}
>> +
>>  /* pSeries LPAR / sPAPR hardware init */
>>  static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>  {
>> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
>> index 6bce042..3f72806 100644
>> --- a/include/hw/ppc/xics.h
>> +++ b/include/hw/ppc/xics.h
>> @@ -27,15 +27,68 @@
>>  #if !defined(__XICS_H__)
>>  #define __XICS_H__
>>  
>> +#include "hw/sysbus.h"
>> +
>> +#define TYPE_XICS "xics"
>> +#define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
>> +
>>  #define XICS_IPI        0x2
>> -#define XICS_IRQ_BASE   0x10
>> +#define XICS_BUID       0x1
>> +#define XICS_IRQ_BASE   (XICS_BUID << 12)
>> +
>> +/*
>> + * We currently only support one BUID which is our interrupt base
>> + * (the kernel implementation supports more but we don't exploit
>> + *  that yet)
>> + */
>>  
>> -struct icp_state;
>> +struct icp_state {
>> +    /*< private >*/
>> +    SysBusDevice parent_obj;
>> +    /*< public >*/
>> +    uint32_t nr_servers;
>> +    uint32_t nr_irqs;
>> +    struct icp_server_state *ss;
>> +    struct ics_state *ics;
>> +};
>> +
>> +struct icp_server_state {
>> +    uint32_t xirr;
>> +    uint8_t pending_priority;
>> +    uint8_t mfrr;
>> +    qemu_irq output;
>> +};
> 
> If you're exposing all of this, please fix coding style while you're at
> it.

>> +
>> +struct ics_state {
>> +    uint32_t nr_irqs;
>> +    uint32_t offset;
>> +    qemu_irq *qirqs;
>> +    bool *islsi;
>> +    struct ics_irq_state *irqs;
>> +    struct icp_state *icp;
>> +};
> 
> Shouldn't this be a device too?

No, why? It is a per CPU state of XICS controller, never exists apart from
XICS.
Benjamin Herrenschmidt July 9, 2013, 4:48 a.m. UTC | #9
On Tue, 2013-07-09 at 13:40 +1000, Alexey Kardashevskiy wrote:
> No, why? It is a per CPU state of XICS controller, never exists apart
> from XICS.

ICP is. ICS is  ... different but can mostly be considered to be the
XICS itself.

Anthony, we could be completely anal about it and create a gigantic
cathedral of devices or just be a bit realistic and do something simpler
that has the exact same functionality :)

Basically, in HW the layout of the interrupt network is:

 - One ICP per processor thread (the "presenter"). This contains the
registers to fetch a pending interrupt (ack), EOI, and control the
processor priority.

 - One ICS per logical source of interrupts (ie, one per PCI host
bridge, and a few others here or there). This contains the per-interrupt
source configuration (target processor(s), priority, mask) and the
per-interrupt internal state.

Under PAPR, there is a single "virtual" ICS ... somewhat (it's a bit
oddball what pHyp does here, arguably there are two but we can ignore
that distinction). There is no register level access. A pair of firmware
(RTAS) calls is used to configure each virtual interrupt.

So our model here is somewhat the same. We have one ICS in the emulated
XICS which arguably *is* the emulated XICS, there's no point making it a
separate "device", that would just be gross, and each VCPU has an
associated ICP.

Cheers,
Ben.
Anthony Liguori July 9, 2013, 1:58 p.m. UTC | #10
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Tue, 2013-07-09 at 13:40 +1000, Alexey Kardashevskiy wrote:
>> No, why? It is a per CPU state of XICS controller, never exists apart
>> from XICS.
>
> ICP is. ICS is  ... different but can mostly be considered to be the
> XICS itself.
>
> Anthony, we could be completely anal about it and create a gigantic
> cathedral of devices or just be a bit realistic and do something simpler
> that has the exact same functionality :)

There's very little complexity in making something a device.  It's just
a matter of sticking a DeviceState member in the struct, changing the
way the object is created (object_new vs. malloc), and adding a
TypeInfo.

There's a very good reason to have things be devices too.  You can only
control the section naming of devices for live migration.  The only way
to set compatibility properties for live migration is by having device
properties too.

You haven't dealt with these problems yet, but you will, and doing the
work up front means that you don't have to break migration once in order
to keep it compatible in the future.

> Basically, in HW the layout of the interrupt network is:
>
>  - One ICP per processor thread (the "presenter"). This contains the
> registers to fetch a pending interrupt (ack), EOI, and control the
> processor priority.
>
>  - One ICS per logical source of interrupts (ie, one per PCI host
> bridge, and a few others here or there). This contains the per-interrupt
> source configuration (target processor(s), priority, mask) and the
> per-interrupt internal state.

This sounds an awful lot like the relationship between the I/O APIC(s)
and the local APICs FWIW.

> Under PAPR, there is a single "virtual" ICS ... somewhat (it's a bit
> oddball what pHyp does here, arguably there are two but we can ignore
> that distinction). There is no register level access. A pair of firmware
> (RTAS) calls is used to configure each virtual interrupt.
>
> So our model here is somewhat the same. We have one ICS in the emulated
> XICS which arguably *is* the emulated XICS, there's no point making it a
> separate "device", that would just be gross, and each VCPU has an
> associated ICP.

There's nothing gross about making the things that are devices devices.

Regards,

Anthony Liguori

>
> Cheers,
> Ben.
Alexey Kardashevskiy July 10, 2013, 3:06 a.m. UTC | #11
On 07/09/2013 11:58 PM, Anthony Liguori wrote:
> Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
> 
>> On Tue, 2013-07-09 at 13:40 +1000, Alexey Kardashevskiy wrote:
>>> No, why? It is a per CPU state of XICS controller, never exists apart
>>> from XICS.
>>
>> ICP is. ICS is  ... different but can mostly be considered to be the
>> XICS itself.
>>
>> Anthony, we could be completely anal about it and create a gigantic
>> cathedral of devices or just be a bit realistic and do something simpler
>> that has the exact same functionality :)
> 
> There's very little complexity in making something a device.  It's just
> a matter of sticking a DeviceState member in the struct, changing the
> way the object is created (object_new vs. malloc), and adding a
> TypeInfo.
> 
> There's a very good reason to have things be devices too.  You can only
> control the section naming of devices for live migration.  The only way
> to set compatibility properties for live migration is by having device
> properties too.
> 
> You haven't dealt with these problems yet, but you will, and doing the
> work up front means that you don't have to break migration once in order
> to keep it compatible in the future.



I have got a problem right now. I need to have 2 devices - xics and
xics-kvm, give them the same VMState properties and migration names and
have different pre_load/post_load (btw the whole point of separating
xics-kvm from xics).

How do I solve this without anyone saying that I am doing terribly a wrong
thing?

I already asked this in "[Qemu-devel] [PATCH 05/17] pseries: savevm support
for XICS interrupt controller" but have not seen any response yet. Thank you.




>> Basically, in HW the layout of the interrupt network is:
>>
>>  - One ICP per processor thread (the "presenter"). This contains the
>> registers to fetch a pending interrupt (ack), EOI, and control the
>> processor priority.
>>
>>  - One ICS per logical source of interrupts (ie, one per PCI host
>> bridge, and a few others here or there). This contains the per-interrupt
>> source configuration (target processor(s), priority, mask) and the
>> per-interrupt internal state.
> 
> This sounds an awful lot like the relationship between the I/O APIC(s)
> and the local APICs FWIW.
> 
>> Under PAPR, there is a single "virtual" ICS ... somewhat (it's a bit
>> oddball what pHyp does here, arguably there are two but we can ignore
>> that distinction). There is no register level access. A pair of firmware
>> (RTAS) calls is used to configure each virtual interrupt.
>>
>> So our model here is somewhat the same. We have one ICS in the emulated
>> XICS which arguably *is* the emulated XICS, there's no point making it a
>> separate "device", that would just be gross, and each VCPU has an
>> associated ICP.
> 
> There's nothing gross about making the things that are devices devices.
Benjamin Herrenschmidt July 10, 2013, 3:26 a.m. UTC | #12
On Tue, 2013-07-09 at 08:58 -0500, Anthony Liguori wrote:
> There's nothing gross about making the things that are devices
> devices.

But there is no such thing as the XICS ...

The "XICS" is just the combination of ICP's and ICS... so XICS *is* the
device...

Cheers,
Ben.
Anthony Liguori July 10, 2013, 12:09 p.m. UTC | #13
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Tue, 2013-07-09 at 08:58 -0500, Anthony Liguori wrote:
>> There's nothing gross about making the things that are devices
>> devices.
>
> But there is no such thing as the XICS ...
>
> The "XICS" is just the combination of ICP's and ICS... so XICS *is* the
> device...

Then you have an XICS device which has a single ICP and multiple ICSs
as child devices.

Regards,

Anthony Liguori

>
> Cheers,
> Ben.
diff mbox

Patch

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 091912e..0e374c8 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -34,13 +34,6 @@ 
  * ICP: Presentation layer
  */
 
-struct icp_server_state {
-    uint32_t xirr;
-    uint8_t pending_priority;
-    uint8_t mfrr;
-    qemu_irq output;
-};
-
 #define XISR_MASK  0x00ffffff
 #define CPPR_MASK  0xff000000
 
@@ -49,12 +42,6 @@  struct icp_server_state {
 
 struct ics_state;
 
-struct icp_state {
-    long nr_servers;
-    struct icp_server_state *ss;
-    struct ics_state *ics;
-};
-
 static void ics_reject(struct ics_state *ics, int nr);
 static void ics_resend(struct ics_state *ics);
 static void ics_eoi(struct ics_state *ics, int nr);
@@ -171,27 +158,6 @@  static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
 /*
  * ICS: Source layer
  */
-
-struct ics_irq_state {
-    int server;
-    uint8_t priority;
-    uint8_t saved_priority;
-#define XICS_STATUS_ASSERTED           0x1
-#define XICS_STATUS_SENT               0x2
-#define XICS_STATUS_REJECTED           0x4
-#define XICS_STATUS_MASKED_PENDING     0x8
-    uint8_t status;
-};
-
-struct ics_state {
-    int nr_irqs;
-    int offset;
-    qemu_irq *qirqs;
-    bool *islsi;
-    struct ics_irq_state *irqs;
-    struct icp_state *icp;
-};
-
 static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
 {
     return (nr >= ics->offset)
@@ -506,9 +472,8 @@  static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     rtas_st(rets, 0, 0); /* Success */
 }
 
-static void xics_reset(void *opaque)
+void xics_common_reset(struct icp_state *icp)
 {
-    struct icp_state *icp = (struct icp_state *)opaque;
     struct ics_state *ics = icp->ics;
     int i;
 
@@ -527,7 +492,12 @@  static void xics_reset(void *opaque)
     }
 }
 
-void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
+static void xics_reset(DeviceState *d)
+{
+    xics_common_reset(XICS(d));
+}
+
+void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
@@ -551,37 +521,72 @@  void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
     }
 }
 
-struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
+void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
+{
+    xics_common_cpu_setup(icp, cpu);
+}
+
+void xics_common_init(struct icp_state *icp, qemu_irq_handler handler)
 {
-    struct icp_state *icp;
-    struct ics_state *ics;
+    struct ics_state *ics = icp->ics;
 
-    icp = g_malloc0(sizeof(*icp));
-    icp->nr_servers = nr_servers;
     icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
 
     ics = g_malloc0(sizeof(*ics));
-    ics->nr_irqs = nr_irqs;
+    ics->nr_irqs = icp->nr_irqs;
     ics->offset = XICS_IRQ_BASE;
-    ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
-    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
+    ics->irqs = g_malloc0(ics->nr_irqs * sizeof(struct ics_irq_state));
+    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
 
     icp->ics = ics;
     ics->icp = icp;
 
-    ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
+    ics->qirqs = qemu_allocate_irqs(handler, ics, ics->nr_irqs);
+}
 
-    spapr_register_hypercall(H_CPPR, h_cppr);
-    spapr_register_hypercall(H_IPI, h_ipi);
-    spapr_register_hypercall(H_XIRR, h_xirr);
-    spapr_register_hypercall(H_EOI, h_eoi);
+static void xics_realize(DeviceState *dev, Error **errp)
+{
+    struct icp_state *icp = XICS(dev);
+
+    xics_common_init(icp, ics_set_irq);
 
     spapr_rtas_register("ibm,set-xive", rtas_set_xive);
     spapr_rtas_register("ibm,get-xive", rtas_get_xive);
     spapr_rtas_register("ibm,int-off", rtas_int_off);
     spapr_rtas_register("ibm,int-on", rtas_int_on);
 
-    qemu_register_reset(xics_reset, icp);
+}
+
+static Property xics_properties[] = {
+    DEFINE_PROP_UINT32("nr_servers", struct icp_state, nr_servers, -1),
+    DEFINE_PROP_UINT32("nr_irqs", struct icp_state, nr_irqs, -1),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xics_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    dc->realize = xics_realize;
+    dc->props = xics_properties;
+    dc->reset = xics_reset;
+}
+
+static const TypeInfo xics_info = {
+    .name          = TYPE_XICS,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(struct icp_state),
+    .class_init    = xics_class_init,
+};
+
+static void xics_register_types(void)
+{
+    spapr_register_hypercall(H_CPPR, h_cppr);
+    spapr_register_hypercall(H_IPI, h_ipi);
+    spapr_register_hypercall(H_XIRR, h_xirr);
+    spapr_register_hypercall(H_EOI, h_eoi);
 
-    return icp;
+    type_register_static(&xics_info);
 }
+
+type_init(xics_register_types)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 38c29b7..def3505 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -719,6 +719,34 @@  static int spapr_vga_init(PCIBus *pci_bus)
     }
 }
 
+static struct icp_state *try_create_xics(const char *type, int nr_servers,
+                                         int nr_irqs)
+{
+    DeviceState *dev;
+
+    dev = qdev_create(NULL, type);
+    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
+    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
+    if (qdev_init(dev) < 0) {
+        return NULL;
+    }
+
+    return XICS(dev);
+}
+
+static struct icp_state *xics_system_init(int nr_servers, int nr_irqs)
+{
+    struct icp_state *icp = NULL;
+
+    icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
+    if (!icp) {
+        perror("Failed to create XICS\n");
+        abort();
+    }
+
+    return icp;
+}
+
 /* pSeries LPAR / sPAPR hardware init */
 static void ppc_spapr_init(QEMUMachineInitArgs *args)
 {
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 6bce042..3f72806 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -27,15 +27,68 @@ 
 #if !defined(__XICS_H__)
 #define __XICS_H__
 
+#include "hw/sysbus.h"
+
+#define TYPE_XICS "xics"
+#define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
+
 #define XICS_IPI        0x2
-#define XICS_IRQ_BASE   0x10
+#define XICS_BUID       0x1
+#define XICS_IRQ_BASE   (XICS_BUID << 12)
+
+/*
+ * We currently only support one BUID which is our interrupt base
+ * (the kernel implementation supports more but we don't exploit
+ *  that yet)
+ */
 
-struct icp_state;
+struct icp_state {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    uint32_t nr_servers;
+    uint32_t nr_irqs;
+    struct icp_server_state *ss;
+    struct ics_state *ics;
+};
+
+struct icp_server_state {
+    uint32_t xirr;
+    uint8_t pending_priority;
+    uint8_t mfrr;
+    qemu_irq output;
+};
+
+struct ics_state {
+    uint32_t nr_irqs;
+    uint32_t offset;
+    qemu_irq *qirqs;
+    bool *islsi;
+    struct ics_irq_state *irqs;
+    struct icp_state *icp;
+};
+
+struct ics_irq_state {
+    uint32_t server;
+    uint8_t priority;
+    uint8_t saved_priority;
+#define XICS_STATUS_ASSERTED           0x1
+#define XICS_STATUS_SENT               0x2
+#define XICS_STATUS_REJECTED           0x4
+#define XICS_STATUS_MASKED_PENDING     0x8
+    uint8_t status;
+};
 
 qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
 void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
 
-struct icp_state *xics_system_init(int nr_servers, int nr_irqs);
+void xics_common_init(struct icp_state *icp, qemu_irq_handler handler);
+void xics_common_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
+void xics_common_reset(struct icp_state *icp);
+
 void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
 
+extern const VMStateDescription vmstate_icp_server;
+extern const VMStateDescription vmstate_ics;
+
 #endif /* __XICS_H__ */