diff mbox series

[v5,15/36] spapr: introdude a new machine IRQ backend for XIVE

Message ID 20181116105729.23240-16-clg@kaod.org
State New
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater Nov. 16, 2018, 10:57 a.m. UTC
The XIVE IRQ backend uses the same layout as the new XICS backend but
covers the full range of the IRQ number space. The IRQ numbers for the
CPU IPIs are allocated at the bottom of this space, below 4K, to
preserve compatibility with XICS which does not use that range.

This should be enough given that the maximum number of CPUs is 1024
for the sPAPR machine under QEMU. For the record, the biggest POWER8
or POWER9 system has a maximum of 1536 HW threads (16 sockets, 192
cores, SMT8).

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/spapr.h     |   2 +
 include/hw/ppc/spapr_irq.h |   7 ++-
 hw/ppc/spapr.c             |   2 +-
 hw/ppc/spapr_irq.c         | 119 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 124 insertions(+), 6 deletions(-)

Comments

David Gibson Nov. 28, 2018, 3:28 a.m. UTC | #1
On Fri, Nov 16, 2018 at 11:57:08AM +0100, Cédric Le Goater wrote:
> The XIVE IRQ backend uses the same layout as the new XICS backend but
> covers the full range of the IRQ number space. The IRQ numbers for the
> CPU IPIs are allocated at the bottom of this space, below 4K, to
> preserve compatibility with XICS which does not use that range.
> 
> This should be enough given that the maximum number of CPUs is 1024
> for the sPAPR machine under QEMU. For the record, the biggest POWER8
> or POWER9 system has a maximum of 1536 HW threads (16 sockets, 192
> cores, SMT8).
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/spapr.h     |   2 +
>  include/hw/ppc/spapr_irq.h |   7 ++-
>  hw/ppc/spapr.c             |   2 +-
>  hw/ppc/spapr_irq.c         | 119 ++++++++++++++++++++++++++++++++++++-
>  4 files changed, 124 insertions(+), 6 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 6279711fe8f7..1fbc2663e06c 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry;
>  typedef struct sPAPREventSource sPAPREventSource;
>  typedef struct sPAPRPendingHPT sPAPRPendingHPT;
>  typedef struct ICSState ICSState;
> +typedef struct sPAPRXive sPAPRXive;
>  
>  #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
>  #define SPAPR_ENTRY_POINT       0x100
> @@ -175,6 +176,7 @@ struct sPAPRMachineState {
>      const char *icp_type;
>      int32_t irq_map_nr;
>      unsigned long *irq_map;
> +    sPAPRXive  *xive;
>  
>      bool cmd_line_caps[SPAPR_CAP_NUM];
>      sPAPRCapabilities def, eff, mig;
> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> index 0e9229bf219e..c854ae527808 100644
> --- a/include/hw/ppc/spapr_irq.h
> +++ b/include/hw/ppc/spapr_irq.h
> @@ -13,6 +13,7 @@
>  /*
>   * IRQ range offsets per device type
>   */
> +#define SPAPR_IRQ_IPI        0x0
>  #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
>  #define SPAPR_IRQ_HOTPLUG    0x1001
>  #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
> @@ -33,7 +34,8 @@ typedef struct sPAPRIrq {
>      uint32_t    nr_irqs;
>      uint32_t    nr_msis;
>  
> -    void (*init)(sPAPRMachineState *spapr, int nr_irqs, Error **errp);
> +    void (*init)(sPAPRMachineState *spapr, int nr_irqs, int nr_servers,
> +                 Error **errp);
>      int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
>      void (*free)(sPAPRMachineState *spapr, int irq, int num);
>      qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
> @@ -42,8 +44,9 @@ typedef struct sPAPRIrq {
>  
>  extern sPAPRIrq spapr_irq_xics;
>  extern sPAPRIrq spapr_irq_xics_legacy;
> +extern sPAPRIrq spapr_irq_xive;
>  
> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp);

I don't see why nr_servers needs to become a parameter, since it can
be derived from spapr within this routine.

>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
>  void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
>  qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e470efe7993c..9f8c19e56e7a 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2594,7 +2594,7 @@ static void spapr_machine_init(MachineState *machine)
>      spapr_set_vsmt_mode(spapr, &error_fatal);
>  
>      /* Set up Interrupt Controller before we create the VCPUs */
> -    spapr_irq_init(spapr, &error_fatal);
> +    spapr_irq_init(spapr, xics_max_server_number(spapr), &error_fatal);

We should rename xics_max_server_number() since it's no longer xics
specific.

>      /* Set up containers for ibm,client-architecture-support negotiated options
>       */
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index bac450ffff23..2569ae1bc7f8 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -12,6 +12,7 @@
>  #include "qemu/error-report.h"
>  #include "qapi/error.h"
>  #include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_xive.h"
>  #include "hw/ppc/xics.h"
>  #include "sysemu/kvm.h"
>  
> @@ -91,7 +92,7 @@ error:
>  }
>  
>  static void spapr_irq_init_xics(sPAPRMachineState *spapr, int nr_irqs,
> -                                Error **errp)
> +                                int nr_servers, Error **errp)
>  {
>      MachineState *machine = MACHINE(spapr);
>      Error *local_err = NULL;
> @@ -204,10 +205,122 @@ sPAPRIrq spapr_irq_xics = {
>      .print_info  = spapr_irq_print_info_xics,
>  };
>  
> + /*
> + * XIVE IRQ backend.
> + */
> +static sPAPRXive *spapr_xive_create(sPAPRMachineState *spapr,
> +                                    const char *type_xive, int nr_irqs,
> +                                    int nr_servers, Error **errp)
> +{
> +    sPAPRXive *xive;
> +    Error *local_err = NULL;
> +    Object *obj;
> +    uint32_t nr_ends = nr_servers << 3; /* 8 priority ENDs per CPU */
> +    int i;
> +
> +    obj = object_new(type_xive);

What's the reason for making the type a parameter, rather than just
using the #define here.

> +    object_property_set_int(obj, nr_irqs, "nr-irqs", &error_abort);
> +    object_property_set_int(obj, nr_ends, "nr-ends", &error_abort);

This is still within the sPAPR code, and you have a pointer to the
MachineState, so I don't see why you could't just derive nr_irqs and
nr_servers from that, rather than having them passed in.

> +    object_property_set_bool(obj, true, "realized", &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return NULL;
> +    }
> +    qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());

Whereas the XiveSource and XiveRouter I think make more sense as
"device components" rather than SysBusDevice subclasses, I think it
*does* make sense for the PAPR-XIVE object to be a full fledged
SysBusDevice.

And for that reason, I think it makes more sense to create it with
qdev_create(), which should avoid having to manually fiddle with the
parent bus.

> +    xive = SPAPR_XIVE(obj);
> +
> +    /* Enable the CPU IPIs */
> +    for (i = 0; i < nr_servers; ++i) {
> +        spapr_xive_irq_enable(xive, SPAPR_IRQ_IPI + i, false);

This comment possibly belonged on an earlier patch.  I don't love the
"..._enable" name - to me that suggests something runtime rather than
configuration time.  A better option isn't quickly occurring to me
though :/.

> +    }
> +
> +    return xive;
> +}
> +
> +static void spapr_irq_init_xive(sPAPRMachineState *spapr, int nr_irqs,
> +                                int nr_servers, Error **errp)
> +{
> +    MachineState *machine = MACHINE(spapr);
> +    Error *local_err = NULL;
> +
> +    /* KVM XIVE support */
> +    if (kvm_enabled()) {
> +        if (machine_kernel_irqchip_required(machine)) {
> +            error_setg(errp, "kernel_irqchip requested. no XIVE support");
> +            return;
> +        }
> +    }
> +
> +    /* QEMU XIVE support */
> +    spapr->xive = spapr_xive_create(spapr, TYPE_SPAPR_XIVE, nr_irqs, nr_servers,
> +                                    &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +}
> +
> +static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
> +                                Error **errp)
> +{
> +    if (!spapr_xive_irq_enable(spapr->xive, irq, lsi)) {
> +        error_setg(errp, "IRQ %d is invalid", irq);
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
> +{
> +    int i;
> +
> +    for (i = irq; i < irq + num; ++i) {
> +        spapr_xive_irq_disable(spapr->xive, i);
> +    }
> +}
> +
> +static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
> +{
> +    return spapr_xive_qirq(spapr->xive, irq);
> +}
> +
> +static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
> +                                      Monitor *mon)
> +{
> +    CPUState *cs;
> +
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +        xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
> +    }
> +
> +    spapr_xive_pic_print_info(spapr->xive, mon);

Any reason the info dumping routines are split into two?

> +}
> +
> +/*
> + * XIVE uses the full IRQ number space. Set it to 8K to be compatible
> + * with XICS.
> + */
> +
> +#define SPAPR_IRQ_XIVE_NR_IRQS     0x2000
> +#define SPAPR_IRQ_XIVE_NR_MSIS     (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
> +
> +sPAPRIrq spapr_irq_xive = {
> +    .nr_irqs     = SPAPR_IRQ_XIVE_NR_IRQS,
> +    .nr_msis     = SPAPR_IRQ_XIVE_NR_MSIS,
> +
> +    .init        = spapr_irq_init_xive,
> +    .claim       = spapr_irq_claim_xive,
> +    .free        = spapr_irq_free_xive,
> +    .qirq        = spapr_qirq_xive,
> +    .print_info  = spapr_irq_print_info_xive,
> +};
> +
>  /*
>   * sPAPR IRQ frontend routines for devices
>   */
> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp)
>  {
>      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>  
> @@ -216,7 +329,7 @@ void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
>          spapr_irq_msi_init(spapr, smc->irq->nr_msis);
>      }
>  
> -    smc->irq->init(spapr, smc->irq->nr_irqs, errp);
> +    smc->irq->init(spapr, smc->irq->nr_irqs, nr_servers, errp);
>  }
>  
>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)
Cédric Le Goater Nov. 28, 2018, 5:16 p.m. UTC | #2
On 11/28/18 4:28 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:57:08AM +0100, Cédric Le Goater wrote:
>> The XIVE IRQ backend uses the same layout as the new XICS backend but
>> covers the full range of the IRQ number space. The IRQ numbers for the
>> CPU IPIs are allocated at the bottom of this space, below 4K, to
>> preserve compatibility with XICS which does not use that range.
>>
>> This should be enough given that the maximum number of CPUs is 1024
>> for the sPAPR machine under QEMU. For the record, the biggest POWER8
>> or POWER9 system has a maximum of 1536 HW threads (16 sockets, 192
>> cores, SMT8).
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/spapr.h     |   2 +
>>  include/hw/ppc/spapr_irq.h |   7 ++-
>>  hw/ppc/spapr.c             |   2 +-
>>  hw/ppc/spapr_irq.c         | 119 ++++++++++++++++++++++++++++++++++++-
>>  4 files changed, 124 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 6279711fe8f7..1fbc2663e06c 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry;
>>  typedef struct sPAPREventSource sPAPREventSource;
>>  typedef struct sPAPRPendingHPT sPAPRPendingHPT;
>>  typedef struct ICSState ICSState;
>> +typedef struct sPAPRXive sPAPRXive;
>>  
>>  #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
>>  #define SPAPR_ENTRY_POINT       0x100
>> @@ -175,6 +176,7 @@ struct sPAPRMachineState {
>>      const char *icp_type;
>>      int32_t irq_map_nr;
>>      unsigned long *irq_map;
>> +    sPAPRXive  *xive;
>>  
>>      bool cmd_line_caps[SPAPR_CAP_NUM];
>>      sPAPRCapabilities def, eff, mig;
>> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
>> index 0e9229bf219e..c854ae527808 100644
>> --- a/include/hw/ppc/spapr_irq.h
>> +++ b/include/hw/ppc/spapr_irq.h
>> @@ -13,6 +13,7 @@
>>  /*
>>   * IRQ range offsets per device type
>>   */
>> +#define SPAPR_IRQ_IPI        0x0
>>  #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
>>  #define SPAPR_IRQ_HOTPLUG    0x1001
>>  #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
>> @@ -33,7 +34,8 @@ typedef struct sPAPRIrq {
>>      uint32_t    nr_irqs;
>>      uint32_t    nr_msis;
>>  
>> -    void (*init)(sPAPRMachineState *spapr, int nr_irqs, Error **errp);
>> +    void (*init)(sPAPRMachineState *spapr, int nr_irqs, int nr_servers,
>> +                 Error **errp);
>>      int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
>>      void (*free)(sPAPRMachineState *spapr, int irq, int num);
>>      qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
>> @@ -42,8 +44,9 @@ typedef struct sPAPRIrq {
>>  
>>  extern sPAPRIrq spapr_irq_xics;
>>  extern sPAPRIrq spapr_irq_xics_legacy;
>> +extern sPAPRIrq spapr_irq_xive;
>>  
>> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
>> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp);
> 
> I don't see why nr_servers needs to become a parameter, since it can
> be derived from spapr within this routine.

ok. This is true. We can use directly xics_max_server_number(spapr).

>>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
>>  void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
>>  qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index e470efe7993c..9f8c19e56e7a 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2594,7 +2594,7 @@ static void spapr_machine_init(MachineState *machine)
>>      spapr_set_vsmt_mode(spapr, &error_fatal);
>>  
>>      /* Set up Interrupt Controller before we create the VCPUs */
>> -    spapr_irq_init(spapr, &error_fatal);
>> +    spapr_irq_init(spapr, xics_max_server_number(spapr), &error_fatal);
> 
> We should rename xics_max_server_number() since it's no longer xics
> specific.

yes.

>>      /* Set up containers for ibm,client-architecture-support negotiated options
>>       */
>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
>> index bac450ffff23..2569ae1bc7f8 100644
>> --- a/hw/ppc/spapr_irq.c
>> +++ b/hw/ppc/spapr_irq.c
>> @@ -12,6 +12,7 @@
>>  #include "qemu/error-report.h"
>>  #include "qapi/error.h"
>>  #include "hw/ppc/spapr.h"
>> +#include "hw/ppc/spapr_xive.h"
>>  #include "hw/ppc/xics.h"
>>  #include "sysemu/kvm.h"
>>  
>> @@ -91,7 +92,7 @@ error:
>>  }
>>  
>>  static void spapr_irq_init_xics(sPAPRMachineState *spapr, int nr_irqs,
>> -                                Error **errp)
>> +                                int nr_servers, Error **errp)
>>  {
>>      MachineState *machine = MACHINE(spapr);
>>      Error *local_err = NULL;
>> @@ -204,10 +205,122 @@ sPAPRIrq spapr_irq_xics = {
>>      .print_info  = spapr_irq_print_info_xics,
>>  };
>>  
>> + /*
>> + * XIVE IRQ backend.
>> + */
>> +static sPAPRXive *spapr_xive_create(sPAPRMachineState *spapr,
>> +                                    const char *type_xive, int nr_irqs,
>> +                                    int nr_servers, Error **errp)
>> +{
>> +    sPAPRXive *xive;
>> +    Error *local_err = NULL;
>> +    Object *obj;
>> +    uint32_t nr_ends = nr_servers << 3; /* 8 priority ENDs per CPU */
>> +    int i;
>> +
>> +    obj = object_new(type_xive);
> 
> What's the reason for making the type a parameter, rather than just
> using the #define here.

KVM.

>> +    object_property_set_int(obj, nr_irqs, "nr-irqs", &error_abort);
>> +    object_property_set_int(obj, nr_ends, "nr-ends", &error_abort);
> 
> This is still within the sPAPR code, and you have a pointer to the
> MachineState, so I don't see why you could't just derive nr_irqs and
> nr_servers from that, rather than having them passed in.

for nr_servers I agree. nr_irqs comes from the machine class and it will
not make any sense using the machine class in the init routine of the
'dual' sPAPR IRQ backend supporting both modes. See patch 34 which
initializes both backend for the 'dual' machine.
 
>> +    object_property_set_bool(obj, true, "realized", &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return NULL;
>> +    }
>> +    qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
> 
> Whereas the XiveSource and XiveRouter I think make more sense as
> "device components" rather than SysBusDevice subclasses, 

Yes. I changed that.

> I think it
> *does* make sense for the PAPR-XIVE object to be a full fledged
> SysBusDevice.

Ah. That I didn't do but thinking of it, it makes sense as it is the
object managing the TIMA and ESB memory region mapping for the machine. 

> And for that reason, I think it makes more sense to create it with
> qdev_create(), which should avoid having to manually fiddle with the
> parent bus.

OK. I will give it a try. 

>> +    xive = SPAPR_XIVE(obj);
>> +
>> +    /* Enable the CPU IPIs */
>> +    for (i = 0; i < nr_servers; ++i) {
>> +        spapr_xive_irq_enable(xive, SPAPR_IRQ_IPI + i, false);
> 
> This comment possibly belonged on an earlier patch.  I don't love the
> "..._enable" name - to me that suggests something runtime rather than
> configuration time.  A better option isn't quickly occurring to me
> though :/.

Instead, I could call the sPAPR IRQ claim method  : 

    for (i = 0; i < nr_servers; ++i) {
	spapr_irq_xive.claim(spapr, SPAPR_IRQ_IPI + i, false, &local_err);
    }


What it does is to set the EAS_VALID bit in the EAT (it also sets the 
LSI bit). what about :
	
	spapr_xive_irq_validate() 
	spapr_xive_irq_invalidate() 

or to map the sPAPR IRQ backend names :

	spapr_xive_irq_claim() 
	spapr_xive_irq_free() 


> 
>> +    }
>> +
>> +    return xive;
>> +}
>> +
>> +static void spapr_irq_init_xive(sPAPRMachineState *spapr, int nr_irqs,
>> +                                int nr_servers, Error **errp)
>> +{
>> +    MachineState *machine = MACHINE(spapr);
>> +    Error *local_err = NULL;
>> +
>> +    /* KVM XIVE support */
>> +    if (kvm_enabled()) {
>> +        if (machine_kernel_irqchip_required(machine)) {
>> +            error_setg(errp, "kernel_irqchip requested. no XIVE support");
>> +            return;
>> +        }
>> +    }
>> +
>> +    /* QEMU XIVE support */
>> +    spapr->xive = spapr_xive_create(spapr, TYPE_SPAPR_XIVE, nr_irqs, nr_servers,
>> +                                    &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +}
>> +
>> +static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
>> +                                Error **errp)
>> +{
>> +    if (!spapr_xive_irq_enable(spapr->xive, irq, lsi)) {
>> +        error_setg(errp, "IRQ %d is invalid", irq);
>> +        return -1;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
>> +{
>> +    int i;
>> +
>> +    for (i = irq; i < irq + num; ++i) {
>> +        spapr_xive_irq_disable(spapr->xive, i);
>> +    }
>> +}
>> +
>> +static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
>> +{
>> +    return spapr_xive_qirq(spapr->xive, irq);
>> +}
>> +
>> +static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
>> +                                      Monitor *mon)
>> +{
>> +    CPUState *cs;
>> +
>> +    CPU_FOREACH(cs) {
>> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +
>> +        xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
>> +    }
>> +
>> +    spapr_xive_pic_print_info(spapr->xive, mon);
> 
> Any reason the info dumping routines are split into two?

Not the same objects. Are you suggesting that we could print all the info 
from the sPAPR XIVE model ? including the XiveTCTX. I thought of doing 
that also. Fine for me if it's ok for you.

Thanks,

C.

> 
>> +}
>> +
>> +/*
>> + * XIVE uses the full IRQ number space. Set it to 8K to be compatible
>> + * with XICS.
>> + */
>> +
>> +#define SPAPR_IRQ_XIVE_NR_IRQS     0x2000
>> +#define SPAPR_IRQ_XIVE_NR_MSIS     (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
>> +
>> +sPAPRIrq spapr_irq_xive = {
>> +    .nr_irqs     = SPAPR_IRQ_XIVE_NR_IRQS,
>> +    .nr_msis     = SPAPR_IRQ_XIVE_NR_MSIS,
>> +
>> +    .init        = spapr_irq_init_xive,
>> +    .claim       = spapr_irq_claim_xive,
>> +    .free        = spapr_irq_free_xive,
>> +    .qirq        = spapr_qirq_xive,
>> +    .print_info  = spapr_irq_print_info_xive,
>> +};
>> +
>>  /*
>>   * sPAPR IRQ frontend routines for devices
>>   */
>> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
>> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp)
>>  {
>>      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>>  
>> @@ -216,7 +329,7 @@ void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
>>          spapr_irq_msi_init(spapr, smc->irq->nr_msis);
>>      }
>>  
>> -    smc->irq->init(spapr, smc->irq->nr_irqs, errp);
>> +    smc->irq->init(spapr, smc->irq->nr_irqs, nr_servers, errp);
>>  }
>>  
>>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)
>
David Gibson Nov. 29, 2018, 1:07 a.m. UTC | #3
On Wed, Nov 28, 2018 at 06:16:58PM +0100, Cédric Le Goater wrote:
> On 11/28/18 4:28 AM, David Gibson wrote:
> > On Fri, Nov 16, 2018 at 11:57:08AM +0100, Cédric Le Goater wrote:
> >> The XIVE IRQ backend uses the same layout as the new XICS backend but
> >> covers the full range of the IRQ number space. The IRQ numbers for the
> >> CPU IPIs are allocated at the bottom of this space, below 4K, to
> >> preserve compatibility with XICS which does not use that range.
> >>
> >> This should be enough given that the maximum number of CPUs is 1024
> >> for the sPAPR machine under QEMU. For the record, the biggest POWER8
> >> or POWER9 system has a maximum of 1536 HW threads (16 sockets, 192
> >> cores, SMT8).
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  include/hw/ppc/spapr.h     |   2 +
> >>  include/hw/ppc/spapr_irq.h |   7 ++-
> >>  hw/ppc/spapr.c             |   2 +-
> >>  hw/ppc/spapr_irq.c         | 119 ++++++++++++++++++++++++++++++++++++-
> >>  4 files changed, 124 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index 6279711fe8f7..1fbc2663e06c 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry;
> >>  typedef struct sPAPREventSource sPAPREventSource;
> >>  typedef struct sPAPRPendingHPT sPAPRPendingHPT;
> >>  typedef struct ICSState ICSState;
> >> +typedef struct sPAPRXive sPAPRXive;
> >>  
> >>  #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
> >>  #define SPAPR_ENTRY_POINT       0x100
> >> @@ -175,6 +176,7 @@ struct sPAPRMachineState {
> >>      const char *icp_type;
> >>      int32_t irq_map_nr;
> >>      unsigned long *irq_map;
> >> +    sPAPRXive  *xive;
> >>  
> >>      bool cmd_line_caps[SPAPR_CAP_NUM];
> >>      sPAPRCapabilities def, eff, mig;
> >> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> >> index 0e9229bf219e..c854ae527808 100644
> >> --- a/include/hw/ppc/spapr_irq.h
> >> +++ b/include/hw/ppc/spapr_irq.h
> >> @@ -13,6 +13,7 @@
> >>  /*
> >>   * IRQ range offsets per device type
> >>   */
> >> +#define SPAPR_IRQ_IPI        0x0
> >>  #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
> >>  #define SPAPR_IRQ_HOTPLUG    0x1001
> >>  #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
> >> @@ -33,7 +34,8 @@ typedef struct sPAPRIrq {
> >>      uint32_t    nr_irqs;
> >>      uint32_t    nr_msis;
> >>  
> >> -    void (*init)(sPAPRMachineState *spapr, int nr_irqs, Error **errp);
> >> +    void (*init)(sPAPRMachineState *spapr, int nr_irqs, int nr_servers,
> >> +                 Error **errp);
> >>      int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
> >>      void (*free)(sPAPRMachineState *spapr, int irq, int num);
> >>      qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
> >> @@ -42,8 +44,9 @@ typedef struct sPAPRIrq {
> >>  
> >>  extern sPAPRIrq spapr_irq_xics;
> >>  extern sPAPRIrq spapr_irq_xics_legacy;
> >> +extern sPAPRIrq spapr_irq_xive;
> >>  
> >> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
> >> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp);
> > 
> > I don't see why nr_servers needs to become a parameter, since it can
> > be derived from spapr within this routine.
> 
> ok. This is true. We can use directly xics_max_server_number(spapr).
> 
> >>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
> >>  void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
> >>  qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index e470efe7993c..9f8c19e56e7a 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2594,7 +2594,7 @@ static void spapr_machine_init(MachineState *machine)
> >>      spapr_set_vsmt_mode(spapr, &error_fatal);
> >>  
> >>      /* Set up Interrupt Controller before we create the VCPUs */
> >> -    spapr_irq_init(spapr, &error_fatal);
> >> +    spapr_irq_init(spapr, xics_max_server_number(spapr), &error_fatal);
> > 
> > We should rename xics_max_server_number() since it's no longer xics
> > specific.
> 
> yes.
> 
> >>      /* Set up containers for ibm,client-architecture-support negotiated options
> >>       */
> >> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> >> index bac450ffff23..2569ae1bc7f8 100644
> >> --- a/hw/ppc/spapr_irq.c
> >> +++ b/hw/ppc/spapr_irq.c
> >> @@ -12,6 +12,7 @@
> >>  #include "qemu/error-report.h"
> >>  #include "qapi/error.h"
> >>  #include "hw/ppc/spapr.h"
> >> +#include "hw/ppc/spapr_xive.h"
> >>  #include "hw/ppc/xics.h"
> >>  #include "sysemu/kvm.h"
> >>  
> >> @@ -91,7 +92,7 @@ error:
> >>  }
> >>  
> >>  static void spapr_irq_init_xics(sPAPRMachineState *spapr, int nr_irqs,
> >> -                                Error **errp)
> >> +                                int nr_servers, Error **errp)
> >>  {
> >>      MachineState *machine = MACHINE(spapr);
> >>      Error *local_err = NULL;
> >> @@ -204,10 +205,122 @@ sPAPRIrq spapr_irq_xics = {
> >>      .print_info  = spapr_irq_print_info_xics,
> >>  };
> >>  
> >> + /*
> >> + * XIVE IRQ backend.
> >> + */
> >> +static sPAPRXive *spapr_xive_create(sPAPRMachineState *spapr,
> >> +                                    const char *type_xive, int nr_irqs,
> >> +                                    int nr_servers, Error **errp)
> >> +{
> >> +    sPAPRXive *xive;
> >> +    Error *local_err = NULL;
> >> +    Object *obj;
> >> +    uint32_t nr_ends = nr_servers << 3; /* 8 priority ENDs per CPU */
> >> +    int i;
> >> +
> >> +    obj = object_new(type_xive);
> > 
> > What's the reason for making the type a parameter, rather than just
> > using the #define here.
> 
> KVM.

Yeah, I realised that when I'd read a few patches further on.  As I
commented there, I don't think the separate KVM/TCG subclasses is
actually a good pattern to follow.

> >> +    object_property_set_int(obj, nr_irqs, "nr-irqs", &error_abort);
> >> +    object_property_set_int(obj, nr_ends, "nr-ends", &error_abort);
> > 
> > This is still within the sPAPR code, and you have a pointer to the
> > MachineState, so I don't see why you could't just derive nr_irqs and
> > nr_servers from that, rather than having them passed in.
> 
> for nr_servers I agree. nr_irqs comes from the machine class and it will
> not make any sense using the machine class in the init routine of the
> 'dual' sPAPR IRQ backend supporting both modes. See patch 34 which
> initializes both backend for the 'dual' machine.

Uh.. I guess I'll comment when I get to that patch, but I don't see
why accessing the machine class would be a problem.  If we have the
MachineState we can get to the MachineClass.

> >> +    object_property_set_bool(obj, true, "realized", &local_err);
> >> +    if (local_err) {
> >> +        error_propagate(errp, local_err);
> >> +        return NULL;
> >> +    }
> >> +    qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
> > 
> > Whereas the XiveSource and XiveRouter I think make more sense as
> > "device components" rather than SysBusDevice subclasses, 
> 
> Yes. I changed that.
> 
> > I think it
> > *does* make sense for the PAPR-XIVE object to be a full fledged
> > SysBusDevice.
> 
> Ah. That I didn't do but thinking of it, it makes sense as it is the
> object managing the TIMA and ESB memory region mapping for the machine. 
> 
> > And for that reason, I think it makes more sense to create it with
> > qdev_create(), which should avoid having to manually fiddle with the
> > parent bus.
> 
> OK. I will give it a try. 
> 
> >> +    xive = SPAPR_XIVE(obj);
> >> +
> >> +    /* Enable the CPU IPIs */
> >> +    for (i = 0; i < nr_servers; ++i) {
> >> +        spapr_xive_irq_enable(xive, SPAPR_IRQ_IPI + i, false);
> > 
> > This comment possibly belonged on an earlier patch.  I don't love the
> > "..._enable" name - to me that suggests something runtime rather than
> > configuration time.  A better option isn't quickly occurring to me
> > though :/.
> 
> Instead, I could call the sPAPR IRQ claim method  : 
> 
>     for (i = 0; i < nr_servers; ++i) {
> 	spapr_irq_xive.claim(spapr, SPAPR_IRQ_IPI + i, false, &local_err);
>     }
> 
> 
> What it does is to set the EAS_VALID bit in the EAT (it also sets the 
> LSI bit). what about :
> 	
> 	spapr_xive_irq_validate() 
> 	spapr_xive_irq_invalidate() 
> 
> or to map the sPAPR IRQ backend names :
> 
> 	spapr_xive_irq_claim() 
> 	spapr_xive_irq_free()

Let's use claim/free to match the terms spapr already uses.


> 
> 
> > 
> >> +    }
> >> +
> >> +    return xive;
> >> +}
> >> +
> >> +static void spapr_irq_init_xive(sPAPRMachineState *spapr, int nr_irqs,
> >> +                                int nr_servers, Error **errp)
> >> +{
> >> +    MachineState *machine = MACHINE(spapr);
> >> +    Error *local_err = NULL;
> >> +
> >> +    /* KVM XIVE support */
> >> +    if (kvm_enabled()) {
> >> +        if (machine_kernel_irqchip_required(machine)) {
> >> +            error_setg(errp, "kernel_irqchip requested. no XIVE support");
> >> +            return;
> >> +        }
> >> +    }
> >> +
> >> +    /* QEMU XIVE support */
> >> +    spapr->xive = spapr_xive_create(spapr, TYPE_SPAPR_XIVE, nr_irqs, nr_servers,
> >> +                                    &local_err);
> >> +    if (local_err) {
> >> +        error_propagate(errp, local_err);
> >> +        return;
> >> +    }
> >> +}
> >> +
> >> +static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
> >> +                                Error **errp)
> >> +{
> >> +    if (!spapr_xive_irq_enable(spapr->xive, irq, lsi)) {
> >> +        error_setg(errp, "IRQ %d is invalid", irq);
> >> +        return -1;
> >> +    }
> >> +    return 0;
> >> +}
> >> +
> >> +static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
> >> +{
> >> +    int i;
> >> +
> >> +    for (i = irq; i < irq + num; ++i) {
> >> +        spapr_xive_irq_disable(spapr->xive, i);
> >> +    }
> >> +}
> >> +
> >> +static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
> >> +{
> >> +    return spapr_xive_qirq(spapr->xive, irq);
> >> +}
> >> +
> >> +static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
> >> +                                      Monitor *mon)
> >> +{
> >> +    CPUState *cs;
> >> +
> >> +    CPU_FOREACH(cs) {
> >> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> >> +
> >> +        xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
> >> +    }
> >> +
> >> +    spapr_xive_pic_print_info(spapr->xive, mon);
> > 
> > Any reason the info dumping routines are split into two?
> 
> Not the same objects. Are you suggesting that we could print all the info 
> from the sPAPR XIVE model ? including the XiveTCTX. I thought of doing 
> that also. Fine for me if it's ok for you.

Ah.. I think I got xive_pic_print_info() and
xive_tctx_pic_print_info() mixed up.  Never mind.

> 
> Thanks,
> 
> C.
> 
> > 
> >> +}
> >> +
> >> +/*
> >> + * XIVE uses the full IRQ number space. Set it to 8K to be compatible
> >> + * with XICS.
> >> + */
> >> +
> >> +#define SPAPR_IRQ_XIVE_NR_IRQS     0x2000
> >> +#define SPAPR_IRQ_XIVE_NR_MSIS     (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
> >> +
> >> +sPAPRIrq spapr_irq_xive = {
> >> +    .nr_irqs     = SPAPR_IRQ_XIVE_NR_IRQS,
> >> +    .nr_msis     = SPAPR_IRQ_XIVE_NR_MSIS,
> >> +
> >> +    .init        = spapr_irq_init_xive,
> >> +    .claim       = spapr_irq_claim_xive,
> >> +    .free        = spapr_irq_free_xive,
> >> +    .qirq        = spapr_qirq_xive,
> >> +    .print_info  = spapr_irq_print_info_xive,
> >> +};
> >> +
> >>  /*
> >>   * sPAPR IRQ frontend routines for devices
> >>   */
> >> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
> >> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp)
> >>  {
> >>      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> >>  
> >> @@ -216,7 +329,7 @@ void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
> >>          spapr_irq_msi_init(spapr, smc->irq->nr_msis);
> >>      }
> >>  
> >> -    smc->irq->init(spapr, smc->irq->nr_irqs, errp);
> >> +    smc->irq->init(spapr, smc->irq->nr_irqs, nr_servers, errp);
> >>  }
> >>  
> >>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)
> > 
>
Cédric Le Goater Nov. 29, 2018, 3:34 p.m. UTC | #4
On 11/29/18 2:07 AM, David Gibson wrote:
> On Wed, Nov 28, 2018 at 06:16:58PM +0100, Cédric Le Goater wrote:
>> On 11/28/18 4:28 AM, David Gibson wrote:
>>> On Fri, Nov 16, 2018 at 11:57:08AM +0100, Cédric Le Goater wrote:
>>>> The XIVE IRQ backend uses the same layout as the new XICS backend but
>>>> covers the full range of the IRQ number space. The IRQ numbers for the
>>>> CPU IPIs are allocated at the bottom of this space, below 4K, to
>>>> preserve compatibility with XICS which does not use that range.
>>>>
>>>> This should be enough given that the maximum number of CPUs is 1024
>>>> for the sPAPR machine under QEMU. For the record, the biggest POWER8
>>>> or POWER9 system has a maximum of 1536 HW threads (16 sockets, 192
>>>> cores, SMT8).
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  include/hw/ppc/spapr.h     |   2 +
>>>>  include/hw/ppc/spapr_irq.h |   7 ++-
>>>>  hw/ppc/spapr.c             |   2 +-
>>>>  hw/ppc/spapr_irq.c         | 119 ++++++++++++++++++++++++++++++++++++-
>>>>  4 files changed, 124 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index 6279711fe8f7..1fbc2663e06c 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry;
>>>>  typedef struct sPAPREventSource sPAPREventSource;
>>>>  typedef struct sPAPRPendingHPT sPAPRPendingHPT;
>>>>  typedef struct ICSState ICSState;
>>>> +typedef struct sPAPRXive sPAPRXive;
>>>>  
>>>>  #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
>>>>  #define SPAPR_ENTRY_POINT       0x100
>>>> @@ -175,6 +176,7 @@ struct sPAPRMachineState {
>>>>      const char *icp_type;
>>>>      int32_t irq_map_nr;
>>>>      unsigned long *irq_map;
>>>> +    sPAPRXive  *xive;
>>>>  
>>>>      bool cmd_line_caps[SPAPR_CAP_NUM];
>>>>      sPAPRCapabilities def, eff, mig;
>>>> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
>>>> index 0e9229bf219e..c854ae527808 100644
>>>> --- a/include/hw/ppc/spapr_irq.h
>>>> +++ b/include/hw/ppc/spapr_irq.h
>>>> @@ -13,6 +13,7 @@
>>>>  /*
>>>>   * IRQ range offsets per device type
>>>>   */
>>>> +#define SPAPR_IRQ_IPI        0x0
>>>>  #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
>>>>  #define SPAPR_IRQ_HOTPLUG    0x1001
>>>>  #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
>>>> @@ -33,7 +34,8 @@ typedef struct sPAPRIrq {
>>>>      uint32_t    nr_irqs;
>>>>      uint32_t    nr_msis;
>>>>  
>>>> -    void (*init)(sPAPRMachineState *spapr, int nr_irqs, Error **errp);
>>>> +    void (*init)(sPAPRMachineState *spapr, int nr_irqs, int nr_servers,
>>>> +                 Error **errp);
>>>>      int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
>>>>      void (*free)(sPAPRMachineState *spapr, int irq, int num);
>>>>      qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
>>>> @@ -42,8 +44,9 @@ typedef struct sPAPRIrq {
>>>>  
>>>>  extern sPAPRIrq spapr_irq_xics;
>>>>  extern sPAPRIrq spapr_irq_xics_legacy;
>>>> +extern sPAPRIrq spapr_irq_xive;
>>>>  
>>>> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
>>>> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp);
>>>
>>> I don't see why nr_servers needs to become a parameter, since it can
>>> be derived from spapr within this routine.
>>
>> ok. This is true. We can use directly xics_max_server_number(spapr).
>>
>>>>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
>>>>  void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
>>>>  qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index e470efe7993c..9f8c19e56e7a 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -2594,7 +2594,7 @@ static void spapr_machine_init(MachineState *machine)
>>>>      spapr_set_vsmt_mode(spapr, &error_fatal);
>>>>  
>>>>      /* Set up Interrupt Controller before we create the VCPUs */
>>>> -    spapr_irq_init(spapr, &error_fatal);
>>>> +    spapr_irq_init(spapr, xics_max_server_number(spapr), &error_fatal);
>>>
>>> We should rename xics_max_server_number() since it's no longer xics
>>> specific.
>>
>> yes.
>>
>>>>      /* Set up containers for ibm,client-architecture-support negotiated options
>>>>       */
>>>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
>>>> index bac450ffff23..2569ae1bc7f8 100644
>>>> --- a/hw/ppc/spapr_irq.c
>>>> +++ b/hw/ppc/spapr_irq.c
>>>> @@ -12,6 +12,7 @@
>>>>  #include "qemu/error-report.h"
>>>>  #include "qapi/error.h"
>>>>  #include "hw/ppc/spapr.h"
>>>> +#include "hw/ppc/spapr_xive.h"
>>>>  #include "hw/ppc/xics.h"
>>>>  #include "sysemu/kvm.h"
>>>>  
>>>> @@ -91,7 +92,7 @@ error:
>>>>  }
>>>>  
>>>>  static void spapr_irq_init_xics(sPAPRMachineState *spapr, int nr_irqs,
>>>> -                                Error **errp)
>>>> +                                int nr_servers, Error **errp)
>>>>  {
>>>>      MachineState *machine = MACHINE(spapr);
>>>>      Error *local_err = NULL;
>>>> @@ -204,10 +205,122 @@ sPAPRIrq spapr_irq_xics = {
>>>>      .print_info  = spapr_irq_print_info_xics,
>>>>  };
>>>>  
>>>> + /*
>>>> + * XIVE IRQ backend.
>>>> + */
>>>> +static sPAPRXive *spapr_xive_create(sPAPRMachineState *spapr,
>>>> +                                    const char *type_xive, int nr_irqs,
>>>> +                                    int nr_servers, Error **errp)
>>>> +{
>>>> +    sPAPRXive *xive;
>>>> +    Error *local_err = NULL;
>>>> +    Object *obj;
>>>> +    uint32_t nr_ends = nr_servers << 3; /* 8 priority ENDs per CPU */
>>>> +    int i;
>>>> +
>>>> +    obj = object_new(type_xive);
>>>
>>> What's the reason for making the type a parameter, rather than just
>>> using the #define here.
>>
>> KVM.
> 
> Yeah, I realised that when I'd read a few patches further on.  As I
> commented there, I don't think the separate KVM/TCG subclasses is
> actually a good pattern to follow.

I will use the simple pattern in next spin: if (kvm) { } 

We might want to do that for XICS also but it would break migratibility.  

>>>> +    object_property_set_int(obj, nr_irqs, "nr-irqs", &error_abort);
>>>> +    object_property_set_int(obj, nr_ends, "nr-ends", &error_abort);
>>>
>>> This is still within the sPAPR code, and you have a pointer to the
>>> MachineState, so I don't see why you could't just derive nr_irqs and
>>> nr_servers from that, rather than having them passed in.
>>
>> for nr_servers I agree. nr_irqs comes from the machine class and it will
>> not make any sense using the machine class in the init routine of the
>> 'dual' sPAPR IRQ backend supporting both modes. See patch 34 which
>> initializes both backend for the 'dual' machine.
> 
> Uh.. I guess I'll comment when I get to that patch, but I don't see
> why accessing the machine class would be a problem.  If we have the
> MachineState we can get to the MachineClass.> 
>>>> +    object_property_set_bool(obj, true, "realized", &local_err);
>>>> +    if (local_err) {
>>>> +        error_propagate(errp, local_err);
>>>> +        return NULL;
>>>> +    }
>>>> +    qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
>>>
>>> Whereas the XiveSource and XiveRouter I think make more sense as
>>> "device components" rather than SysBusDevice subclasses, 
>>
>> Yes. I changed that.
>>
>>> I think it
>>> *does* make sense for the PAPR-XIVE object to be a full fledged
>>> SysBusDevice.
>>
>> Ah. That I didn't do but thinking of it, it makes sense as it is the
>> object managing the TIMA and ESB memory region mapping for the machine. 
>>
>>> And for that reason, I think it makes more sense to create it with
>>> qdev_create(), which should avoid having to manually fiddle with the
>>> parent bus.
>>
>> OK. I will give it a try. 
>>
>>>> +    xive = SPAPR_XIVE(obj);
>>>> +
>>>> +    /* Enable the CPU IPIs */
>>>> +    for (i = 0; i < nr_servers; ++i) {
>>>> +        spapr_xive_irq_enable(xive, SPAPR_IRQ_IPI + i, false);
>>>
>>> This comment possibly belonged on an earlier patch.  I don't love the
>>> "..._enable" name - to me that suggests something runtime rather than
>>> configuration time.  A better option isn't quickly occurring to me
>>> though :/.
>>
>> Instead, I could call the sPAPR IRQ claim method  : 
>>
>>     for (i = 0; i < nr_servers; ++i) {
>> 	spapr_irq_xive.claim(spapr, SPAPR_IRQ_IPI + i, false, &local_err);
>>     }
>>
>>
>> What it does is to set the EAS_VALID bit in the EAT (it also sets the 
>> LSI bit). what about :
>> 	
>> 	spapr_xive_irq_validate() 
>> 	spapr_xive_irq_invalidate() 
>>
>> or to map the sPAPR IRQ backend names :
>>
>> 	spapr_xive_irq_claim() 
>> 	spapr_xive_irq_free()
> 
> Let's use claim/free to match the terms spapr already uses.

OK.

Thanks,

C.

> 
> 
>>
>>
>>>
>>>> +    }
>>>> +
>>>> +    return xive;
>>>> +}
>>>> +
>>>> +static void spapr_irq_init_xive(sPAPRMachineState *spapr, int nr_irqs,
>>>> +                                int nr_servers, Error **errp)
>>>> +{
>>>> +    MachineState *machine = MACHINE(spapr);
>>>> +    Error *local_err = NULL;
>>>> +
>>>> +    /* KVM XIVE support */
>>>> +    if (kvm_enabled()) {
>>>> +        if (machine_kernel_irqchip_required(machine)) {
>>>> +            error_setg(errp, "kernel_irqchip requested. no XIVE support");
>>>> +            return;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    /* QEMU XIVE support */
>>>> +    spapr->xive = spapr_xive_create(spapr, TYPE_SPAPR_XIVE, nr_irqs, nr_servers,
>>>> +                                    &local_err);
>>>> +    if (local_err) {
>>>> +        error_propagate(errp, local_err);
>>>> +        return;
>>>> +    }
>>>> +}
>>>> +
>>>> +static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
>>>> +                                Error **errp)
>>>> +{
>>>> +    if (!spapr_xive_irq_enable(spapr->xive, irq, lsi)) {
>>>> +        error_setg(errp, "IRQ %d is invalid", irq);
>>>> +        return -1;
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    for (i = irq; i < irq + num; ++i) {
>>>> +        spapr_xive_irq_disable(spapr->xive, i);
>>>> +    }
>>>> +}
>>>> +
>>>> +static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
>>>> +{
>>>> +    return spapr_xive_qirq(spapr->xive, irq);
>>>> +}
>>>> +
>>>> +static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
>>>> +                                      Monitor *mon)
>>>> +{
>>>> +    CPUState *cs;
>>>> +
>>>> +    CPU_FOREACH(cs) {
>>>> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
>>>> +
>>>> +        xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
>>>> +    }
>>>> +
>>>> +    spapr_xive_pic_print_info(spapr->xive, mon);
>>>
>>> Any reason the info dumping routines are split into two?
>>
>> Not the same objects. Are you suggesting that we could print all the info 
>> from the sPAPR XIVE model ? including the XiveTCTX. I thought of doing 
>> that also. Fine for me if it's ok for you.
> 
> Ah.. I think I got xive_pic_print_info() and
> xive_tctx_pic_print_info() mixed up.  Never mind.
> 
>>
>> Thanks,
>>
>> C.
>>
>>>
>>>> +}
>>>> +
>>>> +/*
>>>> + * XIVE uses the full IRQ number space. Set it to 8K to be compatible
>>>> + * with XICS.
>>>> + */
>>>> +
>>>> +#define SPAPR_IRQ_XIVE_NR_IRQS     0x2000
>>>> +#define SPAPR_IRQ_XIVE_NR_MSIS     (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
>>>> +
>>>> +sPAPRIrq spapr_irq_xive = {
>>>> +    .nr_irqs     = SPAPR_IRQ_XIVE_NR_IRQS,
>>>> +    .nr_msis     = SPAPR_IRQ_XIVE_NR_MSIS,
>>>> +
>>>> +    .init        = spapr_irq_init_xive,
>>>> +    .claim       = spapr_irq_claim_xive,
>>>> +    .free        = spapr_irq_free_xive,
>>>> +    .qirq        = spapr_qirq_xive,
>>>> +    .print_info  = spapr_irq_print_info_xive,
>>>> +};
>>>> +
>>>>  /*
>>>>   * sPAPR IRQ frontend routines for devices
>>>>   */
>>>> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
>>>> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp)
>>>>  {
>>>>      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>>>>  
>>>> @@ -216,7 +329,7 @@ void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
>>>>          spapr_irq_msi_init(spapr, smc->irq->nr_msis);
>>>>      }
>>>>  
>>>> -    smc->irq->init(spapr, smc->irq->nr_irqs, errp);
>>>> +    smc->irq->init(spapr, smc->irq->nr_irqs, nr_servers, errp);
>>>>  }
>>>>  
>>>>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)
>>>
>>
>
David Gibson Nov. 29, 2018, 10:39 p.m. UTC | #5
On Thu, Nov 29, 2018 at 04:34:51PM +0100, Cédric Le Goater wrote:
> On 11/29/18 2:07 AM, David Gibson wrote:
> > On Wed, Nov 28, 2018 at 06:16:58PM +0100, Cédric Le Goater wrote:
> >> On 11/28/18 4:28 AM, David Gibson wrote:
> >>> On Fri, Nov 16, 2018 at 11:57:08AM +0100, Cédric Le Goater wrote:
> >>>> The XIVE IRQ backend uses the same layout as the new XICS backend but
> >>>> covers the full range of the IRQ number space. The IRQ numbers for the
> >>>> CPU IPIs are allocated at the bottom of this space, below 4K, to
> >>>> preserve compatibility with XICS which does not use that range.
> >>>>
> >>>> This should be enough given that the maximum number of CPUs is 1024
> >>>> for the sPAPR machine under QEMU. For the record, the biggest POWER8
> >>>> or POWER9 system has a maximum of 1536 HW threads (16 sockets, 192
> >>>> cores, SMT8).
> >>>>
> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>>> ---
> >>>>  include/hw/ppc/spapr.h     |   2 +
> >>>>  include/hw/ppc/spapr_irq.h |   7 ++-
> >>>>  hw/ppc/spapr.c             |   2 +-
> >>>>  hw/ppc/spapr_irq.c         | 119 ++++++++++++++++++++++++++++++++++++-
> >>>>  4 files changed, 124 insertions(+), 6 deletions(-)
> >>>>
> >>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >>>> index 6279711fe8f7..1fbc2663e06c 100644
> >>>> --- a/include/hw/ppc/spapr.h
> >>>> +++ b/include/hw/ppc/spapr.h
> >>>> @@ -16,6 +16,7 @@ typedef struct sPAPREventLogEntry sPAPREventLogEntry;
> >>>>  typedef struct sPAPREventSource sPAPREventSource;
> >>>>  typedef struct sPAPRPendingHPT sPAPRPendingHPT;
> >>>>  typedef struct ICSState ICSState;
> >>>> +typedef struct sPAPRXive sPAPRXive;
> >>>>  
> >>>>  #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
> >>>>  #define SPAPR_ENTRY_POINT       0x100
> >>>> @@ -175,6 +176,7 @@ struct sPAPRMachineState {
> >>>>      const char *icp_type;
> >>>>      int32_t irq_map_nr;
> >>>>      unsigned long *irq_map;
> >>>> +    sPAPRXive  *xive;
> >>>>  
> >>>>      bool cmd_line_caps[SPAPR_CAP_NUM];
> >>>>      sPAPRCapabilities def, eff, mig;
> >>>> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> >>>> index 0e9229bf219e..c854ae527808 100644
> >>>> --- a/include/hw/ppc/spapr_irq.h
> >>>> +++ b/include/hw/ppc/spapr_irq.h
> >>>> @@ -13,6 +13,7 @@
> >>>>  /*
> >>>>   * IRQ range offsets per device type
> >>>>   */
> >>>> +#define SPAPR_IRQ_IPI        0x0
> >>>>  #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
> >>>>  #define SPAPR_IRQ_HOTPLUG    0x1001
> >>>>  #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
> >>>> @@ -33,7 +34,8 @@ typedef struct sPAPRIrq {
> >>>>      uint32_t    nr_irqs;
> >>>>      uint32_t    nr_msis;
> >>>>  
> >>>> -    void (*init)(sPAPRMachineState *spapr, int nr_irqs, Error **errp);
> >>>> +    void (*init)(sPAPRMachineState *spapr, int nr_irqs, int nr_servers,
> >>>> +                 Error **errp);
> >>>>      int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
> >>>>      void (*free)(sPAPRMachineState *spapr, int irq, int num);
> >>>>      qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
> >>>> @@ -42,8 +44,9 @@ typedef struct sPAPRIrq {
> >>>>  
> >>>>  extern sPAPRIrq spapr_irq_xics;
> >>>>  extern sPAPRIrq spapr_irq_xics_legacy;
> >>>> +extern sPAPRIrq spapr_irq_xive;
> >>>>  
> >>>> -void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
> >>>> +void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp);
> >>>
> >>> I don't see why nr_servers needs to become a parameter, since it can
> >>> be derived from spapr within this routine.
> >>
> >> ok. This is true. We can use directly xics_max_server_number(spapr).
> >>
> >>>>  int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
> >>>>  void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
> >>>>  qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
> >>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>> index e470efe7993c..9f8c19e56e7a 100644
> >>>> --- a/hw/ppc/spapr.c
> >>>> +++ b/hw/ppc/spapr.c
> >>>> @@ -2594,7 +2594,7 @@ static void spapr_machine_init(MachineState *machine)
> >>>>      spapr_set_vsmt_mode(spapr, &error_fatal);
> >>>>  
> >>>>      /* Set up Interrupt Controller before we create the VCPUs */
> >>>> -    spapr_irq_init(spapr, &error_fatal);
> >>>> +    spapr_irq_init(spapr, xics_max_server_number(spapr), &error_fatal);
> >>>
> >>> We should rename xics_max_server_number() since it's no longer xics
> >>> specific.
> >>
> >> yes.
> >>
> >>>>      /* Set up containers for ibm,client-architecture-support negotiated options
> >>>>       */
> >>>> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> >>>> index bac450ffff23..2569ae1bc7f8 100644
> >>>> --- a/hw/ppc/spapr_irq.c
> >>>> +++ b/hw/ppc/spapr_irq.c
> >>>> @@ -12,6 +12,7 @@
> >>>>  #include "qemu/error-report.h"
> >>>>  #include "qapi/error.h"
> >>>>  #include "hw/ppc/spapr.h"
> >>>> +#include "hw/ppc/spapr_xive.h"
> >>>>  #include "hw/ppc/xics.h"
> >>>>  #include "sysemu/kvm.h"
> >>>>  
> >>>> @@ -91,7 +92,7 @@ error:
> >>>>  }
> >>>>  
> >>>>  static void spapr_irq_init_xics(sPAPRMachineState *spapr, int nr_irqs,
> >>>> -                                Error **errp)
> >>>> +                                int nr_servers, Error **errp)
> >>>>  {
> >>>>      MachineState *machine = MACHINE(spapr);
> >>>>      Error *local_err = NULL;
> >>>> @@ -204,10 +205,122 @@ sPAPRIrq spapr_irq_xics = {
> >>>>      .print_info  = spapr_irq_print_info_xics,
> >>>>  };
> >>>>  
> >>>> + /*
> >>>> + * XIVE IRQ backend.
> >>>> + */
> >>>> +static sPAPRXive *spapr_xive_create(sPAPRMachineState *spapr,
> >>>> +                                    const char *type_xive, int nr_irqs,
> >>>> +                                    int nr_servers, Error **errp)
> >>>> +{
> >>>> +    sPAPRXive *xive;
> >>>> +    Error *local_err = NULL;
> >>>> +    Object *obj;
> >>>> +    uint32_t nr_ends = nr_servers << 3; /* 8 priority ENDs per CPU */
> >>>> +    int i;
> >>>> +
> >>>> +    obj = object_new(type_xive);
> >>>
> >>> What's the reason for making the type a parameter, rather than just
> >>> using the #define here.
> >>
> >> KVM.
> > 
> > Yeah, I realised that when I'd read a few patches further on.  As I
> > commented there, I don't think the separate KVM/TCG subclasses is
> > actually a good pattern to follow.
> 
> I will use the simple pattern in next spin: if (kvm) { } 

Great.

> We might want to do that for XICS also but it would break migratibility.  

Well, if that breaks migration, we already have a problem migrating
between KVM and non-KVM guests (or even KVM-with-irqchip and
KVM-without-irqchip guests).  I think we put the actual migratable
state in the base class to avoid that, but we should check.  If we
ever get time.
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 6279711fe8f7..1fbc2663e06c 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -16,6 +16,7 @@  typedef struct sPAPREventLogEntry sPAPREventLogEntry;
 typedef struct sPAPREventSource sPAPREventSource;
 typedef struct sPAPRPendingHPT sPAPRPendingHPT;
 typedef struct ICSState ICSState;
+typedef struct sPAPRXive sPAPRXive;
 
 #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
 #define SPAPR_ENTRY_POINT       0x100
@@ -175,6 +176,7 @@  struct sPAPRMachineState {
     const char *icp_type;
     int32_t irq_map_nr;
     unsigned long *irq_map;
+    sPAPRXive  *xive;
 
     bool cmd_line_caps[SPAPR_CAP_NUM];
     sPAPRCapabilities def, eff, mig;
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
index 0e9229bf219e..c854ae527808 100644
--- a/include/hw/ppc/spapr_irq.h
+++ b/include/hw/ppc/spapr_irq.h
@@ -13,6 +13,7 @@ 
 /*
  * IRQ range offsets per device type
  */
+#define SPAPR_IRQ_IPI        0x0
 #define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
 #define SPAPR_IRQ_HOTPLUG    0x1001
 #define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
@@ -33,7 +34,8 @@  typedef struct sPAPRIrq {
     uint32_t    nr_irqs;
     uint32_t    nr_msis;
 
-    void (*init)(sPAPRMachineState *spapr, int nr_irqs, Error **errp);
+    void (*init)(sPAPRMachineState *spapr, int nr_irqs, int nr_servers,
+                 Error **errp);
     int (*claim)(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
     void (*free)(sPAPRMachineState *spapr, int irq, int num);
     qemu_irq (*qirq)(sPAPRMachineState *spapr, int irq);
@@ -42,8 +44,9 @@  typedef struct sPAPRIrq {
 
 extern sPAPRIrq spapr_irq_xics;
 extern sPAPRIrq spapr_irq_xics_legacy;
+extern sPAPRIrq spapr_irq_xive;
 
-void spapr_irq_init(sPAPRMachineState *spapr, Error **errp);
+void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp);
 int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp);
 void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num);
 qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e470efe7993c..9f8c19e56e7a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2594,7 +2594,7 @@  static void spapr_machine_init(MachineState *machine)
     spapr_set_vsmt_mode(spapr, &error_fatal);
 
     /* Set up Interrupt Controller before we create the VCPUs */
-    spapr_irq_init(spapr, &error_fatal);
+    spapr_irq_init(spapr, xics_max_server_number(spapr), &error_fatal);
 
     /* Set up containers for ibm,client-architecture-support negotiated options
      */
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index bac450ffff23..2569ae1bc7f8 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -12,6 +12,7 @@ 
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
 #include "hw/ppc/xics.h"
 #include "sysemu/kvm.h"
 
@@ -91,7 +92,7 @@  error:
 }
 
 static void spapr_irq_init_xics(sPAPRMachineState *spapr, int nr_irqs,
-                                Error **errp)
+                                int nr_servers, Error **errp)
 {
     MachineState *machine = MACHINE(spapr);
     Error *local_err = NULL;
@@ -204,10 +205,122 @@  sPAPRIrq spapr_irq_xics = {
     .print_info  = spapr_irq_print_info_xics,
 };
 
+ /*
+ * XIVE IRQ backend.
+ */
+static sPAPRXive *spapr_xive_create(sPAPRMachineState *spapr,
+                                    const char *type_xive, int nr_irqs,
+                                    int nr_servers, Error **errp)
+{
+    sPAPRXive *xive;
+    Error *local_err = NULL;
+    Object *obj;
+    uint32_t nr_ends = nr_servers << 3; /* 8 priority ENDs per CPU */
+    int i;
+
+    obj = object_new(type_xive);
+    object_property_set_int(obj, nr_irqs, "nr-irqs", &error_abort);
+    object_property_set_int(obj, nr_ends, "nr-ends", &error_abort);
+    object_property_set_bool(obj, true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+    qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
+    xive = SPAPR_XIVE(obj);
+
+    /* Enable the CPU IPIs */
+    for (i = 0; i < nr_servers; ++i) {
+        spapr_xive_irq_enable(xive, SPAPR_IRQ_IPI + i, false);
+    }
+
+    return xive;
+}
+
+static void spapr_irq_init_xive(sPAPRMachineState *spapr, int nr_irqs,
+                                int nr_servers, Error **errp)
+{
+    MachineState *machine = MACHINE(spapr);
+    Error *local_err = NULL;
+
+    /* KVM XIVE support */
+    if (kvm_enabled()) {
+        if (machine_kernel_irqchip_required(machine)) {
+            error_setg(errp, "kernel_irqchip requested. no XIVE support");
+            return;
+        }
+    }
+
+    /* QEMU XIVE support */
+    spapr->xive = spapr_xive_create(spapr, TYPE_SPAPR_XIVE, nr_irqs, nr_servers,
+                                    &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
+                                Error **errp)
+{
+    if (!spapr_xive_irq_enable(spapr->xive, irq, lsi)) {
+        error_setg(errp, "IRQ %d is invalid", irq);
+        return -1;
+    }
+    return 0;
+}
+
+static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
+{
+    int i;
+
+    for (i = irq; i < irq + num; ++i) {
+        spapr_xive_irq_disable(spapr->xive, i);
+    }
+}
+
+static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
+{
+    return spapr_xive_qirq(spapr->xive, irq);
+}
+
+static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
+                                      Monitor *mon)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
+    }
+
+    spapr_xive_pic_print_info(spapr->xive, mon);
+}
+
+/*
+ * XIVE uses the full IRQ number space. Set it to 8K to be compatible
+ * with XICS.
+ */
+
+#define SPAPR_IRQ_XIVE_NR_IRQS     0x2000
+#define SPAPR_IRQ_XIVE_NR_MSIS     (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
+
+sPAPRIrq spapr_irq_xive = {
+    .nr_irqs     = SPAPR_IRQ_XIVE_NR_IRQS,
+    .nr_msis     = SPAPR_IRQ_XIVE_NR_MSIS,
+
+    .init        = spapr_irq_init_xive,
+    .claim       = spapr_irq_claim_xive,
+    .free        = spapr_irq_free_xive,
+    .qirq        = spapr_qirq_xive,
+    .print_info  = spapr_irq_print_info_xive,
+};
+
 /*
  * sPAPR IRQ frontend routines for devices
  */
-void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
+void spapr_irq_init(sPAPRMachineState *spapr, int nr_servers, Error **errp)
 {
     sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 
@@ -216,7 +329,7 @@  void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
         spapr_irq_msi_init(spapr, smc->irq->nr_msis);
     }
 
-    smc->irq->init(spapr, smc->irq->nr_irqs, errp);
+    smc->irq->init(spapr, smc->irq->nr_irqs, nr_servers, errp);
 }
 
 int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)