diff mbox series

[1/8] spapr: introduce an IRQ allocator at the machine level

Message ID 20171029181217.9927-2-clg@kaod.org
State New
Headers show
Series [1/8] spapr: introduce an IRQ allocator at the machine level | expand

Commit Message

Cédric Le Goater Oct. 29, 2017, 6:12 p.m. UTC
Currently, the ICSState 'ics' object of the sPAPR machine acts as the
global interrupt source handler and also as the IRQ number allocator
for the machine. Some IRQ numbers are allocated very early in the
machine initialization sequence to populate the device tree, and this
is a problem to introduce the new POWER XIVE interrupt model, as it
needs to share the IRQ numbers with the older model.

To prepare ground for XIVE, here is a proposal adding a set of new
XICSFabric operations to let the machine handle directly the IRQ
number allocation and to decorrelate the allocation from the interrupt
source object :

    bool (*irq_test)(XICSFabric *xi, int irq);
    int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
    void (*irq_free_block)(XICSFabric *xi, int irq, int num);

In these prototypes, the 'irq' parameter refers to a number in the
global IRQ number space. Indexes for arrays storing different state
informations on the interrupts, like the ICSIRQState, are named
'srcno'.

On the sPAPR platform, these IRQ operations are simply backed by a
bitmap 'irq_map' in the machine.

'irq_base' is a base number in sync with the ICSState 'offset'. It
lets us allocate only the subset of the IRQ numbers used on the sPAPR
platform but we could also choose to waste some extra bytes (512) and
allocate the whole number space. 'nr_irqs' is the total number of
IRQs, required to manipulate the bitmap.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/intc/xics.c         |  3 ++-
 hw/intc/xics_spapr.c   | 57 ++++++++++++----------------------------------
 hw/ppc/spapr.c         | 62 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/hw/ppc/spapr.h |  4 ++++
 include/hw/ppc/xics.h  |  4 ++++
 5 files changed, 85 insertions(+), 45 deletions(-)

Comments

Greg Kurz Nov. 8, 2017, 9:54 a.m. UTC | #1
On Sun, 29 Oct 2017 19:12:10 +0100
Cédric Le Goater <clg@kaod.org> wrote:

> Currently, the ICSState 'ics' object of the sPAPR machine acts as the
> global interrupt source handler and also as the IRQ number allocator
> for the machine. Some IRQ numbers are allocated very early in the
> machine initialization sequence to populate the device tree, and this
> is a problem to introduce the new POWER XIVE interrupt model, as it
> needs to share the IRQ numbers with the older model.
> 
> To prepare ground for XIVE, here is a proposal adding a set of new
> XICSFabric operations to let the machine handle directly the IRQ
> number allocation and to decorrelate the allocation from the interrupt
> source object :
> 
>     bool (*irq_test)(XICSFabric *xi, int irq);
>     int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
>     void (*irq_free_block)(XICSFabric *xi, int irq, int num);
> 
> In these prototypes, the 'irq' parameter refers to a number in the
> global IRQ number space. Indexes for arrays storing different state
> informations on the interrupts, like the ICSIRQState, are named
> 'srcno'.
> 
> On the sPAPR platform, these IRQ operations are simply backed by a
> bitmap 'irq_map' in the machine.
> 
> 'irq_base' is a base number in sync with the ICSState 'offset'. It
> lets us allocate only the subset of the IRQ numbers used on the sPAPR
> platform but we could also choose to waste some extra bytes (512) and
> allocate the whole number space. 'nr_irqs' is the total number of
> IRQs, required to manipulate the bitmap.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---

Hi,

The idea makes sense but this patch brings too many changes IMHO.

>  hw/intc/xics.c         |  3 ++-
>  hw/intc/xics_spapr.c   | 57 ++++++++++++----------------------------------
>  hw/ppc/spapr.c         | 62 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/hw/ppc/spapr.h |  4 ++++
>  include/hw/ppc/xics.h  |  4 ++++
>  5 files changed, 85 insertions(+), 45 deletions(-)
> 
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index cc9816e7f204..2c4899f278e2 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -53,6 +53,7 @@ void icp_pic_print_info(ICPState *icp, Monitor *mon)
>  void ics_pic_print_info(ICSState *ics, Monitor *mon)
>  {
>      uint32_t i;
> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>  
>      monitor_printf(mon, "ICS %4x..%4x %p\n",
>                     ics->offset, ics->offset + ics->nr_irqs - 1, ics);
> @@ -64,7 +65,7 @@ void ics_pic_print_info(ICSState *ics, Monitor *mon)
>      for (i = 0; i < ics->nr_irqs; i++) {
>          ICSIRQState *irq = ics->irqs + i;
>  
> -        if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
> +        if (!xic->irq_test(ics->xics, i + ics->offset)) {
>              continue;
>          }
>          monitor_printf(mon, "  %4x %s %02x %02x\n",
> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
> index d98ea8b13068..f2e20bca5b2e 100644
> --- a/hw/intc/xics_spapr.c
> +++ b/hw/intc/xics_spapr.c
> @@ -245,50 +245,23 @@ void xics_spapr_init(sPAPRMachineState *spapr)
>      spapr_register_hypercall(H_IPOLL, h_ipoll);
>  }
>  
> -#define ICS_IRQ_FREE(ics, srcno)   \
> -    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
> -
> -static int ics_find_free_block(ICSState *ics, int num, int alignnum)
> -{
> -    int first, i;
> -
> -    for (first = 0; first < ics->nr_irqs; first += alignnum) {
> -        if (num > (ics->nr_irqs - first)) {
> -            return -1;
> -        }
> -        for (i = first; i < first + num; ++i) {
> -            if (!ICS_IRQ_FREE(ics, i)) {
> -                break;
> -            }
> -        }
> -        if (i == (first + num)) {
> -            return first;
> -        }
> -    }
> -
> -    return -1;
> -}
> -
>  int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
>  {
>      int irq;
> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>  
> -    if (!ics) {
> -        return -1;
> -    }

If spapr_ics_alloc() is never called with ics == NULL, then you
should assert. Also, this looks like this deserves a separate
patch.

>      if (irq_hint) {
> -        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
> +        if (xic->irq_test(ics->xics, irq_hint)) {
>              error_setg(errp, "can't allocate IRQ %d: already in use", irq_hint);
>              return -1;
>          }
>          irq = irq_hint;
>      } else {
> -        irq = ics_find_free_block(ics, 1, 1);
> +        irq = xic->irq_alloc_block(ics->xics, 1, 0);
>          if (irq < 0) {
>              error_setg(errp, "can't allocate IRQ: no IRQ left");
>              return -1;
>          }
> -        irq += ics->offset;
>      }
>  
>      ics_set_irq_type(ics, irq - ics->offset, lsi);
> @@ -305,10 +278,8 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
>                            bool align, Error **errp)
>  {
>      int i, first = -1;
> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>  
> -    if (!ics) {
> -        return -1;
> -    }
>  

Ditto.

>      /*
>       * MSIMesage::data is used for storing VIRQ so
> @@ -320,9 +291,9 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
>      if (align) {
>          assert((num == 1) || (num == 2) || (num == 4) ||
>                 (num == 8) || (num == 16) || (num == 32));
> -        first = ics_find_free_block(ics, num, num);
> +        first = xic->irq_alloc_block(ics->xics, num, num);
>      } else {
> -        first = ics_find_free_block(ics, num, 1);
> +        first = xic->irq_alloc_block(ics->xics, num, 0);
>      }
>      if (first < 0) {
>          error_setg(errp, "can't find a free %d-IRQ block", num);
> @@ -331,25 +302,25 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
>  
>      if (first >= 0) {

It looks like this check isn't needed since we return in the first < 0 case.
Maybe you can fix this in a preliminary patch ?

>          for (i = first; i < first + num; ++i) {
> -            ics_set_irq_type(ics, i, lsi);
> +            ics_set_irq_type(ics, i - ics->offset, lsi);
>          }
>      }
> -    first += ics->offset;
>  
>      trace_xics_alloc_block(first, num, lsi, align);
>  
>      return first;
>  }
>  
> -static void ics_free(ICSState *ics, int srcno, int num)
> +static void ics_free(ICSState *ics, int irq, int num)
>  {
>      int i;
> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>  
> -    for (i = srcno; i < srcno + num; ++i) {
> -        if (ICS_IRQ_FREE(ics, i)) {
> -            trace_xics_ics_free_warn(0, i + ics->offset);
> +    for (i = irq; i < irq + num; ++i) {
> +        if (xic->irq_test(ics->xics, i)) {
> +            trace_xics_ics_free_warn(0, i);
>          }
> -        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
> +        xic->irq_free_block(ics->xics, i, 1);
>      }
>  }
>  
> @@ -357,7 +328,7 @@ void spapr_ics_free(ICSState *ics, int irq, int num)
>  {
>      if (ics_valid_irq(ics, irq)) {
>          trace_xics_ics_free(0, irq, num);
> -        ics_free(ics, irq - ics->offset, num);
> +        ics_free(ics, irq, num);
>      }
>  }
>  
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d682f013d422..88da4bad2328 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1681,6 +1681,24 @@ static const VMStateDescription vmstate_spapr_patb_entry = {
>      },
>  };
>  
> +static bool spapr_irq_map_needed(void *opaque)
> +{
> +    sPAPRMachineState *spapr = opaque;
> +
> +    return find_first_bit(spapr->irq_map, spapr->nr_irqs) < spapr->nr_irqs;
> +}

This will allow migration from older QEMU. Maybe you can add a machine property
so that the subsection is only generated for newer pseries, and you'll support
migration to older QEMU (see details at the end of === Subsections === in
docs/devel/migration.txt).

> +
> +static const VMStateDescription vmstate_spapr_irq_map = {
> +    .name = "spapr_irq_map",
> +    .version_id = 0,
> +    .minimum_version_id = 0,
> +    .needed = spapr_irq_map_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static const VMStateDescription vmstate_spapr = {
>      .name = "spapr",
>      .version_id = 3,
> @@ -1700,6 +1718,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_ov5_cas,
>          &vmstate_spapr_patb_entry,
>          &vmstate_spapr_pending_events,
> +        &vmstate_spapr_irq_map,
>          NULL
>      }
>  };
> @@ -2337,8 +2356,13 @@ static void ppc_spapr_init(MachineState *machine)
>      /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
>      load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
>  
> +    /* Initialize the IRQ allocator */
> +    spapr->nr_irqs  = XICS_IRQS_SPAPR;
> +    spapr->irq_map  = bitmap_new(spapr->nr_irqs);
> +    spapr->irq_base = XICS_IRQ_BASE;
> +
>      /* Set up Interrupt Controller before we create the VCPUs */
> -    xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal);
> +    xics_system_init(machine, spapr->nr_irqs, &error_fatal);
>  
>      /* Set up containers for ibm,client-architecture-support negotiated options
>       */
> @@ -3536,6 +3560,38 @@ static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id)
>      return cpu ? ICP(cpu->intc) : NULL;
>  }
>  
> +static bool spapr_irq_test(XICSFabric *xi, int irq)
> +{
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
> +    int srcno = irq - spapr->irq_base;
> +
> +    return test_bit(srcno, spapr->irq_map);
> +}
> +
> +static int spapr_irq_alloc_block(XICSFabric *xi, int count, int align)
> +{
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
> +    int start = 0;
> +    int srcno;
> +
> +    srcno = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs, start,
> +                                       count, align);
> +    if (srcno == spapr->nr_irqs) {
> +        return -1;
> +    }
> +
> +    bitmap_set(spapr->irq_map, srcno, count);
> +    return srcno + spapr->irq_base;
> +}
> +
> +static void spapr_irq_free_block(XICSFabric *xi, int irq, int num)
> +{
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
> +    int srcno = irq - spapr->irq_base;
> +
> +    bitmap_clear(spapr->irq_map, srcno, num);
> +}
> +
>  static void spapr_pic_print_info(InterruptStatsProvider *obj,
>                                   Monitor *mon)
>  {
> @@ -3630,6 +3686,10 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      xic->ics_get = spapr_ics_get;
>      xic->ics_resend = spapr_ics_resend;
>      xic->icp_get = spapr_icp_get;
> +    xic->irq_test = spapr_irq_test;
> +    xic->irq_alloc_block = spapr_irq_alloc_block;
> +    xic->irq_free_block = spapr_irq_free_block;
> +
>      ispc->print_info = spapr_pic_print_info;
>      /* Force NUMA node memory size to be a multiple of
>       * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 9d21ca9bde3a..b962bfe09bb5 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -7,6 +7,7 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "hw/mem/pc-dimm.h"
>  #include "hw/ppc/spapr_ovec.h"
> +#include "qemu/bitmap.h"
>  
>  struct VIOsPAPRBus;
>  struct sPAPRPHBState;
> @@ -78,6 +79,9 @@ struct sPAPRMachineState {
>      struct VIOsPAPRBus *vio_bus;
>      QLIST_HEAD(, sPAPRPHBState) phbs;
>      struct sPAPRNVRAM *nvram;
> +    int32_t nr_irqs;
> +    unsigned long *irq_map;
> +    uint32_t irq_base;
>      ICSState *ics;
>      sPAPRRTCState rtc;
>  
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 28d248abad61..30e7f2e0a7dd 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -175,6 +175,10 @@ typedef struct XICSFabricClass {
>      ICSState *(*ics_get)(XICSFabric *xi, int irq);
>      void (*ics_resend)(XICSFabric *xi);
>      ICPState *(*icp_get)(XICSFabric *xi, int server);
> +    /* IRQ allocator helpers */
> +    bool (*irq_test)(XICSFabric *xi, int irq);
> +    int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
> +    void (*irq_free_block)(XICSFabric *xi, int irq, int num);

API looks good to me. I suggest you introduce it in a dedicated patch, and
change the allocator implementation in another patch.

>  } XICSFabricClass;
>  
>  #define XICS_IRQS_SPAPR               1024
Cédric Le Goater Nov. 8, 2017, 4:56 p.m. UTC | #2
On 11/08/2017 09:54 AM, Greg Kurz wrote:
> On Sun, 29 Oct 2017 19:12:10 +0100
> Cédric Le Goater <clg@kaod.org> wrote:
> 
>> Currently, the ICSState 'ics' object of the sPAPR machine acts as the
>> global interrupt source handler and also as the IRQ number allocator
>> for the machine. Some IRQ numbers are allocated very early in the
>> machine initialization sequence to populate the device tree, and this
>> is a problem to introduce the new POWER XIVE interrupt model, as it
>> needs to share the IRQ numbers with the older model.
>>
>> To prepare ground for XIVE, here is a proposal adding a set of new
>> XICSFabric operations to let the machine handle directly the IRQ
>> number allocation and to decorrelate the allocation from the interrupt
>> source object :
>>
>>     bool (*irq_test)(XICSFabric *xi, int irq);
>>     int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
>>     void (*irq_free_block)(XICSFabric *xi, int irq, int num);
>>
>> In these prototypes, the 'irq' parameter refers to a number in the
>> global IRQ number space. Indexes for arrays storing different state
>> informations on the interrupts, like the ICSIRQState, are named
>> 'srcno'.
>>
>> On the sPAPR platform, these IRQ operations are simply backed by a
>> bitmap 'irq_map' in the machine.
>>
>> 'irq_base' is a base number in sync with the ICSState 'offset'. It
>> lets us allocate only the subset of the IRQ numbers used on the sPAPR
>> platform but we could also choose to waste some extra bytes (512) and
>> allocate the whole number space. 'nr_irqs' is the total number of
>> IRQs, required to manipulate the bitmap.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
> 
> Hi,
> 
> The idea makes sense but this patch brings too many changes IMHO.

yes. I agree. The next version splits a bit more the changes and 
introduces the XICSFabric ops before the bitmap. 
  
>>  hw/intc/xics.c         |  3 ++-
>>  hw/intc/xics_spapr.c   | 57 ++++++++++++----------------------------------
>>  hw/ppc/spapr.c         | 62 +++++++++++++++++++++++++++++++++++++++++++++++++-
>>  include/hw/ppc/spapr.h |  4 ++++
>>  include/hw/ppc/xics.h  |  4 ++++
>>  5 files changed, 85 insertions(+), 45 deletions(-)
>>
>> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
>> index cc9816e7f204..2c4899f278e2 100644
>> --- a/hw/intc/xics.c
>> +++ b/hw/intc/xics.c
>> @@ -53,6 +53,7 @@ void icp_pic_print_info(ICPState *icp, Monitor *mon)
>>  void ics_pic_print_info(ICSState *ics, Monitor *mon)
>>  {
>>      uint32_t i;
>> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>>  
>>      monitor_printf(mon, "ICS %4x..%4x %p\n",
>>                     ics->offset, ics->offset + ics->nr_irqs - 1, ics);
>> @@ -64,7 +65,7 @@ void ics_pic_print_info(ICSState *ics, Monitor *mon)
>>      for (i = 0; i < ics->nr_irqs; i++) {
>>          ICSIRQState *irq = ics->irqs + i;
>>  
>> -        if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
>> +        if (!xic->irq_test(ics->xics, i + ics->offset)) {
>>              continue;
>>          }
>>          monitor_printf(mon, "  %4x %s %02x %02x\n",
>> diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
>> index d98ea8b13068..f2e20bca5b2e 100644
>> --- a/hw/intc/xics_spapr.c
>> +++ b/hw/intc/xics_spapr.c
>> @@ -245,50 +245,23 @@ void xics_spapr_init(sPAPRMachineState *spapr)
>>      spapr_register_hypercall(H_IPOLL, h_ipoll);
>>  }
>>  
>> -#define ICS_IRQ_FREE(ics, srcno)   \
>> -    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
>> -
>> -static int ics_find_free_block(ICSState *ics, int num, int alignnum)
>> -{
>> -    int first, i;
>> -
>> -    for (first = 0; first < ics->nr_irqs; first += alignnum) {
>> -        if (num > (ics->nr_irqs - first)) {
>> -            return -1;
>> -        }
>> -        for (i = first; i < first + num; ++i) {
>> -            if (!ICS_IRQ_FREE(ics, i)) {
>> -                break;
>> -            }
>> -        }
>> -        if (i == (first + num)) {
>> -            return first;
>> -        }
>> -    }
>> -
>> -    return -1;
>> -}
>> -
>>  int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
>>  {
>>      int irq;
>> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>>  
>> -    if (!ics) {
>> -        return -1;
>> -    }
> 
> If spapr_ics_alloc() is never called with ics == NULL, then you
> should assert. Also, this looks like this deserves a separate
> patch.

yes.
 
>>      if (irq_hint) {
>> -        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
>> +        if (xic->irq_test(ics->xics, irq_hint)) {
>>              error_setg(errp, "can't allocate IRQ %d: already in use", irq_hint);
>>              return -1;
>>          }
>>          irq = irq_hint;
>>      } else {
>> -        irq = ics_find_free_block(ics, 1, 1);
>> +        irq = xic->irq_alloc_block(ics->xics, 1, 0);
>>          if (irq < 0) {
>>              error_setg(errp, "can't allocate IRQ: no IRQ left");
>>              return -1;
>>          }
>> -        irq += ics->offset;
>>      }
>>  
>>      ics_set_irq_type(ics, irq - ics->offset, lsi);
>> @@ -305,10 +278,8 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
>>                            bool align, Error **errp)
>>  {
>>      int i, first = -1;
>> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>>  
>> -    if (!ics) {
>> -        return -1;
>> -    }
>>  
> 
> Ditto.
> 
>>      /*
>>       * MSIMesage::data is used for storing VIRQ so
>> @@ -320,9 +291,9 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
>>      if (align) {
>>          assert((num == 1) || (num == 2) || (num == 4) ||
>>                 (num == 8) || (num == 16) || (num == 32));
>> -        first = ics_find_free_block(ics, num, num);
>> +        first = xic->irq_alloc_block(ics->xics, num, num);
>>      } else {
>> -        first = ics_find_free_block(ics, num, 1);
>> +        first = xic->irq_alloc_block(ics->xics, num, 0);
>>      }
>>      if (first < 0) {
>>          error_setg(errp, "can't find a free %d-IRQ block", num);
>> @@ -331,25 +302,25 @@ int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
>>  
>>      if (first >= 0) {
> 
> It looks like this check isn't needed since we return in the first < 0 case.
> Maybe you can fix this in a preliminary patch ?

done.

>>          for (i = first; i < first + num; ++i) {
>> -            ics_set_irq_type(ics, i, lsi);
>> +            ics_set_irq_type(ics, i - ics->offset, lsi);
>>          }
>>      }
>> -    first += ics->offset;
>>  
>>      trace_xics_alloc_block(first, num, lsi, align);
>>  
>>      return first;
>>  }
>>  
>> -static void ics_free(ICSState *ics, int srcno, int num)
>> +static void ics_free(ICSState *ics, int irq, int num)
>>  {
>>      int i;
>> +    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
>>  
>> -    for (i = srcno; i < srcno + num; ++i) {
>> -        if (ICS_IRQ_FREE(ics, i)) {
>> -            trace_xics_ics_free_warn(0, i + ics->offset);
>> +    for (i = irq; i < irq + num; ++i) {
>> +        if (xic->irq_test(ics->xics, i)) {
>> +            trace_xics_ics_free_warn(0, i);
>>          }
>> -        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
>> +        xic->irq_free_block(ics->xics, i, 1);
>>      }
>>  }
>>  
>> @@ -357,7 +328,7 @@ void spapr_ics_free(ICSState *ics, int irq, int num)
>>  {
>>      if (ics_valid_irq(ics, irq)) {
>>          trace_xics_ics_free(0, irq, num);
>> -        ics_free(ics, irq - ics->offset, num);
>> +        ics_free(ics, irq, num);
>>      }
>>  }
>>  
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index d682f013d422..88da4bad2328 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1681,6 +1681,24 @@ static const VMStateDescription vmstate_spapr_patb_entry = {
>>      },
>>  };
>>  
>> +static bool spapr_irq_map_needed(void *opaque)
>> +{
>> +    sPAPRMachineState *spapr = opaque;
>> +
>> +    return find_first_bit(spapr->irq_map, spapr->nr_irqs) < spapr->nr_irqs;
>> +}
> 
> This will allow migration from older QEMU. Maybe you can add a machine property
> so that the subsection is only generated for newer pseries, and you'll support
> migration to older QEMU (see details at the end of === Subsections === in
> docs/devel/migration.txt).

I have found a better way to save state. We will discuss it in the next round.

>> +
>> +static const VMStateDescription vmstate_spapr_irq_map = {
>> +    .name = "spapr_irq_map",
>> +    .version_id = 0,
>> +    .minimum_version_id = 0,
>> +    .needed = spapr_irq_map_needed,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>>  static const VMStateDescription vmstate_spapr = {
>>      .name = "spapr",
>>      .version_id = 3,
>> @@ -1700,6 +1718,7 @@ static const VMStateDescription vmstate_spapr = {
>>          &vmstate_spapr_ov5_cas,
>>          &vmstate_spapr_patb_entry,
>>          &vmstate_spapr_pending_events,
>> +        &vmstate_spapr_irq_map,
>>          NULL
>>      }
>>  };
>> @@ -2337,8 +2356,13 @@ static void ppc_spapr_init(MachineState *machine)
>>      /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
>>      load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
>>  
>> +    /* Initialize the IRQ allocator */
>> +    spapr->nr_irqs  = XICS_IRQS_SPAPR;
>> +    spapr->irq_map  = bitmap_new(spapr->nr_irqs);
>> +    spapr->irq_base = XICS_IRQ_BASE;
>> +
>>      /* Set up Interrupt Controller before we create the VCPUs */
>> -    xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal);
>> +    xics_system_init(machine, spapr->nr_irqs, &error_fatal);
>>  
>>      /* Set up containers for ibm,client-architecture-support negotiated options
>>       */
>> @@ -3536,6 +3560,38 @@ static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id)
>>      return cpu ? ICP(cpu->intc) : NULL;
>>  }
>>  
>> +static bool spapr_irq_test(XICSFabric *xi, int irq)
>> +{
>> +    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
>> +    int srcno = irq - spapr->irq_base;
>> +
>> +    return test_bit(srcno, spapr->irq_map);
>> +}
>> +
>> +static int spapr_irq_alloc_block(XICSFabric *xi, int count, int align)
>> +{
>> +    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
>> +    int start = 0;
>> +    int srcno;
>> +
>> +    srcno = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs, start,
>> +                                       count, align);
>> +    if (srcno == spapr->nr_irqs) {
>> +        return -1;
>> +    }
>> +
>> +    bitmap_set(spapr->irq_map, srcno, count);
>> +    return srcno + spapr->irq_base;
>> +}
>> +
>> +static void spapr_irq_free_block(XICSFabric *xi, int irq, int num)
>> +{
>> +    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
>> +    int srcno = irq - spapr->irq_base;
>> +
>> +    bitmap_clear(spapr->irq_map, srcno, num);
>> +}
>> +
>>  static void spapr_pic_print_info(InterruptStatsProvider *obj,
>>                                   Monitor *mon)
>>  {
>> @@ -3630,6 +3686,10 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>      xic->ics_get = spapr_ics_get;
>>      xic->ics_resend = spapr_ics_resend;
>>      xic->icp_get = spapr_icp_get;
>> +    xic->irq_test = spapr_irq_test;
>> +    xic->irq_alloc_block = spapr_irq_alloc_block;
>> +    xic->irq_free_block = spapr_irq_free_block;
>> +
>>      ispc->print_info = spapr_pic_print_info;
>>      /* Force NUMA node memory size to be a multiple of
>>       * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 9d21ca9bde3a..b962bfe09bb5 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -7,6 +7,7 @@
>>  #include "hw/ppc/spapr_drc.h"
>>  #include "hw/mem/pc-dimm.h"
>>  #include "hw/ppc/spapr_ovec.h"
>> +#include "qemu/bitmap.h"
>>  
>>  struct VIOsPAPRBus;
>>  struct sPAPRPHBState;
>> @@ -78,6 +79,9 @@ struct sPAPRMachineState {
>>      struct VIOsPAPRBus *vio_bus;
>>      QLIST_HEAD(, sPAPRPHBState) phbs;
>>      struct sPAPRNVRAM *nvram;
>> +    int32_t nr_irqs;
>> +    unsigned long *irq_map;
>> +    uint32_t irq_base;
>>      ICSState *ics;
>>      sPAPRRTCState rtc;
>>  
>> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
>> index 28d248abad61..30e7f2e0a7dd 100644
>> --- a/include/hw/ppc/xics.h
>> +++ b/include/hw/ppc/xics.h
>> @@ -175,6 +175,10 @@ typedef struct XICSFabricClass {
>>      ICSState *(*ics_get)(XICSFabric *xi, int irq);
>>      void (*ics_resend)(XICSFabric *xi);
>>      ICPState *(*icp_get)(XICSFabric *xi, int server);
>> +    /* IRQ allocator helpers */
>> +    bool (*irq_test)(XICSFabric *xi, int irq);
>> +    int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
>> +    void (*irq_free_block)(XICSFabric *xi, int irq, int num);
> 
> API looks good to me. I suggest you introduce it in a dedicated patch, and
> change the allocator implementation in another patch.

yep. 

Thanks

C. 


>>  } XICSFabricClass;
>>  
>>  #define XICS_IRQS_SPAPR               1024
>
diff mbox series

Patch

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index cc9816e7f204..2c4899f278e2 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -53,6 +53,7 @@  void icp_pic_print_info(ICPState *icp, Monitor *mon)
 void ics_pic_print_info(ICSState *ics, Monitor *mon)
 {
     uint32_t i;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
 
     monitor_printf(mon, "ICS %4x..%4x %p\n",
                    ics->offset, ics->offset + ics->nr_irqs - 1, ics);
@@ -64,7 +65,7 @@  void ics_pic_print_info(ICSState *ics, Monitor *mon)
     for (i = 0; i < ics->nr_irqs; i++) {
         ICSIRQState *irq = ics->irqs + i;
 
-        if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
+        if (!xic->irq_test(ics->xics, i + ics->offset)) {
             continue;
         }
         monitor_printf(mon, "  %4x %s %02x %02x\n",
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index d98ea8b13068..f2e20bca5b2e 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -245,50 +245,23 @@  void xics_spapr_init(sPAPRMachineState *spapr)
     spapr_register_hypercall(H_IPOLL, h_ipoll);
 }
 
-#define ICS_IRQ_FREE(ics, srcno)   \
-    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
-
-static int ics_find_free_block(ICSState *ics, int num, int alignnum)
-{
-    int first, i;
-
-    for (first = 0; first < ics->nr_irqs; first += alignnum) {
-        if (num > (ics->nr_irqs - first)) {
-            return -1;
-        }
-        for (i = first; i < first + num; ++i) {
-            if (!ICS_IRQ_FREE(ics, i)) {
-                break;
-            }
-        }
-        if (i == (first + num)) {
-            return first;
-        }
-    }
-
-    return -1;
-}
-
 int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
 {
     int irq;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
 
-    if (!ics) {
-        return -1;
-    }
     if (irq_hint) {
-        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
+        if (xic->irq_test(ics->xics, irq_hint)) {
             error_setg(errp, "can't allocate IRQ %d: already in use", irq_hint);
             return -1;
         }
         irq = irq_hint;
     } else {
-        irq = ics_find_free_block(ics, 1, 1);
+        irq = xic->irq_alloc_block(ics->xics, 1, 0);
         if (irq < 0) {
             error_setg(errp, "can't allocate IRQ: no IRQ left");
             return -1;
         }
-        irq += ics->offset;
     }
 
     ics_set_irq_type(ics, irq - ics->offset, lsi);
@@ -305,10 +278,8 @@  int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
                           bool align, Error **errp)
 {
     int i, first = -1;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
 
-    if (!ics) {
-        return -1;
-    }
 
     /*
      * MSIMesage::data is used for storing VIRQ so
@@ -320,9 +291,9 @@  int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
     if (align) {
         assert((num == 1) || (num == 2) || (num == 4) ||
                (num == 8) || (num == 16) || (num == 32));
-        first = ics_find_free_block(ics, num, num);
+        first = xic->irq_alloc_block(ics->xics, num, num);
     } else {
-        first = ics_find_free_block(ics, num, 1);
+        first = xic->irq_alloc_block(ics->xics, num, 0);
     }
     if (first < 0) {
         error_setg(errp, "can't find a free %d-IRQ block", num);
@@ -331,25 +302,25 @@  int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
 
     if (first >= 0) {
         for (i = first; i < first + num; ++i) {
-            ics_set_irq_type(ics, i, lsi);
+            ics_set_irq_type(ics, i - ics->offset, lsi);
         }
     }
-    first += ics->offset;
 
     trace_xics_alloc_block(first, num, lsi, align);
 
     return first;
 }
 
-static void ics_free(ICSState *ics, int srcno, int num)
+static void ics_free(ICSState *ics, int irq, int num)
 {
     int i;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(ics->xics);
 
-    for (i = srcno; i < srcno + num; ++i) {
-        if (ICS_IRQ_FREE(ics, i)) {
-            trace_xics_ics_free_warn(0, i + ics->offset);
+    for (i = irq; i < irq + num; ++i) {
+        if (xic->irq_test(ics->xics, i)) {
+            trace_xics_ics_free_warn(0, i);
         }
-        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
+        xic->irq_free_block(ics->xics, i, 1);
     }
 }
 
@@ -357,7 +328,7 @@  void spapr_ics_free(ICSState *ics, int irq, int num)
 {
     if (ics_valid_irq(ics, irq)) {
         trace_xics_ics_free(0, irq, num);
-        ics_free(ics, irq - ics->offset, num);
+        ics_free(ics, irq, num);
     }
 }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d682f013d422..88da4bad2328 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1681,6 +1681,24 @@  static const VMStateDescription vmstate_spapr_patb_entry = {
     },
 };
 
+static bool spapr_irq_map_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    return find_first_bit(spapr->irq_map, spapr->nr_irqs) < spapr->nr_irqs;
+}
+
+static const VMStateDescription vmstate_spapr_irq_map = {
+    .name = "spapr_irq_map",
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .needed = spapr_irq_map_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, nr_irqs),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_spapr = {
     .name = "spapr",
     .version_id = 3,
@@ -1700,6 +1718,7 @@  static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_ov5_cas,
         &vmstate_spapr_patb_entry,
         &vmstate_spapr_pending_events,
+        &vmstate_spapr_irq_map,
         NULL
     }
 };
@@ -2337,8 +2356,13 @@  static void ppc_spapr_init(MachineState *machine)
     /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
     load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
 
+    /* Initialize the IRQ allocator */
+    spapr->nr_irqs  = XICS_IRQS_SPAPR;
+    spapr->irq_map  = bitmap_new(spapr->nr_irqs);
+    spapr->irq_base = XICS_IRQ_BASE;
+
     /* Set up Interrupt Controller before we create the VCPUs */
-    xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal);
+    xics_system_init(machine, spapr->nr_irqs, &error_fatal);
 
     /* Set up containers for ibm,client-architecture-support negotiated options
      */
@@ -3536,6 +3560,38 @@  static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id)
     return cpu ? ICP(cpu->intc) : NULL;
 }
 
+static bool spapr_irq_test(XICSFabric *xi, int irq)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+    int srcno = irq - spapr->irq_base;
+
+    return test_bit(srcno, spapr->irq_map);
+}
+
+static int spapr_irq_alloc_block(XICSFabric *xi, int count, int align)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+    int start = 0;
+    int srcno;
+
+    srcno = bitmap_find_next_zero_area(spapr->irq_map, spapr->nr_irqs, start,
+                                       count, align);
+    if (srcno == spapr->nr_irqs) {
+        return -1;
+    }
+
+    bitmap_set(spapr->irq_map, srcno, count);
+    return srcno + spapr->irq_base;
+}
+
+static void spapr_irq_free_block(XICSFabric *xi, int irq, int num)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+    int srcno = irq - spapr->irq_base;
+
+    bitmap_clear(spapr->irq_map, srcno, num);
+}
+
 static void spapr_pic_print_info(InterruptStatsProvider *obj,
                                  Monitor *mon)
 {
@@ -3630,6 +3686,10 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
     xic->ics_get = spapr_ics_get;
     xic->ics_resend = spapr_ics_resend;
     xic->icp_get = spapr_icp_get;
+    xic->irq_test = spapr_irq_test;
+    xic->irq_alloc_block = spapr_irq_alloc_block;
+    xic->irq_free_block = spapr_irq_free_block;
+
     ispc->print_info = spapr_pic_print_info;
     /* Force NUMA node memory size to be a multiple of
      * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 9d21ca9bde3a..b962bfe09bb5 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -7,6 +7,7 @@ 
 #include "hw/ppc/spapr_drc.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/ppc/spapr_ovec.h"
+#include "qemu/bitmap.h"
 
 struct VIOsPAPRBus;
 struct sPAPRPHBState;
@@ -78,6 +79,9 @@  struct sPAPRMachineState {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
     struct sPAPRNVRAM *nvram;
+    int32_t nr_irqs;
+    unsigned long *irq_map;
+    uint32_t irq_base;
     ICSState *ics;
     sPAPRRTCState rtc;
 
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 28d248abad61..30e7f2e0a7dd 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -175,6 +175,10 @@  typedef struct XICSFabricClass {
     ICSState *(*ics_get)(XICSFabric *xi, int irq);
     void (*ics_resend)(XICSFabric *xi);
     ICPState *(*icp_get)(XICSFabric *xi, int server);
+    /* IRQ allocator helpers */
+    bool (*irq_test)(XICSFabric *xi, int irq);
+    int (*irq_alloc_block)(XICSFabric *xi, int count, int align);
+    void (*irq_free_block)(XICSFabric *xi, int irq, int num);
 } XICSFabricClass;
 
 #define XICS_IRQS_SPAPR               1024