diff mbox series

[v6,2/4] spapr: introduce a fixed IRQ number space

Message ID 20180730141134.31153-3-clg@kaod.org
State New
Headers show
Series spapr: introduce a fixed IRQ number space and an IRQ controller backend | expand

Commit Message

Cédric Le Goater July 30, 2018, 2:11 p.m. UTC
This proposal introduces a new IRQ number space layout using static
numbers for all devices, depending on a device index, and a bitmap
allocator for the MSI IRQ numbers which are negotiated by the guest at
runtime.

As the VIO device model does not have a device index but a "reg"
property, we introduce a formula to compute an IRQ number from a "reg"
value. It should minimize most of the collisions.

The previous layout is kept in pre-3.1 machines raising the
'legacy_irq_allocation' machine class flag.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/spapr.h     |  5 +++
 include/hw/ppc/spapr_irq.h | 32 ++++++++++++++++++
 hw/ppc/spapr.c             | 32 ++++++++++++++++++
 hw/ppc/spapr_events.c      | 12 ++++---
 hw/ppc/spapr_irq.c         | 56 ++++++++++++++++++++++++++++++++
 hw/ppc/spapr_pci.c         | 29 +++++++++++++----
 hw/ppc/spapr_vio.c         | 66 ++++++++++++++++++++++++++++++++++----
 hw/ppc/Makefile.objs       |  2 +-
 8 files changed, 216 insertions(+), 18 deletions(-)
 create mode 100644 include/hw/ppc/spapr_irq.h
 create mode 100644 hw/ppc/spapr_irq.c

Comments

Greg Kurz July 31, 2018, 5:39 p.m. UTC | #1
On Mon, 30 Jul 2018 16:11:32 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> This proposal introduces a new IRQ number space layout using static
> numbers for all devices, depending on a device index, and a bitmap
> allocator for the MSI IRQ numbers which are negotiated by the guest at
> runtime.
> 
> As the VIO device model does not have a device index but a "reg"
> property, we introduce a formula to compute an IRQ number from a "reg"
> value. It should minimize most of the collisions.
> 
> The previous layout is kept in pre-3.1 machines raising the
> 'legacy_irq_allocation' machine class flag.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/spapr.h     |  5 +++
>  include/hw/ppc/spapr_irq.h | 32 ++++++++++++++++++
>  hw/ppc/spapr.c             | 32 ++++++++++++++++++
>  hw/ppc/spapr_events.c      | 12 ++++---
>  hw/ppc/spapr_irq.c         | 56 ++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_pci.c         | 29 +++++++++++++----
>  hw/ppc/spapr_vio.c         | 66 ++++++++++++++++++++++++++++++++++----
>  hw/ppc/Makefile.objs       |  2 +-
>  8 files changed, 216 insertions(+), 18 deletions(-)
>  create mode 100644 include/hw/ppc/spapr_irq.h
>  create mode 100644 hw/ppc/spapr_irq.c
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7e5de1a6fd42..73067f5ee8aa 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -8,6 +8,7 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "hw/mem/pc-dimm.h"
>  #include "hw/ppc/spapr_ovec.h"
> +#include "hw/ppc/spapr_irq.h"
>  
>  struct VIOsPAPRBus;
>  struct sPAPRPHBState;
> @@ -101,6 +102,8 @@ struct sPAPRMachineClass {
>      bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
>      bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
>      bool pre_2_10_has_unused_icps;
> +    bool legacy_irq_allocation;
> +
>      void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index,
>                            uint64_t *buid, hwaddr *pio, 
>                            hwaddr *mmio32, hwaddr *mmio64,
> @@ -167,6 +170,8 @@ struct sPAPRMachineState {
>      char *kvm_type;
>  
>      const char *icp_type;
> +    int32_t irq_map_nr;
> +    unsigned long *irq_map;
>  
>      bool cmd_line_caps[SPAPR_CAP_NUM];
>      sPAPRCapabilities def, eff, mig;
> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> new file mode 100644
> index 000000000000..6f7f50548809
> --- /dev/null
> +++ b/include/hw/ppc/spapr_irq.h
> @@ -0,0 +1,32 @@
> +/*
> + * QEMU PowerPC sPAPR IRQ backend definitions
> + *
> + * Copyright (c) 2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#ifndef HW_SPAPR_IRQ_H
> +#define HW_SPAPR_IRQ_H
> +
> +/*
> + * IRQ range offsets per device type
> + */
> +#define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
> +#define SPAPR_IRQ_HOTPLUG    0x1001
> +#define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
> +#define SPAPR_IRQ_PCI_LSI    0x1200  /* 32+ PHBs devices */
> +
> +#define SPAPR_IRQ_MSI        0x1300  /* Offset of the dynamic range covered
> +                                      * by the bitmap allocator */
> +
> +typedef struct sPAPRMachineState sPAPRMachineState;
> +

Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if
some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h".
We had several build breaks detected by 'make docker-test-build@centos6'...
The correct way to address this would be to move the typedef to the
"qemu/typedefs.h" header.

This being said, docker-test-build@centos6 vanished with commit e7b3af81597,
so I guess we don't support such old distros anymore, and we can live with
duplicate typedefs.

> +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis);
> +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
> +                        Error **errp);
> +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num);
> +void spapr_irq_msi_reset(sPAPRMachineState *spapr);
> +
> +#endif
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 3c72173c7e0f..792e24453d8b 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -189,6 +189,11 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp)
>      sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
>      Error *local_err = NULL;
>  
> +    /* Initialize the MSI IRQ allocator. */
> +    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +        spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI);
> +    }
> +
>      if (kvm_enabled()) {
>          if (machine_kernel_irqchip_allowed(machine) &&
>              !xics_kvm_init(spapr, &local_err)) {
> @@ -1636,6 +1641,10 @@ static void spapr_machine_reset(void)
>          ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal);
>      }
>  
> +    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +        spapr_irq_msi_reset(spapr);
> +    }
> +
>      qemu_devices_reset();
>  
>      /* DRC reset may cause a device to be unplugged. This will cause troubles
> @@ -1910,6 +1919,24 @@ static const VMStateDescription vmstate_spapr_patb_entry = {
>      },
>  };
>  
> +static bool spapr_irq_map_needed(void *opaque)
> +{
> +    sPAPRMachineState *spapr = opaque;
> +
> +    return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr);
> +}
> +
> +static const VMStateDescription vmstate_spapr_irq_map = {
> +    .name = "spapr_irq_map",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = spapr_irq_map_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static const VMStateDescription vmstate_spapr = {
>      .name = "spapr",
>      .version_id = 3,
> @@ -1937,6 +1964,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_cap_cfpc,
>          &vmstate_spapr_cap_sbbc,
>          &vmstate_spapr_cap_ibs,
> +        &vmstate_spapr_irq_map,
>          NULL
>      }
>  };
> @@ -4085,8 +4113,12 @@ static void spapr_machine_3_0_instance_options(MachineState *machine)
>  
>  static void spapr_machine_3_0_class_options(MachineClass *mc)
>  {
> +    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> +
>      spapr_machine_3_1_class_options(mc);
>      SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_0);
> +
> +    smc->legacy_irq_allocation = true;
>  }
>  
>  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index e4f5946a2188..32719a1b72d0 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -707,9 +707,11 @@ void spapr_clear_pending_events(sPAPRMachineState *spapr)
>  
>  void spapr_events_init(sPAPRMachineState *spapr)
>  {
> -    int epow_irq;
> +    int epow_irq = SPAPR_IRQ_EPOW;
>  
> -    epow_irq = spapr_irq_findone(spapr, &error_fatal);
> +    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +        epow_irq = spapr_irq_findone(spapr, &error_fatal);
> +    }
>  
>      spapr_irq_claim(spapr, epow_irq, false, &error_fatal);
>  
> @@ -729,9 +731,11 @@ void spapr_events_init(sPAPRMachineState *spapr)
>       * checking that it's enabled.
>       */
>      if (spapr->use_hotplug_event_source) {
> -        int hp_irq;
> +        int hp_irq = SPAPR_IRQ_HOTPLUG;
>  
> -        hp_irq = spapr_irq_findone(spapr, &error_fatal);
> +        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +            hp_irq = spapr_irq_findone(spapr, &error_fatal);
> +        }
>  
>          spapr_irq_claim(spapr, hp_irq, false, &error_fatal);
>  
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> new file mode 100644
> index 000000000000..24e9c1d4433c
> --- /dev/null
> +++ b/hw/ppc/spapr_irq.c
> @@ -0,0 +1,56 @@
> +/*
> + * QEMU PowerPC sPAPR IRQ interface
> + *
> + * Copyright (c) 2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "qemu/error-report.h"
> +#include "qapi/error.h"
> +#include "hw/ppc/spapr.h"
> +#include "hw/ppc/xics.h"
> +
> +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis)
> +{
> +    spapr->irq_map_nr = nr_msis;
> +    spapr->irq_map = bitmap_new(spapr->irq_map_nr);
> +}
> +
> +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
> +                        Error **errp)
> +{
> +    int irq;
> +
> +    /*
> +     * The 'align_mask' parameter of bitmap_find_next_zero_area()
> +     * should be one less than a power of 2; 0 means no
> +     * alignment. Adapt the 'align' value of the former allocator
> +     * to fit the requirements of bitmap_find_next_zero_area()
> +     */
> +    align -= 1;
> +
> +    irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
> +                                     align);
> +    if (irq == spapr->irq_map_nr) {
> +        error_setg(errp, "can't find a free %d-IRQ block", num);
> +        return -1;
> +    }
> +
> +    bitmap_set(spapr->irq_map, irq, num);
> +
> +    return irq + SPAPR_IRQ_MSI;
> +}
> +
> +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num)
> +{
> +    bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
> +}
> +
> +void spapr_irq_msi_reset(sPAPRMachineState *spapr)
> +{
> +    bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr);
> +}
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 497b896c7d24..3791ced6c536 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -334,6 +334,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>              return;
>          }
>  
> +        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
> +        }
>          spapr_irq_free(spapr, msi->first_irq, msi->num);
>          if (msi_present(pdev)) {
>              spapr_msi_setmsg(pdev, 0, false, 0, 0);
> @@ -372,7 +375,13 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>      }
>  
>      /* Allocate MSIs */
> -    irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, &err);
> +    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +        irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
> +                             &err);
> +    } else {
> +        irq = spapr_irq_msi_alloc(spapr, req_num,
> +                                  ret_intr_type == RTAS_TYPE_MSI, &err);
> +    }
>      if (err) {
>          error_reportf_err(err, "Can't allocate MSIs for device %x: ",
>                            config_addr);
> @@ -392,6 +401,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>  
>      /* Release previous MSIs */
>      if (msi) {
> +        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
> +        }

SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you
call it three times. Even if this isn't a hot path, maybe cache this in
an smc variable at the beginning of the function as we do pretty much
everywhere else. Also this would give prettier code IMHO.

>          spapr_irq_free(spapr, msi->first_irq, msi->num);
>          g_hash_table_remove(phb->msi, &config_addr);
>      }
> @@ -1705,14 +1717,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
>  
>      /* Initialize the LSI table */
>      for (i = 0; i < PCI_NUM_PINS; i++) {
> -        uint32_t irq;
> +        uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
>          Error *local_err = NULL;
>  
> -        irq = spapr_irq_findone(spapr, &local_err);
> -        if (local_err) {
> -            error_propagate(errp, local_err);
> -            error_prepend(errp, "can't allocate LSIs: ");
> -            return;
> +        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {

Same remark. There's another SPAPR_MACHINE_GET_CLASS() user in this
function.

> +            irq = spapr_irq_findone(spapr, &local_err);
> +            if (local_err) {
> +                error_propagate(errp, local_err);
> +                error_prepend(errp, "can't allocate LSIs: ");
> +                return;
> +            }
>          }
>  
>          spapr_irq_claim(spapr, irq, true, &local_err);
> @@ -2123,6 +2137,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>      _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
>      _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
>      _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
> +    /* TODO: fine tune the total count of allocatable MSIs per PHB */
>      _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));
>  
>      /* Dynamic DMA window */
> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
> index be9af71437cc..840d4a3c451c 100644
> --- a/hw/ppc/spapr_vio.c
> +++ b/hw/ppc/spapr_vio.c
> @@ -37,12 +37,13 @@
>  
>  #include "hw/ppc/spapr.h"
>  #include "hw/ppc/spapr_vio.h"
> -#include "hw/ppc/xics.h"
>  #include "hw/ppc/fdt.h"
>  #include "trace.h"
>  
>  #include <libfdt.h>
>  
> +#define SPAPR_VIO_REG_BASE 0x71000000
> +
>  static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name,
>                                void *opaque, Error **errp)
>  {
> @@ -445,6 +446,55 @@ static void spapr_vio_busdev_reset(DeviceState *qdev)
>      }
>  }
>  
> +/*
> + * The register property of a VIO device is defined in libvirt using
> + * 0x1000 as a base register number plus a 0x1000 increment. For the
> + * VIO tty device, the base number is changed to 0x30000000. QEMU uses
> + * a base register number of 0x71000000 and then a simple increment.
> + *
> + * The formula below tries to compute a unique index number from the
> + * register value that will be used to define the IRQ number of the
> + * VIO device.
> + *
> + * A maximum of 256 VIO devices is covered. Collisions are possible
> + * but they will be detected when the IRQ is claimed.
> + */
> +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg)
> +{
> +    uint32_t irq;
> +
> +    if (reg >= SPAPR_VIO_REG_BASE) {
> +        /*
> +         * VIO device register values when allocated by QEMU. For
> +         * these, we simply mask the high bits to fit the overall
> +         * range: [0x00 - 0xff].
> +         *
> +         * The nvram VIO device (reg=0x71000000) is a static device of
> +         * the pseries machine and so is always allocated by QEMU. Its
> +         * IRQ number is 0x0.
> +         */
> +        irq = reg & 0xff;
> +
> +    } else if (reg >= 0x30000000) {
> +        /*
> +         * VIO tty devices register values, when allocated by libvirt,
> +         * are mapped in range [0xf0 - 0xff], giving us a maximum of 16
> +         * vtys.
> +         */
> +        irq = 0xf0 | ((reg >> 12) & 0xf);
> +
> +    } else {
> +        /*
> +         * Other VIO devices register values, when allocated by
> +         * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts
> +         * will be detected when IRQ is claimed.
> +         */
> +        irq = (reg >> 12) & 0xff;
> +    }
> +

Nice formula :)

The patch looks quite good to me, and my remarks about SPAPR_MACHINE_GET_CLASS()
can be addressed in a followup, so:

Reviewed-by: Greg Kurz <groug@kaod.org>

> +    return SPAPR_IRQ_VIO | irq;
> +}
> +
>  static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
>  {
>      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> @@ -485,10 +535,14 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
>      }
>  
>      if (!dev->irq) {
> -        dev->irq = spapr_irq_findone(spapr, &local_err);
> -        if (local_err) {
> -            error_propagate(errp, local_err);
> -            return;
> +        dev->irq = spapr_vio_reg_to_irq(dev->reg);
> +
> +        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> +            dev->irq = spapr_irq_findone(spapr, &local_err);
> +            if (local_err) {
> +                error_propagate(errp, local_err);
> +                return;
> +            }
>          }
>      }
>  
> @@ -557,7 +611,7 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
>      /* Create bus on bridge device */
>      qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio");
>      bus = SPAPR_VIO_BUS(qbus);
> -    bus->next_reg = 0x71000000;
> +    bus->next_reg = SPAPR_VIO_REG_BASE;
>  
>      /* hcall-vio */
>      spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index bcab6323b7ed..4ab556467289 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o
>  obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o
>  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
> -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o
> +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o
>  # IBM PowerNV
>  obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
David Gibson Aug. 1, 2018, 6:38 a.m. UTC | #2
On Tue, Jul 31, 2018 at 07:39:45PM +0200, Greg Kurz wrote:
> On Mon, 30 Jul 2018 16:11:32 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
> 
> > This proposal introduces a new IRQ number space layout using static
> > numbers for all devices, depending on a device index, and a bitmap
> > allocator for the MSI IRQ numbers which are negotiated by the guest at
> > runtime.
> > 
> > As the VIO device model does not have a device index but a "reg"
> > property, we introduce a formula to compute an IRQ number from a "reg"
> > value. It should minimize most of the collisions.
> > 
> > The previous layout is kept in pre-3.1 machines raising the
> > 'legacy_irq_allocation' machine class flag.
> > 
> > Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > ---
> >  include/hw/ppc/spapr.h     |  5 +++
> >  include/hw/ppc/spapr_irq.h | 32 ++++++++++++++++++
> >  hw/ppc/spapr.c             | 32 ++++++++++++++++++
> >  hw/ppc/spapr_events.c      | 12 ++++---
> >  hw/ppc/spapr_irq.c         | 56 ++++++++++++++++++++++++++++++++
> >  hw/ppc/spapr_pci.c         | 29 +++++++++++++----
> >  hw/ppc/spapr_vio.c         | 66 ++++++++++++++++++++++++++++++++++----
> >  hw/ppc/Makefile.objs       |  2 +-
> >  8 files changed, 216 insertions(+), 18 deletions(-)
> >  create mode 100644 include/hw/ppc/spapr_irq.h
> >  create mode 100644 hw/ppc/spapr_irq.c
> > 
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 7e5de1a6fd42..73067f5ee8aa 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -8,6 +8,7 @@
> >  #include "hw/ppc/spapr_drc.h"
> >  #include "hw/mem/pc-dimm.h"
> >  #include "hw/ppc/spapr_ovec.h"
> > +#include "hw/ppc/spapr_irq.h"
> >  
> >  struct VIOsPAPRBus;
> >  struct sPAPRPHBState;
> > @@ -101,6 +102,8 @@ struct sPAPRMachineClass {
> >      bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
> >      bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
> >      bool pre_2_10_has_unused_icps;
> > +    bool legacy_irq_allocation;
> > +
> >      void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index,
> >                            uint64_t *buid, hwaddr *pio, 
> >                            hwaddr *mmio32, hwaddr *mmio64,
> > @@ -167,6 +170,8 @@ struct sPAPRMachineState {
> >      char *kvm_type;
> >  
> >      const char *icp_type;
> > +    int32_t irq_map_nr;
> > +    unsigned long *irq_map;
> >  
> >      bool cmd_line_caps[SPAPR_CAP_NUM];
> >      sPAPRCapabilities def, eff, mig;
> > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> > new file mode 100644
> > index 000000000000..6f7f50548809
> > --- /dev/null
> > +++ b/include/hw/ppc/spapr_irq.h
> > @@ -0,0 +1,32 @@
> > +/*
> > + * QEMU PowerPC sPAPR IRQ backend definitions
> > + *
> > + * Copyright (c) 2018, IBM Corporation.
> > + *
> > + * This code is licensed under the GPL version 2 or later. See the
> > + * COPYING file in the top-level directory.
> > + */
> > +
> > +#ifndef HW_SPAPR_IRQ_H
> > +#define HW_SPAPR_IRQ_H
> > +
> > +/*
> > + * IRQ range offsets per device type
> > + */
> > +#define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
> > +#define SPAPR_IRQ_HOTPLUG    0x1001
> > +#define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
> > +#define SPAPR_IRQ_PCI_LSI    0x1200  /* 32+ PHBs devices */
> > +
> > +#define SPAPR_IRQ_MSI        0x1300  /* Offset of the dynamic range covered
> > +                                      * by the bitmap allocator */
> > +
> > +typedef struct sPAPRMachineState sPAPRMachineState;
> > +
> 
> Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if
> some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h".
> We had several build breaks detected by 'make docker-test-build@centos6'...
> The correct way to address this would be to move the typedef to the
> "qemu/typedefs.h" header.
> 
> This being said, docker-test-build@centos6 vanished with commit e7b3af81597,
> so I guess we don't support such old distros anymore, and we can live with
> duplicate typedefs.
> 
> > +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis);
> > +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
> > +                        Error **errp);
> > +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num);
> > +void spapr_irq_msi_reset(sPAPRMachineState *spapr);
> > +
> > +#endif
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 3c72173c7e0f..792e24453d8b 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -189,6 +189,11 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp)
> >      sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
> >      Error *local_err = NULL;
> >  
> > +    /* Initialize the MSI IRQ allocator. */
> > +    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +        spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI);
> > +    }
> > +
> >      if (kvm_enabled()) {
> >          if (machine_kernel_irqchip_allowed(machine) &&
> >              !xics_kvm_init(spapr, &local_err)) {
> > @@ -1636,6 +1641,10 @@ static void spapr_machine_reset(void)
> >          ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal);
> >      }
> >  
> > +    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +        spapr_irq_msi_reset(spapr);
> > +    }
> > +
> >      qemu_devices_reset();
> >  
> >      /* DRC reset may cause a device to be unplugged. This will cause troubles
> > @@ -1910,6 +1919,24 @@ static const VMStateDescription vmstate_spapr_patb_entry = {
> >      },
> >  };
> >  
> > +static bool spapr_irq_map_needed(void *opaque)
> > +{
> > +    sPAPRMachineState *spapr = opaque;
> > +
> > +    return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr);
> > +}
> > +
> > +static const VMStateDescription vmstate_spapr_irq_map = {
> > +    .name = "spapr_irq_map",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .needed = spapr_irq_map_needed,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr),
> > +        VMSTATE_END_OF_LIST()
> > +    },
> > +};
> > +
> >  static const VMStateDescription vmstate_spapr = {
> >      .name = "spapr",
> >      .version_id = 3,
> > @@ -1937,6 +1964,7 @@ static const VMStateDescription vmstate_spapr = {
> >          &vmstate_spapr_cap_cfpc,
> >          &vmstate_spapr_cap_sbbc,
> >          &vmstate_spapr_cap_ibs,
> > +        &vmstate_spapr_irq_map,
> >          NULL
> >      }
> >  };
> > @@ -4085,8 +4113,12 @@ static void spapr_machine_3_0_instance_options(MachineState *machine)
> >  
> >  static void spapr_machine_3_0_class_options(MachineClass *mc)
> >  {
> > +    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > +
> >      spapr_machine_3_1_class_options(mc);
> >      SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_0);
> > +
> > +    smc->legacy_irq_allocation = true;
> >  }
> >  
> >  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index e4f5946a2188..32719a1b72d0 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -707,9 +707,11 @@ void spapr_clear_pending_events(sPAPRMachineState *spapr)
> >  
> >  void spapr_events_init(sPAPRMachineState *spapr)
> >  {
> > -    int epow_irq;
> > +    int epow_irq = SPAPR_IRQ_EPOW;
> >  
> > -    epow_irq = spapr_irq_findone(spapr, &error_fatal);
> > +    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +        epow_irq = spapr_irq_findone(spapr, &error_fatal);
> > +    }
> >  
> >      spapr_irq_claim(spapr, epow_irq, false, &error_fatal);
> >  
> > @@ -729,9 +731,11 @@ void spapr_events_init(sPAPRMachineState *spapr)
> >       * checking that it's enabled.
> >       */
> >      if (spapr->use_hotplug_event_source) {
> > -        int hp_irq;
> > +        int hp_irq = SPAPR_IRQ_HOTPLUG;
> >  
> > -        hp_irq = spapr_irq_findone(spapr, &error_fatal);
> > +        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +            hp_irq = spapr_irq_findone(spapr, &error_fatal);
> > +        }
> >  
> >          spapr_irq_claim(spapr, hp_irq, false, &error_fatal);
> >  
> > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> > new file mode 100644
> > index 000000000000..24e9c1d4433c
> > --- /dev/null
> > +++ b/hw/ppc/spapr_irq.c
> > @@ -0,0 +1,56 @@
> > +/*
> > + * QEMU PowerPC sPAPR IRQ interface
> > + *
> > + * Copyright (c) 2018, IBM Corporation.
> > + *
> > + * This code is licensed under the GPL version 2 or later. See the
> > + * COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/log.h"
> > +#include "qemu/error-report.h"
> > +#include "qapi/error.h"
> > +#include "hw/ppc/spapr.h"
> > +#include "hw/ppc/xics.h"
> > +
> > +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis)
> > +{
> > +    spapr->irq_map_nr = nr_msis;
> > +    spapr->irq_map = bitmap_new(spapr->irq_map_nr);
> > +}
> > +
> > +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
> > +                        Error **errp)
> > +{
> > +    int irq;
> > +
> > +    /*
> > +     * The 'align_mask' parameter of bitmap_find_next_zero_area()
> > +     * should be one less than a power of 2; 0 means no
> > +     * alignment. Adapt the 'align' value of the former allocator
> > +     * to fit the requirements of bitmap_find_next_zero_area()
> > +     */
> > +    align -= 1;
> > +
> > +    irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
> > +                                     align);
> > +    if (irq == spapr->irq_map_nr) {
> > +        error_setg(errp, "can't find a free %d-IRQ block", num);
> > +        return -1;
> > +    }
> > +
> > +    bitmap_set(spapr->irq_map, irq, num);
> > +
> > +    return irq + SPAPR_IRQ_MSI;
> > +}
> > +
> > +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num)
> > +{
> > +    bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
> > +}
> > +
> > +void spapr_irq_msi_reset(sPAPRMachineState *spapr)
> > +{
> > +    bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr);
> > +}
> > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > index 497b896c7d24..3791ced6c536 100644
> > --- a/hw/ppc/spapr_pci.c
> > +++ b/hw/ppc/spapr_pci.c
> > @@ -334,6 +334,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >              return;
> >          }
> >  
> > +        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
> > +        }
> >          spapr_irq_free(spapr, msi->first_irq, msi->num);
> >          if (msi_present(pdev)) {
> >              spapr_msi_setmsg(pdev, 0, false, 0, 0);
> > @@ -372,7 +375,13 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >      }
> >  
> >      /* Allocate MSIs */
> > -    irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, &err);
> > +    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +        irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
> > +                             &err);
> > +    } else {
> > +        irq = spapr_irq_msi_alloc(spapr, req_num,
> > +                                  ret_intr_type == RTAS_TYPE_MSI, &err);
> > +    }
> >      if (err) {
> >          error_reportf_err(err, "Can't allocate MSIs for device %x: ",
> >                            config_addr);
> > @@ -392,6 +401,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >  
> >      /* Release previous MSIs */
> >      if (msi) {
> > +        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
> > +        }
> 
> SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you
> call it three times. Even if this isn't a hot path, maybe cache this in
> an smc variable at the beginning of the function as we do pretty much
> everywhere else. Also this would give prettier code IMHO.

I agree with Greg that this would be a nice improvement, but it can
wait until a followup.

> >          spapr_irq_free(spapr, msi->first_irq, msi->num);
> >          g_hash_table_remove(phb->msi, &config_addr);
> >      }
> > @@ -1705,14 +1717,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
> >  
> >      /* Initialize the LSI table */
> >      for (i = 0; i < PCI_NUM_PINS; i++) {
> > -        uint32_t irq;
> > +        uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
> >          Error *local_err = NULL;
> >  
> > -        irq = spapr_irq_findone(spapr, &local_err);
> > -        if (local_err) {
> > -            error_propagate(errp, local_err);
> > -            error_prepend(errp, "can't allocate LSIs: ");
> > -            return;
> > +        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> 
> Same remark. There's another SPAPR_MACHINE_GET_CLASS() user in this
> function.
> 
> > +            irq = spapr_irq_findone(spapr, &local_err);
> > +            if (local_err) {
> > +                error_propagate(errp, local_err);
> > +                error_prepend(errp, "can't allocate LSIs: ");
> > +                return;
> > +            }
> >          }
> >  
> >          spapr_irq_claim(spapr, irq, true, &local_err);
> > @@ -2123,6 +2137,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> >      _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
> >      _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
> >      _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
> > +    /* TODO: fine tune the total count of allocatable MSIs per PHB */
> >      _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));
> >  
> >      /* Dynamic DMA window */
> > diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
> > index be9af71437cc..840d4a3c451c 100644
> > --- a/hw/ppc/spapr_vio.c
> > +++ b/hw/ppc/spapr_vio.c
> > @@ -37,12 +37,13 @@
> >  
> >  #include "hw/ppc/spapr.h"
> >  #include "hw/ppc/spapr_vio.h"
> > -#include "hw/ppc/xics.h"
> >  #include "hw/ppc/fdt.h"
> >  #include "trace.h"
> >  
> >  #include <libfdt.h>
> >  
> > +#define SPAPR_VIO_REG_BASE 0x71000000
> > +
> >  static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name,
> >                                void *opaque, Error **errp)
> >  {
> > @@ -445,6 +446,55 @@ static void spapr_vio_busdev_reset(DeviceState *qdev)
> >      }
> >  }
> >  
> > +/*
> > + * The register property of a VIO device is defined in libvirt using
> > + * 0x1000 as a base register number plus a 0x1000 increment. For the
> > + * VIO tty device, the base number is changed to 0x30000000. QEMU uses
> > + * a base register number of 0x71000000 and then a simple increment.
> > + *
> > + * The formula below tries to compute a unique index number from the
> > + * register value that will be used to define the IRQ number of the
> > + * VIO device.
> > + *
> > + * A maximum of 256 VIO devices is covered. Collisions are possible
> > + * but they will be detected when the IRQ is claimed.
> > + */
> > +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg)
> > +{
> > +    uint32_t irq;
> > +
> > +    if (reg >= SPAPR_VIO_REG_BASE) {
> > +        /*
> > +         * VIO device register values when allocated by QEMU. For
> > +         * these, we simply mask the high bits to fit the overall
> > +         * range: [0x00 - 0xff].
> > +         *
> > +         * The nvram VIO device (reg=0x71000000) is a static device of
> > +         * the pseries machine and so is always allocated by QEMU. Its
> > +         * IRQ number is 0x0.
> > +         */
> > +        irq = reg & 0xff;
> > +
> > +    } else if (reg >= 0x30000000) {
> > +        /*
> > +         * VIO tty devices register values, when allocated by libvirt,
> > +         * are mapped in range [0xf0 - 0xff], gives us a maximum of 16
> > +         * vtys.
> > +         */
> > +        irq = 0xf0 | ((reg >> 12) & 0xf);
> > +
> > +    } else {
> > +        /*
> > +         * Other VIO devices register values, when allocated by
> > +         * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts
> > +         * will be detected when IRQ is claimed.
> > +         */
> > +        irq = (reg >> 12) & 0xff;
> > +    }
> > +
> 
> Nice formula :)
> 
> The patch looks quite good to me, and my remarks about SPAPR_MACHINE_GET_CLASS()
> can be addressed in a followup, so:
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> > +    return SPAPR_IRQ_VIO | irq;
> > +}
> > +
> >  static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
> >  {
> >      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > @@ -485,10 +535,14 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
> >      }
> >  
> >      if (!dev->irq) {
> > -        dev->irq = spapr_irq_findone(spapr, &local_err);
> > -        if (local_err) {
> > -            error_propagate(errp, local_err);
> > -            return;
> > +        dev->irq = spapr_vio_reg_to_irq(dev->reg);
> > +
> > +        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> > +            dev->irq = spapr_irq_findone(spapr, &local_err);
> > +            if (local_err) {
> > +                error_propagate(errp, local_err);
> > +                return;
> > +            }
> >          }
> >      }
> >  
> > @@ -557,7 +611,7 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
> >      /* Create bus on bridge device */
> >      qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio");
> >      bus = SPAPR_VIO_BUS(qbus);
> > -    bus->next_reg = 0x71000000;
> > +    bus->next_reg = SPAPR_VIO_REG_BASE;
> >  
> >      /* hcall-vio */
> >      spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
> > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> > index bcab6323b7ed..4ab556467289 100644
> > --- a/hw/ppc/Makefile.objs
> > +++ b/hw/ppc/Makefile.objs
> > @@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o
> >  obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o
> >  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
> >  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
> > -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o
> > +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o
> >  # IBM PowerNV
> >  obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o
> >  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>
Cédric Le Goater Aug. 1, 2018, 7:14 a.m. UTC | #3
[ ... ]

>>> +typedef struct sPAPRMachineState sPAPRMachineState;
>>> +
>>
>> Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if
>> some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h".
>> We had several build breaks detected by 'make docker-test-build@centos6'...
>> The correct way to address this would be to move the typedef to the
>> "qemu/typedefs.h" header.
>>
>> This being said, docker-test-build@centos6 vanished with commit e7b3af81597,
>> so I guess we don't support such old distros anymore, and we can live with
>> duplicate typedefs.

I have a rhel6 vm for such tests but QEMU now requires python3 and 
glib-2.40 and maybe more stuff. I am not sure one can compile QEMU 3.1
on rhel/centos 6 anymore :/


[ ... ]

>>>      /* Release previous MSIs */
>>>      if (msi) {
>>> +        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
>>> +            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
>>> +        }
>>
>> SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you
>> call it three times. Even if this isn't a hot path, maybe cache this in
>> an smc variable at the beginning of the function as we do pretty much
>> everywhere else. Also this would give prettier code IMHO.
> 
> I agree with Greg that this would be a nice improvement, but it can
> wait until a followup.

The sPAPR code base is very stable so it's not too much work to respin.
FYI, most of the XIVE v4 patchset still applies without a change.

Tell me if you find any other issues and I will resend.

Thanks,

C.
Greg Kurz Aug. 1, 2018, 9:13 a.m. UTC | #4
On Wed, 1 Aug 2018 09:14:43 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> [ ... ]
> 
> >>> +typedef struct sPAPRMachineState sPAPRMachineState;
> >>> +  
> >>
> >> Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if
> >> some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h".
> >> We had several build breaks detected by 'make docker-test-build@centos6'...
> >> The correct way to address this would be to move the typedef to the
> >> "qemu/typedefs.h" header.
> >>
> >> This being said, docker-test-build@centos6 vanished with commit e7b3af81597,
> >> so I guess we don't support such old distros anymore, and we can live with
> >> duplicate typedefs.  
> 
> I have a rhel6 vm for such tests but QEMU now requires python3 and 
> glib-2.40 and maybe more stuff. I am not sure one can compile QEMU 3.1
> on rhel/centos 6 anymore :/
> 

Minimal Python version is 2.7 actually, but rhel6 only has 2.6.6.

Anyway, with these new requirements, I'm pretty sure we can't build QEMU
anymore with these distros... even though rhel 6 EOL is Nov. 2020.

> 
> [ ... ]
> 
> >>>      /* Release previous MSIs */
> >>>      if (msi) {
> >>> +        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
> >>> +            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
> >>> +        }  
> >>
> >> SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you
> >> call it three times. Even if this isn't a hot path, maybe cache this in
> >> an smc variable at the beginning of the function as we do pretty much
> >> everywhere else. Also this would give prettier code IMHO.  
> > 
> > I agree with Greg that this would be a nice improvement, but it can
> > wait until a followup.  
> 
> The sPAPR code base is very stable so it's not too much work to respin.
> FYI, most of the XIVE v4 patchset still applies without a change.
> 
> Tell me if you find any other issues and I will resend.
> 
> Thanks,
> 
> C.
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7e5de1a6fd42..73067f5ee8aa 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -8,6 +8,7 @@ 
 #include "hw/ppc/spapr_drc.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/ppc/spapr_ovec.h"
+#include "hw/ppc/spapr_irq.h"
 
 struct VIOsPAPRBus;
 struct sPAPRPHBState;
@@ -101,6 +102,8 @@  struct sPAPRMachineClass {
     bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
     bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
     bool pre_2_10_has_unused_icps;
+    bool legacy_irq_allocation;
+
     void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index,
                           uint64_t *buid, hwaddr *pio, 
                           hwaddr *mmio32, hwaddr *mmio64,
@@ -167,6 +170,8 @@  struct sPAPRMachineState {
     char *kvm_type;
 
     const char *icp_type;
+    int32_t irq_map_nr;
+    unsigned long *irq_map;
 
     bool cmd_line_caps[SPAPR_CAP_NUM];
     sPAPRCapabilities def, eff, mig;
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
new file mode 100644
index 000000000000..6f7f50548809
--- /dev/null
+++ b/include/hw/ppc/spapr_irq.h
@@ -0,0 +1,32 @@ 
+/*
+ * QEMU PowerPC sPAPR IRQ backend definitions
+ *
+ * Copyright (c) 2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef HW_SPAPR_IRQ_H
+#define HW_SPAPR_IRQ_H
+
+/*
+ * IRQ range offsets per device type
+ */
+#define SPAPR_IRQ_EPOW       0x1000  /* XICS_IRQ_BASE offset */
+#define SPAPR_IRQ_HOTPLUG    0x1001
+#define SPAPR_IRQ_VIO        0x1100  /* 256 VIO devices */
+#define SPAPR_IRQ_PCI_LSI    0x1200  /* 32+ PHBs devices */
+
+#define SPAPR_IRQ_MSI        0x1300  /* Offset of the dynamic range covered
+                                      * by the bitmap allocator */
+
+typedef struct sPAPRMachineState sPAPRMachineState;
+
+void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis);
+int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
+                        Error **errp);
+void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num);
+void spapr_irq_msi_reset(sPAPRMachineState *spapr);
+
+#endif
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 3c72173c7e0f..792e24453d8b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -189,6 +189,11 @@  static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp)
     sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     Error *local_err = NULL;
 
+    /* Initialize the MSI IRQ allocator. */
+    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI);
+    }
+
     if (kvm_enabled()) {
         if (machine_kernel_irqchip_allowed(machine) &&
             !xics_kvm_init(spapr, &local_err)) {
@@ -1636,6 +1641,10 @@  static void spapr_machine_reset(void)
         ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal);
     }
 
+    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        spapr_irq_msi_reset(spapr);
+    }
+
     qemu_devices_reset();
 
     /* DRC reset may cause a device to be unplugged. This will cause troubles
@@ -1910,6 +1919,24 @@  static const VMStateDescription vmstate_spapr_patb_entry = {
     },
 };
 
+static bool spapr_irq_map_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+
+    return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr);
+}
+
+static const VMStateDescription vmstate_spapr_irq_map = {
+    .name = "spapr_irq_map",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_irq_map_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_spapr = {
     .name = "spapr",
     .version_id = 3,
@@ -1937,6 +1964,7 @@  static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_cfpc,
         &vmstate_spapr_cap_sbbc,
         &vmstate_spapr_cap_ibs,
+        &vmstate_spapr_irq_map,
         NULL
     }
 };
@@ -4085,8 +4113,12 @@  static void spapr_machine_3_0_instance_options(MachineState *machine)
 
 static void spapr_machine_3_0_class_options(MachineClass *mc)
 {
+    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
     spapr_machine_3_1_class_options(mc);
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_0);
+
+    smc->legacy_irq_allocation = true;
 }
 
 DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e4f5946a2188..32719a1b72d0 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -707,9 +707,11 @@  void spapr_clear_pending_events(sPAPRMachineState *spapr)
 
 void spapr_events_init(sPAPRMachineState *spapr)
 {
-    int epow_irq;
+    int epow_irq = SPAPR_IRQ_EPOW;
 
-    epow_irq = spapr_irq_findone(spapr, &error_fatal);
+    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        epow_irq = spapr_irq_findone(spapr, &error_fatal);
+    }
 
     spapr_irq_claim(spapr, epow_irq, false, &error_fatal);
 
@@ -729,9 +731,11 @@  void spapr_events_init(sPAPRMachineState *spapr)
      * checking that it's enabled.
      */
     if (spapr->use_hotplug_event_source) {
-        int hp_irq;
+        int hp_irq = SPAPR_IRQ_HOTPLUG;
 
-        hp_irq = spapr_irq_findone(spapr, &error_fatal);
+        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            hp_irq = spapr_irq_findone(spapr, &error_fatal);
+        }
 
         spapr_irq_claim(spapr, hp_irq, false, &error_fatal);
 
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
new file mode 100644
index 000000000000..24e9c1d4433c
--- /dev/null
+++ b/hw/ppc/spapr_irq.c
@@ -0,0 +1,56 @@ 
+/*
+ * QEMU PowerPC sPAPR IRQ interface
+ *
+ * Copyright (c) 2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/xics.h"
+
+void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis)
+{
+    spapr->irq_map_nr = nr_msis;
+    spapr->irq_map = bitmap_new(spapr->irq_map_nr);
+}
+
+int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align,
+                        Error **errp)
+{
+    /*
+     * Reserve 'num' contiguous MSI IRQs and return the first one, or
+     * -1 with 'errp' set. 'align' is only a flag and "align -= 1" on
+     * a bool merely inverts it, so build the 'align_mask' expected by
+     * bitmap_find_next_zero_area() (a power of 2 minus one, 0 for no
+     * alignment) from 'num', assumed to be a power of 2 when aligning.
+     */
+    unsigned long align_mask = (align && num > 1) ? num - 1 : 0;
+    int irq;
+
+    irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
+                                     align_mask);
+    if (irq >= spapr->irq_map_nr) { /* no room: result is past the end */
+        error_setg(errp, "can't find a free %d-IRQ block", num);
+        return -1;
+    }
+
+    /* Mark the block as used; the bitmap is relative to SPAPR_IRQ_MSI */
+    bitmap_set(spapr->irq_map, irq, num);
+    return irq + SPAPR_IRQ_MSI;
+}
+
+void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num)
+{
+    bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
+}
+
+void spapr_irq_msi_reset(sPAPRMachineState *spapr)
+{
+    bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr);
+}
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 497b896c7d24..3791ced6c536 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -334,6 +334,9 @@  static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             return;
         }
 
+        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
+        }
         spapr_irq_free(spapr, msi->first_irq, msi->num);
         if (msi_present(pdev)) {
             spapr_msi_setmsg(pdev, 0, false, 0, 0);
@@ -372,7 +375,13 @@  static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     }
 
     /* Allocate MSIs */
-    irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, &err);
+    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
+                             &err);
+    } else {
+        irq = spapr_irq_msi_alloc(spapr, req_num,
+                                  ret_intr_type == RTAS_TYPE_MSI, &err);
+    }
     if (err) {
         error_reportf_err(err, "Can't allocate MSIs for device %x: ",
                           config_addr);
@@ -392,6 +401,9 @@  static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     /* Release previous MSIs */
     if (msi) {
+        if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
+        }
         spapr_irq_free(spapr, msi->first_irq, msi->num);
         g_hash_table_remove(phb->msi, &config_addr);
     }
@@ -1705,14 +1717,16 @@  static void spapr_phb_realize(DeviceState *dev, Error **errp)
 
     /* Initialize the LSI table */
     for (i = 0; i < PCI_NUM_PINS; i++) {
-        uint32_t irq;
+        uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
         Error *local_err = NULL;
 
-        irq = spapr_irq_findone(spapr, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            error_prepend(errp, "can't allocate LSIs: ");
-            return;
+        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            irq = spapr_irq_findone(spapr, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                error_prepend(errp, "can't allocate LSIs: ");
+                return;
+            }
         }
 
         spapr_irq_claim(spapr, irq, true, &local_err);
@@ -2123,6 +2137,7 @@  int spapr_populate_pci_dt(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
+    /* TODO: fine tune the total count of allocatable MSIs per PHB */
     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));
 
     /* Dynamic DMA window */
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index be9af71437cc..840d4a3c451c 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -37,12 +37,13 @@ 
 
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
-#include "hw/ppc/xics.h"
 #include "hw/ppc/fdt.h"
 #include "trace.h"
 
 #include <libfdt.h>
 
+#define SPAPR_VIO_REG_BASE 0x71000000
+
 static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name,
                               void *opaque, Error **errp)
 {
@@ -445,6 +446,55 @@  static void spapr_vio_busdev_reset(DeviceState *qdev)
     }
 }
 
+/*
+ * The register property of a VIO device is defined in libvirt using
+ * 0x1000 as a base register number plus a 0x1000 increment. For the
+ * VIO tty device, the base number is changed to 0x30000000. QEMU uses
+ * a base register number of 0x71000000 and then a simple increment.
+ *
+ * The formula below tries to compute a unique index number from the
+ * register value that will be used to define the IRQ number of the
+ * VIO device.
+ *
+ * A maximum of 256 VIO devices is covered. Collisions are possible
+ * but they will be detected when the IRQ is claimed.
+ */
+static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg)
+{
+    uint32_t irq;
+
+    if (reg >= SPAPR_VIO_REG_BASE) {
+        /*
+         * VIO device register values when allocated by QEMU. For
+         * these, we simply mask the high bits to fit the overall
+         * range: [0x00 - 0xff].
+         *
+         * The nvram VIO device (reg=0x71000000) is a static device of
+         * the pseries machine and so is always allocated by QEMU. Its
+         * IRQ index is 0x0.
+         */
+        irq = reg & 0xff;
+
+    } else if (reg >= 0x30000000) {
+        /*
+         * VIO tty device register values, when allocated by libvirt,
+         * are mapped in range [0xf0 - 0xff], which gives us a maximum
+         * of 16 vtys.
+         */
+        irq = 0xf0 | ((reg >> 12) & 0xf);
+
+    } else {
+        /*
+         * Other VIO device register values, when allocated by
+         * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts
+         * will be detected when the IRQ is claimed.
+         */
+        irq = (reg >> 12) & 0xff;
+    }
+
+    return SPAPR_IRQ_VIO | irq;
+}
+
 static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
@@ -485,10 +535,14 @@  static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
     }
 
     if (!dev->irq) {
-        dev->irq = spapr_irq_findone(spapr, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
+        dev->irq = spapr_vio_reg_to_irq(dev->reg);
+
+        if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+            dev->irq = spapr_irq_findone(spapr, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
         }
     }
 
@@ -557,7 +611,7 @@  VIOsPAPRBus *spapr_vio_bus_init(void)
     /* Create bus on bridge device */
     qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio");
     bus = SPAPR_VIO_BUS(qbus);
-    bus->next_reg = 0x71000000;
+    bus->next_reg = SPAPR_VIO_REG_BASE;
 
     /* hcall-vio */
     spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index bcab6323b7ed..4ab556467289 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,7 +4,7 @@  obj-y += ppc.o ppc_booke.o fdt.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
-obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o
+obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o
 # IBM PowerNV
 obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)