Message ID | 20180730141134.31153-3-clg@kaod.org |
---|---|
State | New |
Headers | show |
Series | spapr: introduce a fixed IRQ number space and an IRQ controller backend | expand |
On Mon, 30 Jul 2018 16:11:32 +0200 Cédric Le Goater <clg@kaod.org> wrote: > This proposal introduces a new IRQ number space layout using static > numbers for all devices, depending on a device index, and a bitmap > allocator for the MSI IRQ numbers which are negotiated by the guest at > runtime. > > As the VIO device model does not have a device index but a "reg" > property, we introduce a formula to compute an IRQ number from a "reg" > value. It should minimize most of the collisions. > > The previous layout is kept in pre-3.1 machines raising the > 'legacy_irq_allocation' machine class flag. > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > include/hw/ppc/spapr.h | 5 +++ > include/hw/ppc/spapr_irq.h | 32 ++++++++++++++++++ > hw/ppc/spapr.c | 32 ++++++++++++++++++ > hw/ppc/spapr_events.c | 12 ++++--- > hw/ppc/spapr_irq.c | 56 ++++++++++++++++++++++++++++++++ > hw/ppc/spapr_pci.c | 29 +++++++++++++---- > hw/ppc/spapr_vio.c | 66 ++++++++++++++++++++++++++++++++++---- > hw/ppc/Makefile.objs | 2 +- > 8 files changed, 216 insertions(+), 18 deletions(-) > create mode 100644 include/hw/ppc/spapr_irq.h > create mode 100644 hw/ppc/spapr_irq.c > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 7e5de1a6fd42..73067f5ee8aa 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -8,6 +8,7 @@ > #include "hw/ppc/spapr_drc.h" > #include "hw/mem/pc-dimm.h" > #include "hw/ppc/spapr_ovec.h" > +#include "hw/ppc/spapr_irq.h" > > struct VIOsPAPRBus; > struct sPAPRPHBState; > @@ -101,6 +102,8 @@ struct sPAPRMachineClass { > bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ > bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ > bool pre_2_10_has_unused_icps; > + bool legacy_irq_allocation; > + > void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, > uint64_t *buid, hwaddr *pio, > hwaddr *mmio32, hwaddr *mmio64, > @@ -167,6 +170,8 @@ struct sPAPRMachineState { > char *kvm_type; > > const char *icp_type; 
> + int32_t irq_map_nr; > + unsigned long *irq_map; > > bool cmd_line_caps[SPAPR_CAP_NUM]; > sPAPRCapabilities def, eff, mig; > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h > new file mode 100644 > index 000000000000..6f7f50548809 > --- /dev/null > +++ b/include/hw/ppc/spapr_irq.h > @@ -0,0 +1,32 @@ > +/* > + * QEMU PowerPC sPAPR IRQ backend definitions > + * > + * Copyright (c) 2018, IBM Corporation. > + * > + * This code is licensed under the GPL version 2 or later. See the > + * COPYING file in the top-level directory. > + */ > + > +#ifndef HW_SPAPR_IRQ_H > +#define HW_SPAPR_IRQ_H > + > +/* > + * IRQ range offsets per device type > + */ > +#define SPAPR_IRQ_EPOW 0x1000 /* XICS_IRQ_BASE offset */ > +#define SPAPR_IRQ_HOTPLUG 0x1001 > +#define SPAPR_IRQ_VIO 0x1100 /* 256 VIO devices */ > +#define SPAPR_IRQ_PCI_LSI 0x1200 /* 32+ PHBs devices */ > + > +#define SPAPR_IRQ_MSI 0x1300 /* Offset of the dynamic range covered > + * by the bitmap allocator */ > + > +typedef struct sPAPRMachineState sPAPRMachineState; > + Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h". We had several build breaks detected by 'make docker-test-build@centos6'... The correct way to address this would be to move the typedef to the "qemu/typedefs.h" header. This being said, docker-test-build@centos6 vanished with commit e7b3af81597, so I guess we don't support such old distros anymore, and we can live with duplicate typedefs. 
> +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis); > +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, > + Error **errp); > +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num); > +void spapr_irq_msi_reset(sPAPRMachineState *spapr); > + > +#endif > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 3c72173c7e0f..792e24453d8b 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -189,6 +189,11 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) > sPAPRMachineState *spapr = SPAPR_MACHINE(machine); > Error *local_err = NULL; > > + /* Initialize the MSI IRQ allocator. */ > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI); > + } > + > if (kvm_enabled()) { > if (machine_kernel_irqchip_allowed(machine) && > !xics_kvm_init(spapr, &local_err)) { > @@ -1636,6 +1641,10 @@ static void spapr_machine_reset(void) > ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal); > } > > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + spapr_irq_msi_reset(spapr); > + } > + > qemu_devices_reset(); > > /* DRC reset may cause a device to be unplugged. 
This will cause troubles > @@ -1910,6 +1919,24 @@ static const VMStateDescription vmstate_spapr_patb_entry = { > }, > }; > > +static bool spapr_irq_map_needed(void *opaque) > +{ > + sPAPRMachineState *spapr = opaque; > + > + return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr); > +} > + > +static const VMStateDescription vmstate_spapr_irq_map = { > + .name = "spapr_irq_map", > + .version_id = 1, > + .minimum_version_id = 1, > + .needed = spapr_irq_map_needed, > + .fields = (VMStateField[]) { > + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr), > + VMSTATE_END_OF_LIST() > + }, > +}; > + > static const VMStateDescription vmstate_spapr = { > .name = "spapr", > .version_id = 3, > @@ -1937,6 +1964,7 @@ static const VMStateDescription vmstate_spapr = { > &vmstate_spapr_cap_cfpc, > &vmstate_spapr_cap_sbbc, > &vmstate_spapr_cap_ibs, > + &vmstate_spapr_irq_map, > NULL > } > }; > @@ -4085,8 +4113,12 @@ static void spapr_machine_3_0_instance_options(MachineState *machine) > > static void spapr_machine_3_0_class_options(MachineClass *mc) > { > + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); > + > spapr_machine_3_1_class_options(mc); > SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_0); > + > + smc->legacy_irq_allocation = true; > } > > DEFINE_SPAPR_MACHINE(3_0, "3.0", false); > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index e4f5946a2188..32719a1b72d0 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -707,9 +707,11 @@ void spapr_clear_pending_events(sPAPRMachineState *spapr) > > void spapr_events_init(sPAPRMachineState *spapr) > { > - int epow_irq; > + int epow_irq = SPAPR_IRQ_EPOW; > > - epow_irq = spapr_irq_findone(spapr, &error_fatal); > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + epow_irq = spapr_irq_findone(spapr, &error_fatal); > + } > > spapr_irq_claim(spapr, epow_irq, false, &error_fatal); > > @@ -729,9 +731,11 @@ void spapr_events_init(sPAPRMachineState *spapr) > * checking that 
it's enabled. > */ > if (spapr->use_hotplug_event_source) { > - int hp_irq; > + int hp_irq = SPAPR_IRQ_HOTPLUG; > > - hp_irq = spapr_irq_findone(spapr, &error_fatal); > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + hp_irq = spapr_irq_findone(spapr, &error_fatal); > + } > > spapr_irq_claim(spapr, hp_irq, false, &error_fatal); > > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c > new file mode 100644 > index 000000000000..24e9c1d4433c > --- /dev/null > +++ b/hw/ppc/spapr_irq.c > @@ -0,0 +1,56 @@ > +/* > + * QEMU PowerPC sPAPR IRQ interface > + * > + * Copyright (c) 2018, IBM Corporation. > + * > + * This code is licensed under the GPL version 2 or later. See the > + * COPYING file in the top-level directory. > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/log.h" > +#include "qemu/error-report.h" > +#include "qapi/error.h" > +#include "hw/ppc/spapr.h" > +#include "hw/ppc/xics.h" > + > +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis) > +{ > + spapr->irq_map_nr = nr_msis; > + spapr->irq_map = bitmap_new(spapr->irq_map_nr); > +} > + > +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, > + Error **errp) > +{ > + int irq; > + > + /* > + * The 'align_mask' parameter of bitmap_find_next_zero_area() > + * should be one less than a power of 2; 0 means no > + * alignment. 
Adapt the 'align' value of the former allocator > + * to fit the requirements of bitmap_find_next_zero_area() > + */ > + align -= 1; > + > + irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, > + align); > + if (irq == spapr->irq_map_nr) { > + error_setg(errp, "can't find a free %d-IRQ block", num); > + return -1; > + } > + > + bitmap_set(spapr->irq_map, irq, num); > + > + return irq + SPAPR_IRQ_MSI; > +} > + > +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num) > +{ > + bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); > +} > + > +void spapr_irq_msi_reset(sPAPRMachineState *spapr) > +{ > + bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr); > +} > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index 497b896c7d24..3791ced6c536 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -334,6 +334,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, > return; > } > > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); > + } > spapr_irq_free(spapr, msi->first_irq, msi->num); > if (msi_present(pdev)) { > spapr_msi_setmsg(pdev, 0, false, 0, 0); > @@ -372,7 +375,13 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, > } > > /* Allocate MSIs */ > - irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, &err); > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, > + &err); > + } else { > + irq = spapr_irq_msi_alloc(spapr, req_num, > + ret_intr_type == RTAS_TYPE_MSI, &err); > + } > if (err) { > error_reportf_err(err, "Can't allocate MSIs for device %x: ", > config_addr); > @@ -392,6 +401,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > /* Release previous MSIs */ > if (msi) { > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + 
spapr_irq_msi_free(spapr, msi->first_irq, msi->num); > + } SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you call it three times. Even if this isn't a hot path, maybe cache this in an smc variable at the beginning of the function as we do pretty much everywhere else. Also this would give prettier code IMHO. > spapr_irq_free(spapr, msi->first_irq, msi->num); > g_hash_table_remove(phb->msi, &config_addr); > } > @@ -1705,14 +1717,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) > > /* Initialize the LSI table */ > for (i = 0; i < PCI_NUM_PINS; i++) { > - uint32_t irq; > + uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; > Error *local_err = NULL; > > - irq = spapr_irq_findone(spapr, &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - error_prepend(errp, "can't allocate LSIs: "); > - return; > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { Same remark. There's another SPAPR_MACHINE_GET_CLASS() user in this function.
> + irq = spapr_irq_findone(spapr, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + error_prepend(errp, "can't allocate LSIs: "); > + return; > + } > } > > spapr_irq_claim(spapr, irq, true, &local_err); > @@ -2123,6 +2137,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, > _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges)); > _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); > _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1)); > + /* TODO: fine tune the total count of allocatable MSIs per PHB */ > _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR)); > > /* Dynamic DMA window */ > diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c > index be9af71437cc..840d4a3c451c 100644 > --- a/hw/ppc/spapr_vio.c > +++ b/hw/ppc/spapr_vio.c > @@ -37,12 +37,13 @@ > > #include "hw/ppc/spapr.h" > #include "hw/ppc/spapr_vio.h" > -#include "hw/ppc/xics.h" > #include "hw/ppc/fdt.h" > #include "trace.h" > > #include <libfdt.h> > > +#define SPAPR_VIO_REG_BASE 0x71000000 > + > static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name, > void *opaque, Error **errp) > { > @@ -445,6 +446,55 @@ static void spapr_vio_busdev_reset(DeviceState *qdev) > } > } > > +/* > + * The register property of a VIO device is defined in libvirt using > + * 0x1000 as a base register number plus a 0x1000 increment. For the > + * VIO tty device, the base number is changed to 0x30000000. QEMU uses > + * a base register number of 0x71000000 and then a simple increment. > + * > + * The formula below tries to compute a unique index number from the > + * register value that will be used to define the IRQ number of the > + * VIO device. > + * > + * A maximum of 256 VIO devices is covered. Collisions are possible > + * but they will be detected when the IRQ is claimed.
> + */ > +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg) > +{ > + uint32_t irq; > + > + if (reg >= SPAPR_VIO_REG_BASE) { > + /* > + * VIO device register values when allocated by QEMU. For > + * these, we simply mask the high bits to fit the overall > + * range: [0x00 - 0xff]. > + * > + * The nvram VIO device (reg=0x71000000) is a static device of > + * the pseries machine and so is always allocated by QEMU. Its > + * IRQ number is 0x0. > + */ > + irq = reg & 0xff; > + > + } else if (reg >= 0x30000000) { > + /* > + * VIO tty devices register values, when allocated by libvirt, > + * are mapped in range [0xf0 - 0xff], gives us a maximum of 16 > + * vtys. > + */ > + irq = 0xf0 | ((reg >> 12) & 0xf); > + > + } else { > + /* > + * Other VIO devices register values, when allocated by > + * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts > + * will be detected when IRQ is claimed. > + */ > + irq = (reg >> 12) & 0xff; > + } > + Nice formula :) The patch looks quite good to me, and my remarks about SPAPR_MACHINE_GET_CLASS() can be addressed in a followup, so: Reviewed-by: Greg Kurz <groug@kaod.org> > + return SPAPR_IRQ_VIO | irq; > +} > + > static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) > { > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > @@ -485,10 +535,14 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) > } > > if (!dev->irq) { > - dev->irq = spapr_irq_findone(spapr, &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - return; > + dev->irq = spapr_vio_reg_to_irq(dev->reg); > + > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > + dev->irq = spapr_irq_findone(spapr, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > } > } > > @@ -557,7 +611,7 @@ VIOsPAPRBus *spapr_vio_bus_init(void) > /* Create bus on bridge device */ > qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); > bus = SPAPR_VIO_BUS(qbus); > -
bus->next_reg = 0x71000000; > + bus->next_reg = SPAPR_VIO_REG_BASE; > > /* hcall-vio */ > spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal); > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs > index bcab6323b7ed..4ab556467289 100644 > --- a/hw/ppc/Makefile.objs > +++ b/hw/ppc/Makefile.objs > @@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o > obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o > obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o > obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o > -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o > +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o > # IBM PowerNV > obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o > ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
On Tue, Jul 31, 2018 at 07:39:45PM +0200, Greg Kurz wrote: > On Mon, 30 Jul 2018 16:11:32 +0200 > Cédric Le Goater <clg@kaod.org> wrote: > > > This proposal introduces a new IRQ number space layout using static > > numbers for all devices, depending on a device index, and a bitmap > > allocator for the MSI IRQ numbers which are negotiated by the guest at > > runtime. > > > > As the VIO device model does not have a device index but a "reg" > > property, we introduce a formula to compute an IRQ number from a "reg" > > value. It should minimize most of the collisions. > > > > The previous layout is kept in pre-3.1 machines raising the > > 'legacy_irq_allocation' machine class flag. > > > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > > --- > > include/hw/ppc/spapr.h | 5 +++ > > include/hw/ppc/spapr_irq.h | 32 ++++++++++++++++++ > > hw/ppc/spapr.c | 32 ++++++++++++++++++ > > hw/ppc/spapr_events.c | 12 ++++--- > > hw/ppc/spapr_irq.c | 56 ++++++++++++++++++++++++++++++++ > > hw/ppc/spapr_pci.c | 29 +++++++++++++---- > > hw/ppc/spapr_vio.c | 66 ++++++++++++++++++++++++++++++++++---- > > hw/ppc/Makefile.objs | 2 +- > > 8 files changed, 216 insertions(+), 18 deletions(-) > > create mode 100644 include/hw/ppc/spapr_irq.h > > create mode 100644 hw/ppc/spapr_irq.c > > > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > > index 7e5de1a6fd42..73067f5ee8aa 100644 > > --- a/include/hw/ppc/spapr.h > > +++ b/include/hw/ppc/spapr.h > > @@ -8,6 +8,7 @@ > > #include "hw/ppc/spapr_drc.h" > > #include "hw/mem/pc-dimm.h" > > #include "hw/ppc/spapr_ovec.h" > > +#include "hw/ppc/spapr_irq.h" > > > > struct VIOsPAPRBus; > > struct sPAPRPHBState; > > @@ -101,6 +102,8 @@ struct sPAPRMachineClass { > > bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ > > bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ > > bool pre_2_10_has_unused_icps; > > + bool legacy_irq_allocation; > > + > > void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, > > 
uint64_t *buid, hwaddr *pio, > > hwaddr *mmio32, hwaddr *mmio64, > > @@ -167,6 +170,8 @@ struct sPAPRMachineState { > > char *kvm_type; > > > > const char *icp_type; > > + int32_t irq_map_nr; > > + unsigned long *irq_map; > > > > bool cmd_line_caps[SPAPR_CAP_NUM]; > > sPAPRCapabilities def, eff, mig; > > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h > > new file mode 100644 > > index 000000000000..6f7f50548809 > > --- /dev/null > > +++ b/include/hw/ppc/spapr_irq.h > > @@ -0,0 +1,32 @@ > > +/* > > + * QEMU PowerPC sPAPR IRQ backend definitions > > + * > > + * Copyright (c) 2018, IBM Corporation. > > + * > > + * This code is licensed under the GPL version 2 or later. See the > > + * COPYING file in the top-level directory. > > + */ > > + > > +#ifndef HW_SPAPR_IRQ_H > > +#define HW_SPAPR_IRQ_H > > + > > +/* > > + * IRQ range offsets per device type > > + */ > > +#define SPAPR_IRQ_EPOW 0x1000 /* XICS_IRQ_BASE offset */ > > +#define SPAPR_IRQ_HOTPLUG 0x1001 > > +#define SPAPR_IRQ_VIO 0x1100 /* 256 VIO devices */ > > +#define SPAPR_IRQ_PCI_LSI 0x1200 /* 32+ PHBs devices */ > > + > > +#define SPAPR_IRQ_MSI 0x1300 /* Offset of the dynamic range covered > > + * by the bitmap allocator */ > > + > > +typedef struct sPAPRMachineState sPAPRMachineState; > > + > > Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if > some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h". > We had several build breaks detected by 'make docker-test-build@centos6'... > The correct way to address this would be to move the typedef to the > "qemu/typedefs.h" header. > > This being said, docker-test-build@centos6 vanished with commit e7b3af81597, > so I guess we don't support such old distros anymore, and we can live with > duplicate typedefs. 
> > > +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis); > > +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, > > + Error **errp); > > +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num); > > +void spapr_irq_msi_reset(sPAPRMachineState *spapr); > > + > > +#endif > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index 3c72173c7e0f..792e24453d8b 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -189,6 +189,11 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) > > sPAPRMachineState *spapr = SPAPR_MACHINE(machine); > > Error *local_err = NULL; > > > > + /* Initialize the MSI IRQ allocator. */ > > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI); > > + } > > + > > if (kvm_enabled()) { > > if (machine_kernel_irqchip_allowed(machine) && > > !xics_kvm_init(spapr, &local_err)) { > > @@ -1636,6 +1641,10 @@ static void spapr_machine_reset(void) > > ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal); > > } > > > > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + spapr_irq_msi_reset(spapr); > > + } > > + > > qemu_devices_reset(); > > > > /* DRC reset may cause a device to be unplugged. 
This will cause troubles > > @@ -1910,6 +1919,24 @@ static const VMStateDescription vmstate_spapr_patb_entry = { > > }, > > }; > > > > +static bool spapr_irq_map_needed(void *opaque) > > +{ > > + sPAPRMachineState *spapr = opaque; > > + > > + return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr); > > +} > > + > > +static const VMStateDescription vmstate_spapr_irq_map = { > > + .name = "spapr_irq_map", > > + .version_id = 1, > > + .minimum_version_id = 1, > > + .needed = spapr_irq_map_needed, > > + .fields = (VMStateField[]) { > > + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr), > > + VMSTATE_END_OF_LIST() > > + }, > > +}; > > + > > static const VMStateDescription vmstate_spapr = { > > .name = "spapr", > > .version_id = 3, > > @@ -1937,6 +1964,7 @@ static const VMStateDescription vmstate_spapr = { > > &vmstate_spapr_cap_cfpc, > > &vmstate_spapr_cap_sbbc, > > &vmstate_spapr_cap_ibs, > > + &vmstate_spapr_irq_map, > > NULL > > } > > }; > > @@ -4085,8 +4113,12 @@ static void spapr_machine_3_0_instance_options(MachineState *machine) > > > > static void spapr_machine_3_0_class_options(MachineClass *mc) > > { > > + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); > > + > > spapr_machine_3_1_class_options(mc); > > SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_0); > > + > > + smc->legacy_irq_allocation = true; > > } > > > > DEFINE_SPAPR_MACHINE(3_0, "3.0", false); > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > > index e4f5946a2188..32719a1b72d0 100644 > > --- a/hw/ppc/spapr_events.c > > +++ b/hw/ppc/spapr_events.c > > @@ -707,9 +707,11 @@ void spapr_clear_pending_events(sPAPRMachineState *spapr) > > > > void spapr_events_init(sPAPRMachineState *spapr) > > { > > - int epow_irq; > > + int epow_irq = SPAPR_IRQ_EPOW; > > > > - epow_irq = spapr_irq_findone(spapr, &error_fatal); > > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + epow_irq = spapr_irq_findone(spapr, &error_fatal); > > + } > > > > spapr_irq_claim(spapr, 
epow_irq, false, &error_fatal); > > > > @@ -729,9 +731,11 @@ void spapr_events_init(sPAPRMachineState *spapr) > > * checking that it's enabled. > > */ > > if (spapr->use_hotplug_event_source) { > > - int hp_irq; > > + int hp_irq = SPAPR_IRQ_HOTPLUG; > > > > - hp_irq = spapr_irq_findone(spapr, &error_fatal); > > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + hp_irq = spapr_irq_findone(spapr, &error_fatal); > > + } > > > > spapr_irq_claim(spapr, hp_irq, false, &error_fatal); > > > > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c > > new file mode 100644 > > index 000000000000..24e9c1d4433c > > --- /dev/null > > +++ b/hw/ppc/spapr_irq.c > > @@ -0,0 +1,56 @@ > > +/* > > + * QEMU PowerPC sPAPR IRQ interface > > + * > > + * Copyright (c) 2018, IBM Corporation. > > + * > > + * This code is licensed under the GPL version 2 or later. See the > > + * COPYING file in the top-level directory. > > + */ > > + > > +#include "qemu/osdep.h" > > +#include "qemu/log.h" > > +#include "qemu/error-report.h" > > +#include "qapi/error.h" > > +#include "hw/ppc/spapr.h" > > +#include "hw/ppc/xics.h" > > + > > +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis) > > +{ > > + spapr->irq_map_nr = nr_msis; > > + spapr->irq_map = bitmap_new(spapr->irq_map_nr); > > +} > > + > > +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, > > + Error **errp) > > +{ > > + int irq; > > + > > + /* > > + * The 'align_mask' parameter of bitmap_find_next_zero_area() > > + * should be one less than a power of 2; 0 means no > > + * alignment. 
Adapt the 'align' value of the former allocator > > + * to fit the requirements of bitmap_find_next_zero_area() > > + */ > > + align -= 1; > > + > > + irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, > > + align); > > + if (irq == spapr->irq_map_nr) { > > + error_setg(errp, "can't find a free %d-IRQ block", num); > > + return -1; > > + } > > + > > + bitmap_set(spapr->irq_map, irq, num); > > + > > + return irq + SPAPR_IRQ_MSI; > > +} > > + > > +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num) > > +{ > > + bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); > > +} > > + > > +void spapr_irq_msi_reset(sPAPRMachineState *spapr) > > +{ > > + bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr); > > +} > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > > index 497b896c7d24..3791ced6c536 100644 > > --- a/hw/ppc/spapr_pci.c > > +++ b/hw/ppc/spapr_pci.c > > @@ -334,6 +334,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > return; > > } > > > > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); > > + } > > spapr_irq_free(spapr, msi->first_irq, msi->num); > > if (msi_present(pdev)) { > > spapr_msi_setmsg(pdev, 0, false, 0, 0); > > @@ -372,7 +375,13 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > } > > > > /* Allocate MSIs */ > > - irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, &err); > > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, > > + &err); > > + } else { > > + irq = spapr_irq_msi_alloc(spapr, req_num, > > + ret_intr_type == RTAS_TYPE_MSI, &err); > > + } > > if (err) { > > error_reportf_err(err, "Can't allocate MSIs for device %x: ", > > config_addr); > > @@ -392,6 +401,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > > > /* 
Release previous MSIs */ > > if (msi) { > > + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); > > + } > > SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you > call it three times. Even if this isn't a hot path, maybe cache this in > an smc variable at the beginning of the function as we do pretty much > everywhere else. Also this would give prettier code IMHO. I agree with Greg that this would be a nice improvement, but it can wait until a followup. > > spapr_irq_free(spapr, msi->first_irq, msi->num); > > g_hash_table_remove(phb->msi, &config_addr); > > } > > @@ -1705,14 +1717,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) > > > > /* Initialize the LSI table */ > > for (i = 0; i < PCI_NUM_PINS; i++) { > > - uint32_t irq; > > + uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; > > Error *local_err = NULL; > > > > - irq = spapr_irq_findone(spapr, &local_err); > > - if (local_err) { > > - error_propagate(errp, local_err); > > - error_prepend(errp, "can't allocate LSIs: "); > > - return; > > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > Same remark. There's another SPAPR_MACHINE_GET_CLASS() user in this > function.
> > > + irq = spapr_irq_findone(spapr, &local_err); > > + if (local_err) { > > + error_propagate(errp, local_err); > > + error_prepend(errp, "can't allocate LSIs: "); > > + return; > > + } > > } > > > > spapr_irq_claim(spapr, irq, true, &local_err); > > @@ -2123,6 +2137,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, > > _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges)); > > _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); > > _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1)); > > + /* TODO: fine tune the total count of allocatable MSIs per PHB */ > > _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR)); > > > > /* Dynamic DMA window */ > > diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c > > index be9af71437cc..840d4a3c451c 100644 > > --- a/hw/ppc/spapr_vio.c > > +++ b/hw/ppc/spapr_vio.c > > @@ -37,12 +37,13 @@ > > > > #include "hw/ppc/spapr.h" > > #include "hw/ppc/spapr_vio.h" > > -#include "hw/ppc/xics.h" > > #include "hw/ppc/fdt.h" > > #include "trace.h" > > > > #include <libfdt.h> > > > > +#define SPAPR_VIO_REG_BASE 0x71000000 > > + > > static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name, > > void *opaque, Error **errp) > > { > > @@ -445,6 +446,55 @@ static void spapr_vio_busdev_reset(DeviceState *qdev) > > } > > } > > > > +/* > > + * The register property of a VIO device is defined in libvirt using > > + * 0x1000 as a base register number plus a 0x1000 increment. For the > > + * VIO tty device, the base number is changed to 0x30000000. QEMU uses > > + * a base register number of 0x71000000 and then a simple increment. > > + * > > + * The formula below tries to compute a unique index number from the > > + * register value that will be used to define the IRQ number of the > > + * VIO device. > > + * > > + * A maximum of 256 VIO devices is covered. Collisions are possible > > + * but they will be detected when the IRQ is claimed.
> > + */ > > +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg) > > +{ > > + uint32_t irq; > > + > > + if (reg >= SPAPR_VIO_REG_BASE) { > > + /* > > + * VIO device register values when allocated by QEMU. For > > + * these, we simply mask the high bits to fit the overall > > + * range: [0x00 - 0xff]. > > + * > > + * The nvram VIO device (reg=0x71000000) is a static device of > > + * the pseries machine and so is always allocated by QEMU. Its > > + * IRQ number is 0x0. > > + */ > > + irq = reg & 0xff; > > + > > + } else if (reg >= 0x30000000) { > > + /* > > + * VIO tty devices register values, when allocated by libvirt, > > + * are mapped in range [0xf0 - 0xff], gives us a maximum of 16 > > + * vtys. > > + */ > > + irq = 0xf0 | ((reg >> 12) & 0xf); > > + > > + } else { > > + /* > > + * Other VIO devices register values, when allocated by > > + * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts > > + * will be detected when IRQ is claimed. > > + */ > > + irq = (reg >> 12) & 0xff; > > + } > > + > > Nice formula :) > > The patch looks quite good to me, and my remarks about SPAPR_MACHINE_GET_CLASS() > can be addressed in a followup, so: > > Reviewed-by: Greg Kurz <groug@kaod.org> > > > + return SPAPR_IRQ_VIO | irq; > > +} > > + > > static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) > > { > > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > @@ -485,10 +535,14 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) > > } > > > > if (!dev->irq) { > > - dev->irq = spapr_irq_findone(spapr, &local_err); > > - if (local_err) { > > - error_propagate(errp, local_err); > > - return; > > + dev->irq = spapr_vio_reg_to_irq(dev->reg); > > + > > + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > > + dev->irq = spapr_irq_findone(spapr, &local_err); > > + if (local_err) { > > + error_propagate(errp, local_err); > > + return; > > + } > > } > > } > > > > @@ -557,7 +611,7 @@ VIOsPAPRBus
*spapr_vio_bus_init(void) > > /* Create bus on bridge device */ > > qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); > > bus = SPAPR_VIO_BUS(qbus); > > - bus->next_reg = 0x71000000; > > + bus->next_reg = SPAPR_VIO_REG_BASE; > > > > /* hcall-vio */ > > spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal); > > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs > > index bcab6323b7ed..4ab556467289 100644 > > --- a/hw/ppc/Makefile.objs > > +++ b/hw/ppc/Makefile.objs > > @@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o > > obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o > > obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o > > obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o > > -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o > > +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o > > # IBM PowerNV > > obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o > > ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) >
[ ... ] >>> +typedef struct sPAPRMachineState sPAPRMachineState; >>> + >> >> Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if >> some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h". >> We had several build breaks detected by 'make docker-test-build@centos6'... >> The correct way to address this would be to move the typedef to the >> "qemu/typedefs.h" header. >> >> This being said, docker-test-build@centos6 vanished with commit e7b3af81597, >> so I guess we don't support such old distros anymore, and we can live with >> duplicate typedefs. I have a rhel6 vm for such tests but QEMU now requires python3 and glib-2.40 and maybe more stuff. I am not sure one can compile QEMU 3.1 on rhel/centos 6 anymore :/ [ ... ] >>> /* Release previous MSIs */ >>> if (msi) { >>> + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >>> + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); >>> + } >> >> SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you >> call it three times. Even if this isn't a hot path, maybe cache this in >> an smc variable at the beginning of the function as we do pretty much >> everywhere else. Also this would give prettier code IMHO. > > I agree with Greg that this would be a nice improvement, but it can > wait until a followup. The sPAPR code base is very stable so it's not too much work to respin. FYI, most of the XIVE v4 patchset still applies without a change. Tell me if you find any other issues and I will resend. Thanks, C.
On Wed, 1 Aug 2018 09:14:43 +0200 Cédric Le Goater <clg@kaod.org> wrote: > [ ... ] > > >>> +typedef struct sPAPRMachineState sPAPRMachineState; > >>> + > >> > >> Old compilers (GCC < 4.6) might complain about 'redefinition of typedef' if > >> some file, say hw/ppc/spapr.c, includes both this header and "hw/ppc/xics.h". > >> We had several build breaks detected by 'make docker-test-build@centos6'... > >> The correct way to address this would be to move the typedef to the > >> "qemu/typedefs.h" header. > >> > >> This being said, docker-test-build@centos6 vanished with commit e7b3af81597, > >> so I guess we don't support such old distros anymore, and we can live with > >> duplicate typedefs. > > I have a rhel6 vm for such tests but QEMU now requires python3 and > glib-2.40 and maybe more stuff. I am not sure one can compile QEMU 3.1 > on rhel/centos 6 anymore :/ > Minimal Python version is 2.7 actually, but rhel6 only has 2.6.6. Anyway, with these new requirements, I'm pretty sure we can't build QEMU anymore with these distros... even though rhel 6 EOL is Nov. 2020. > > [ ... ] > > >>> /* Release previous MSIs */ > >>> if (msi) { > >>> + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { > >>> + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); > >>> + } > >> > >> SPAPR_MACHINE_GET_CLASS() does all the recursive type checking, and you > >> call it three times. Even if this isn't a hot path, maybe cache this in > >> an smc variable at the beginning of the function as we do pretty much > >> everywhere else. Also this would give prettier code IMHO. > > > > I agree with Greg that this would be a nice improvement, but it can > > wait until a followup. > > The sPAPR code base is very stable so it's not too much work to respin. > FYI, most of the XIVE v4 patchset still applies without a change. > > Tell me if you find any other issues and I will resend. > > Thanks, > > C.
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 7e5de1a6fd42..73067f5ee8aa 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -8,6 +8,7 @@ #include "hw/ppc/spapr_drc.h" #include "hw/mem/pc-dimm.h" #include "hw/ppc/spapr_ovec.h" +#include "hw/ppc/spapr_irq.h" struct VIOsPAPRBus; struct sPAPRPHBState; @@ -101,6 +102,8 @@ struct sPAPRMachineClass { bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ bool pre_2_10_has_unused_icps; + bool legacy_irq_allocation; + void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, @@ -167,6 +170,8 @@ struct sPAPRMachineState { char *kvm_type; const char *icp_type; + int32_t irq_map_nr; + unsigned long *irq_map; bool cmd_line_caps[SPAPR_CAP_NUM]; sPAPRCapabilities def, eff, mig; diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h new file mode 100644 index 000000000000..6f7f50548809 --- /dev/null +++ b/include/hw/ppc/spapr_irq.h @@ -0,0 +1,32 @@ +/* + * QEMU PowerPC sPAPR IRQ backend definitions + * + * Copyright (c) 2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. 
+ */ + +#ifndef HW_SPAPR_IRQ_H +#define HW_SPAPR_IRQ_H + +/* + * IRQ range offsets per device type + */ +#define SPAPR_IRQ_EPOW 0x1000 /* XICS_IRQ_BASE offset */ +#define SPAPR_IRQ_HOTPLUG 0x1001 +#define SPAPR_IRQ_VIO 0x1100 /* 256 VIO devices */ +#define SPAPR_IRQ_PCI_LSI 0x1200 /* 32+ PHBs devices */ + +#define SPAPR_IRQ_MSI 0x1300 /* Offset of the dynamic range covered + * by the bitmap allocator */ + +typedef struct sPAPRMachineState sPAPRMachineState; + +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis); +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, + Error **errp); +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num); +void spapr_irq_msi_reset(sPAPRMachineState *spapr); + +#endif diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 3c72173c7e0f..792e24453d8b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -189,6 +189,11 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); Error *local_err = NULL; + /* Initialize the MSI IRQ allocator. */ + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI); + } + if (kvm_enabled()) { if (machine_kernel_irqchip_allowed(machine) && !xics_kvm_init(spapr, &local_err)) { @@ -1636,6 +1641,10 @@ static void spapr_machine_reset(void) ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal); } + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + spapr_irq_msi_reset(spapr); + } + qemu_devices_reset(); /* DRC reset may cause a device to be unplugged. 
This will cause troubles @@ -1910,6 +1919,24 @@ static const VMStateDescription vmstate_spapr_patb_entry = { }, }; +static bool spapr_irq_map_needed(void *opaque) +{ + sPAPRMachineState *spapr = opaque; + + return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr); +} + +static const VMStateDescription vmstate_spapr_irq_map = { + .name = "spapr_irq_map", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_irq_map_needed, + .fields = (VMStateField[]) { + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_spapr = { .name = "spapr", .version_id = 3, @@ -1937,6 +1964,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_cap_cfpc, &vmstate_spapr_cap_sbbc, &vmstate_spapr_cap_ibs, + &vmstate_spapr_irq_map, NULL } }; @@ -4085,8 +4113,12 @@ static void spapr_machine_3_0_instance_options(MachineState *machine) static void spapr_machine_3_0_class_options(MachineClass *mc) { + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + spapr_machine_3_1_class_options(mc); SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_0); + + smc->legacy_irq_allocation = true; } DEFINE_SPAPR_MACHINE(3_0, "3.0", false); diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index e4f5946a2188..32719a1b72d0 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -707,9 +707,11 @@ void spapr_clear_pending_events(sPAPRMachineState *spapr) void spapr_events_init(sPAPRMachineState *spapr) { - int epow_irq; + int epow_irq = SPAPR_IRQ_EPOW; - epow_irq = spapr_irq_findone(spapr, &error_fatal); + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + epow_irq = spapr_irq_findone(spapr, &error_fatal); + } spapr_irq_claim(spapr, epow_irq, false, &error_fatal); @@ -729,9 +731,11 @@ void spapr_events_init(sPAPRMachineState *spapr) * checking that it's enabled. 
*/ if (spapr->use_hotplug_event_source) { - int hp_irq; + int hp_irq = SPAPR_IRQ_HOTPLUG; - hp_irq = spapr_irq_findone(spapr, &error_fatal); + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + hp_irq = spapr_irq_findone(spapr, &error_fatal); + } spapr_irq_claim(spapr, hp_irq, false, &error_fatal); diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c new file mode 100644 index 000000000000..24e9c1d4433c --- /dev/null +++ b/hw/ppc/spapr_irq.c @@ -0,0 +1,56 @@ +/* + * QEMU PowerPC sPAPR IRQ interface + * + * Copyright (c) 2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/xics.h" + +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis) +{ + spapr->irq_map_nr = nr_msis; + spapr->irq_map = bitmap_new(spapr->irq_map_nr); +} + +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, + Error **errp) +{ + int irq; + + /* + * The 'align_mask' parameter of bitmap_find_next_zero_area() + * should be one less than a power of 2; 0 means no + * alignment. 
Adapt the 'align' value of the former allocator + * to fit the requirements of bitmap_find_next_zero_area() + */ + align -= 1; + + irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, + align); + if (irq == spapr->irq_map_nr) { + error_setg(errp, "can't find a free %d-IRQ block", num); + return -1; + } + + bitmap_set(spapr->irq_map, irq, num); + + return irq + SPAPR_IRQ_MSI; +} + +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num) +{ + bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); +} + +void spapr_irq_msi_reset(sPAPRMachineState *spapr) +{ + bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr); +} diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 497b896c7d24..3791ced6c536 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -334,6 +334,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); + } spapr_irq_free(spapr, msi->first_irq, msi->num); if (msi_present(pdev)) { spapr_msi_setmsg(pdev, 0, false, 0, 0); @@ -372,7 +375,13 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, } /* Allocate MSIs */ - irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, &err); + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, + &err); + } else { + irq = spapr_irq_msi_alloc(spapr, req_num, + ret_intr_type == RTAS_TYPE_MSI, &err); + } if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", config_addr); @@ -392,6 +401,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* Release previous MSIs */ if (msi) { + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); + } spapr_irq_free(spapr, msi->first_irq, msi->num); 
g_hash_table_remove(phb->msi, &config_addr); } @@ -1705,14 +1717,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) /* Initialize the LSI table */ for (i = 0; i < PCI_NUM_PINS; i++) { - uint32_t irq; + uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; Error *local_err = NULL; - irq = spapr_irq_findone(spapr, &local_err); - if (local_err) { - error_propagate(errp, local_err); - error_prepend(errp, "can't allocate LSIs: "); - return; + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + irq = spapr_irq_findone(spapr, &local_err); + if (local_err) { + error_propagate(errp, local_err); + error_prepend(errp, "can't allocate LSIs: "); + return; + } } spapr_irq_claim(spapr, irq, true, &local_err); @@ -2123,6 +2137,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges)); _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1)); + /* TODO: fine tune the total count of allocatable MSIs per PHB */ _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR)); /* Dynamic DMA window */ diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index be9af71437cc..840d4a3c451c 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -37,12 +37,13 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" -#include "hw/ppc/xics.h" #include "hw/ppc/fdt.h" #include "trace.h" #include <libfdt.h> +#define SPAPR_VIO_REG_BASE 0x71000000 + static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -445,6 +446,55 @@ static void spapr_vio_busdev_reset(DeviceState *qdev) } } +/* + * The register property of a VIO device is defined in libvirt using + * 0x1000 as a base register number plus a 0x1000 increment. For the + * VIO tty device, the base number is changed to 0x30000000.
QEMU uses + * a base register number of 0x71000000 and then a simple increment. + * + * The formula below tries to compute a unique index number from the + * register value that will be used to define the IRQ number of the + * VIO device. + * + * A maximum of 256 VIO devices is covered. Collisions are possible + * but they will be detected when the IRQ is claimed. + */ +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg) +{ + uint32_t irq; + + if (reg >= SPAPR_VIO_REG_BASE) { + /* + * VIO device register values when allocated by QEMU. For + * these, we simply mask the high bits to fit the overall + * range: [0x00 - 0xff]. + * + * The nvram VIO device (reg=0x71000000) is a static device of + * the pseries machine and so is always allocated by QEMU. Its + * IRQ number is 0x0. + */ + irq = reg & 0xff; + + } else if (reg >= 0x30000000) { + /* + * VIO tty devices register values, when allocated by libvirt, + * are mapped in range [0xf0 - 0xff], which gives us a maximum of 16 + * vtys. + */ + irq = 0xf0 | ((reg >> 12) & 0xf); + + } else { + /* + * Other VIO devices register values, when allocated by + * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts + * will be detected when IRQ is claimed.
+ */ + irq = (reg >> 12) & 0xff; + } + + return SPAPR_IRQ_VIO | irq; +} + static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); @@ -485,10 +535,14 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) } if (!dev->irq) { - dev->irq = spapr_irq_findone(spapr, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; + dev->irq = spapr_vio_reg_to_irq(dev->reg); + + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + dev->irq = spapr_irq_findone(spapr, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } } @@ -557,7 +611,7 @@ VIOsPAPRBus *spapr_vio_bus_init(void) /* Create bus on bridge device */ qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); bus = SPAPR_VIO_BUS(qbus); - bus->next_reg = 0x71000000; + bus->next_reg = SPAPR_VIO_REG_BASE; /* hcall-vio */ spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal); diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index bcab6323b7ed..4ab556467289 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o # IBM PowerNV obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
This proposal introduces a new IRQ number space layout using static numbers for all devices, depending on a device index, and a bitmap allocator for the MSI IRQ numbers which are negotiated by the guest at runtime. As the VIO device model does not have a device index but a "reg" property, we introduce a formula to compute an IRQ number from a "reg" value. It should avoid most collisions. The previous layout is kept in pre-3.1 machines, which raise the 'legacy_irq_allocation' machine class flag. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- include/hw/ppc/spapr.h | 5 +++ include/hw/ppc/spapr_irq.h | 32 ++++++++++++++++++ hw/ppc/spapr.c | 32 ++++++++++++++++++ hw/ppc/spapr_events.c | 12 ++++--- hw/ppc/spapr_irq.c | 56 ++++++++++++++++++++++++++++++++ hw/ppc/spapr_pci.c | 29 +++++++++++++---- hw/ppc/spapr_vio.c | 66 ++++++++++++++++++++++++++++++++++---- hw/ppc/Makefile.objs | 2 +- 8 files changed, 216 insertions(+), 18 deletions(-) create mode 100644 include/hw/ppc/spapr_irq.h create mode 100644 hw/ppc/spapr_irq.c