Message ID | 1476314039-9520-9-git-send-email-mdroth@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote: > Hotplug events were previously delivered using an EPOW interrupt > and were queued by linux guests into a circular buffer. For traditional > EPOW events like shutdown/resets, this isn't an issue, but for hotplug > events there are cases where this buffer can be exhausted, resulting > in the loss of hotplug events, resets, etc. > > Newer-style hotplug event are delivered using a dedicated event source. > We enable this in supported guests by adding standard an additional > event source in the guest device-tree via /event-sources, and, if > the guest advertises support for the newer-style hotplug events, > using the corresponding interrupt to signal the available of > hotplug/unplug events. > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> So.. are you saying that as well as allowing new event types, the new special hotplug event souce effectively allows for a bigger queue? Does that mean that we didn't even necessarily need the base+length unplug events, because we could now have sent the many single-LMB unplug requests that were necessary? Or does it not increase the effective queue enough for that? > --- > hw/ppc/spapr.c | 10 ++-- > hw/ppc/spapr_events.c | 148 ++++++++++++++++++++++++++++++++++++++----------- > include/hw/ppc/spapr.h | 3 +- > 3 files changed, 120 insertions(+), 41 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index d80a6fa..2037222 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > hwaddr initrd_size, > hwaddr kernel_size, > bool little_endian, > - const char *kernel_cmdline, > - uint32_t epow_irq) > + const char *kernel_cmdline) > { > void *fdt; > uint32_t start_prop = cpu_to_be32(initrd_base); > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > _FDT((fdt_end_node(fdt))); > > /* event-sources */ > - spapr_events_fdt_skel(fdt, epow_irq); > + spapr_events_fdt_skel(fdt); > > /* /hypervisor node */ > if (kvm_enabled()) { > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine) > } > g_free(filename); > > - /* Set up EPOW events infrastructure */ > + /* Set up RTAS event infrastructure */ > spapr_events_init(spapr); > > /* Set up the RTC RTAS interfaces */ > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine) > /* Prepare the device tree */ > spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, > kernel_size, kernel_le, > - kernel_cmdline, > - spapr->check_exception_irq); > + kernel_cmdline); > assert(spapr->fdt_skel != NULL); > > /* used by RTAS */ > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 4c7b6ae..f8bbec6 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -40,6 +40,7 @@ > #include "hw/ppc/spapr_drc.h" > #include "qemu/help_option.h" > #include "qemu/bcd.h" > +#include "hw/ppc/spapr_ovec.h" > #include <libfdt.h> > > struct rtas_error_log { > @@ -206,28 +207,104 @@ struct hp_log_full { > struct rtas_event_log_v6_hp hp; > } QEMU_PACKED; > > -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 > -#define EVENT_MASK_EPOW 0x40000000 > -#define EVENT_MASK_HOTPLUG 0x10000000 > -#define EVENT_MASK_IO 0x08000000 > +typedef enum EventClassIndex { > + EVENT_CLASS_INTERNAL_ERRORS = 0, > + EVENT_CLASS_EPOW = 1, > + EVENT_CLASS_RESERVED = 2, > + EVENT_CLASS_HOT_PLUG = 3, > + EVENT_CLASS_IO = 4, > + EVENT_CLASS_MAX > +} EventClassIndex; > + > +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) > + > +typedef struct EventSource { > + const char *name; > + int irq; > + uint32_t mask; > + bool enabled; > +} EventSource; > + > +static EventSource event_source[EVENT_CLASS_MAX] = { > + [EVENT_CLASS_INTERNAL_ERRORS] = { .name = "internal-errors", }, > + [EVENT_CLASS_EPOW] = { .name = "epow-events", }, > + [EVENT_CLASS_HOT_PLUG] = { .name = "hot-plug-events", }, > + [EVENT_CLASS_IO] = { .name = "ibm,io-events", }, > +}; > + > +static void rtas_event_source_register(EventClassIndex index, int irq) > +{ > + /* we only support 1 irq per event class at the moment */ > + g_assert(!event_source[index].enabled); > + event_source[index].irq = irq; > + event_source[index].mask = EVENT_CLASS_MASK(index); > + event_source[index].enabled = true; > +} I really don't like adding a mutable global table. This should probably be under the sPAPRMachineState. > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) > +void spapr_events_fdt_skel(void *fdt) > { > - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; > - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; > + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; > + int i, count = 0; > > _FDT((fdt_begin_node(fdt, "event-sources"))); > > + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { > + /* TODO: what does 0 entail? */ It's just part of the interrupt specifier format expected by the event-sources binding. It's not really important. > + uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 }; > + > + if (!event_source[i].enabled) { > + continue; > + } > + > + _FDT((fdt_begin_node(fdt, event_source[i].name))); > + _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > + _FDT((fdt_end_node(fdt))); > + > + irq_ranges[count++] = interrupts[0]; > + irq_ranges[count++] = cpu_to_be32(1); > + } > + > + /* TODO: confirm the count is the last expected element */ > + irq_ranges[count] = cpu_to_be32(count); > + count++; > + > _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); > _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); > _FDT((fdt_property(fdt, "interrupt-ranges", > - irq_ranges, sizeof(irq_ranges)))); > + irq_ranges, count * sizeof(uint32_t)))); > > - _FDT((fdt_begin_node(fdt, "epow-events"))); > - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > _FDT((fdt_end_node(fdt))); > +} > > - _FDT((fdt_end_node(fdt))); > +static const EventSource *rtas_event_log_to_source(int log_type) > +{ > + const EventSource *source; > + > + switch (log_type) { > + case RTAS_LOG_TYPE_HOTPLUG: > + source = &event_source[EVENT_CLASS_HOT_PLUG]; > + if (event_source[EVENT_CLASS_HOT_PLUG].enabled) { > + break; > + } This should probably be using the flag you already have in the MachineState, rather than a global. > + /* fall back to epow for legacy hotplug interrupt source */ > + case RTAS_LOG_TYPE_EPOW: > + source = &event_source[EVENT_CLASS_EPOW]; > + break; > + default: > + source = NULL; > + } > + > + return source; > +} > + > +static int rtas_event_log_to_irq(int log_type) > +{ > + const EventSource *source = rtas_event_log_to_source(log_type); > + > + g_assert(source); > + g_assert(source->enabled); > + > + return source->irq; > } > > static void rtas_event_log_queue(int log_type, void *data, bool exception) > @@ -248,19 +325,14 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > sPAPREventLogEntry *entry = NULL; > > - /* we only queue EPOW events atm. */ > - if ((event_mask & EVENT_MASK_EPOW) == 0) { > - return NULL; > - } > - > QTAILQ_FOREACH(entry, &spapr->pending_events, next) { > + const EventSource *source = rtas_event_log_to_source(entry->log_type); > + > if (entry->exception != exception) { > continue; > } > > - /* EPOW and hotplug events are surfaced in the same manner */ > - if (entry->log_type == RTAS_LOG_TYPE_EPOW || > - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { > + if (source->mask & event_mask) { > break; > } > } > @@ -277,19 +349,14 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > sPAPREventLogEntry *entry = NULL; > > - /* we only queue EPOW events atm. */ > - if ((event_mask & EVENT_MASK_EPOW) == 0) { > - return false; > - } > - > QTAILQ_FOREACH(entry, &spapr->pending_events, next) { > + const EventSource *source = rtas_event_log_to_source(entry->log_type); > + > if (entry->exception != exception) { > continue; > } > > - /* EPOW and hotplug events are surfaced in the same manner */ > - if (entry->log_type == RTAS_LOG_TYPE_EPOW || > - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { > + if (source->mask & event_mask) { > return true; > } > } > @@ -377,7 +444,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) > > rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > + qemu_irq_pulse(xics_get_qirq(spapr->xics, > + rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW))); > } > > static void spapr_hotplug_set_signalled(uint32_t drc_index) > @@ -459,7 +527,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, > > rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > + qemu_irq_pulse(xics_get_qirq(spapr->xics, > + rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG))); > } > > void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) > @@ -505,6 +574,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > uint64_t xinfo; > sPAPREventLogEntry *event; > struct rtas_error_log *hdr; > + int i; > > if ((nargs < 6) || (nargs > 7) || nret != 1) { > rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); > @@ -541,8 +611,11 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > * do the latter here, since our code relies on edge-triggered > * interrupts. > */ > - if (rtas_event_log_contains(mask, true)) { > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > + for (i = 0; i < EVENT_CLASS_MAX; i++) { > + if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { > + g_assert(event_source[i].enabled); > + qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq)); > + } > } > > return; > @@ -594,8 +667,17 @@ out_no_events: > void spapr_events_init(sPAPRMachineState *spapr) > { > QTAILQ_INIT(&spapr->pending_events); > - spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false, > - &error_fatal); > + > + rtas_event_source_register(EVENT_CLASS_EPOW, > + xics_spapr_alloc(spapr->xics, 0, 0, false, > + &error_fatal)); > + > + if (spapr->use_hotplug_event_source) { > + rtas_event_source_register(EVENT_CLASS_HOT_PLUG, > + xics_spapr_alloc(spapr->xics, 0, 0, false, > + &error_fatal)); > + } > + > spapr->epow_notifier.notify = spapr_powerdown_req; > qemu_register_powerdown_notifier(&spapr->epow_notifier); > spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index d1a4a14..2295ac6 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -71,7 +71,6 @@ struct sPAPRMachineState { > sPAPROptionVector *ov5_cas; > bool cas_reboot; > > - uint32_t check_exception_irq; > Notifier epow_notifier; > QTAILQ_HEAD(, sPAPREventLogEntry) pending_events; > bool use_hotplug_event_source; > @@ -579,7 +578,7 @@ struct sPAPREventLogEntry { > }; > > void spapr_events_init(sPAPRMachineState *sm); > -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); > +void spapr_events_fdt_skel(void *fdt); > int spapr_h_cas_compose_response(sPAPRMachineState *sm, > target_ulong addr, target_ulong size, > bool cpu_update,
On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote: > Hotplug events were previously delivered using an EPOW interrupt > and were queued by linux guests into a circular buffer. For traditional > EPOW events like shutdown/resets, this isn't an issue, but for hotplug > events there are cases where this buffer can be exhausted, resulting > in the loss of hotplug events, resets, etc. > > Newer-style hotplug event are delivered using a dedicated event source. > We enable this in supported guests by adding standard an additional > event source in the guest device-tree via /event-sources, and, if > the guest advertises support for the newer-style hotplug events, > using the corresponding interrupt to signal the available of > hotplug/unplug events. > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> > --- > hw/ppc/spapr.c | 10 ++-- > hw/ppc/spapr_events.c | 148 ++++++++++++++++++++++++++++++++++++++----------- > include/hw/ppc/spapr.h | 3 +- > 3 files changed, 120 insertions(+), 41 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index d80a6fa..2037222 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > hwaddr initrd_size, > hwaddr kernel_size, > bool little_endian, > - const char *kernel_cmdline, > - uint32_t epow_irq) > + const char *kernel_cmdline) > { > void *fdt; > uint32_t start_prop = cpu_to_be32(initrd_base); > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > _FDT((fdt_end_node(fdt))); > > /* event-sources */ > - spapr_events_fdt_skel(fdt, epow_irq); > + spapr_events_fdt_skel(fdt); > > /* /hypervisor node */ > if (kvm_enabled()) { > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine) > } > g_free(filename); > > - /* Set up EPOW events infrastructure */ > + /* Set up RTAS event infrastructure */ > spapr_events_init(spapr); > > /* Set up the RTC RTAS interfaces */ > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine) > /* Prepare the device tree */ > spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, > kernel_size, kernel_le, > - kernel_cmdline, > - spapr->check_exception_irq); > + kernel_cmdline); > assert(spapr->fdt_skel != NULL); > > /* used by RTAS */ > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 4c7b6ae..f8bbec6 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -40,6 +40,7 @@ > #include "hw/ppc/spapr_drc.h" > #include "qemu/help_option.h" > #include "qemu/bcd.h" > +#include "hw/ppc/spapr_ovec.h" > #include <libfdt.h> > > struct rtas_error_log { > @@ -206,28 +207,104 @@ struct hp_log_full { > struct rtas_event_log_v6_hp hp; > } QEMU_PACKED; > > -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 > -#define EVENT_MASK_EPOW 0x40000000 > -#define EVENT_MASK_HOTPLUG 0x10000000 > -#define EVENT_MASK_IO 0x08000000 > +typedef enum EventClassIndex { > + EVENT_CLASS_INTERNAL_ERRORS = 0, > + EVENT_CLASS_EPOW = 1, > + EVENT_CLASS_RESERVED = 2, > + EVENT_CLASS_HOT_PLUG = 3, > + EVENT_CLASS_IO = 4, > + EVENT_CLASS_MAX > +} EventClassIndex; > + > +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) > + > +typedef struct EventSource { > + const char *name; > + int irq; > + uint32_t mask; > + bool enabled; > +} EventSource; > + > +static EventSource event_source[EVENT_CLASS_MAX] = { > + [EVENT_CLASS_INTERNAL_ERRORS] = { .name = "internal-errors", }, > + [EVENT_CLASS_EPOW] = { .name = "epow-events", }, > + [EVENT_CLASS_HOT_PLUG] = { .name = "hot-plug-events", }, > + [EVENT_CLASS_IO] = { .name = "ibm,io-events", }, > +}; > + > +static void rtas_event_source_register(EventClassIndex index, int irq) > +{ > + /* we only support 1 irq per event class at the moment */ > + g_assert(!event_source[index].enabled); > + event_source[index].irq = irq; > + event_source[index].mask = EVENT_CLASS_MASK(index); > + event_source[index].enabled = true; > +} > > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) > +void spapr_events_fdt_skel(void *fdt) > { > - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; > - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; > + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; > + int i, count = 0; > > _FDT((fdt_begin_node(fdt, "event-sources"))); > > + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { > + /* TODO: what does 0 entail? */ > + uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 }; > + > + if (!event_source[i].enabled) { > + continue; > + } > + > + _FDT((fdt_begin_node(fdt, event_source[i].name))); > + _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > + _FDT((fdt_end_node(fdt))); > + > + irq_ranges[count++] = interrupts[0]; > + irq_ranges[count++] = cpu_to_be32(1); > + } > + > + /* TODO: confirm the count is the last expected element */ > + irq_ranges[count] = cpu_to_be32(count); > + count++; > + > _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); > _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); > _FDT((fdt_property(fdt, "interrupt-ranges", > - irq_ranges, sizeof(irq_ranges)))); > + irq_ranges, count * sizeof(uint32_t)))); > > - _FDT((fdt_begin_node(fdt, "epow-events"))); > - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > _FDT((fdt_end_node(fdt))); > +} > > - _FDT((fdt_end_node(fdt))); > +static const EventSource *rtas_event_log_to_source(int log_type) > +{ > + const EventSource *source; > + > + switch (log_type) { > + case RTAS_LOG_TYPE_HOTPLUG: > + source = &event_source[EVENT_CLASS_HOT_PLUG]; > + if (event_source[EVENT_CLASS_HOT_PLUG].enabled) { > + break; > + } In addition to the above .enabled check, shouldn't you be checking if the guest indeed supports the dedicated hotplug interrupt source before returning the source ? This I believe is the reason for the CPU hotplug failures I that mentioned in reply to your 11/11 thread. I am on 4.7.x kernel which probably doesn't support hotplug interrupt source, but QEMU ends up registering and raising such an interrupt. Regards, Bharata.
Quoting David Gibson (2016-10-13 23:56:43) > On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote: > > Hotplug events were previously delivered using an EPOW interrupt > > and were queued by linux guests into a circular buffer. For traditional > > EPOW events like shutdown/resets, this isn't an issue, but for hotplug > > events there are cases where this buffer can be exhausted, resulting > > in the loss of hotplug events, resets, etc. > > > > Newer-style hotplug event are delivered using a dedicated event source. > > We enable this in supported guests by adding standard an additional > > event source in the guest device-tree via /event-sources, and, if > > the guest advertises support for the newer-style hotplug events, > > using the corresponding interrupt to signal the available of > > hotplug/unplug events. > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> > > So.. are you saying that as well as allowing new event types, the new > special hotplug event souce effectively allows for a bigger queue? > > Does that mean that we didn't even necessarily need the base+length > unplug events, because we could now have sent the many single-LMB > unplug requests that were necessary? Or does it not increase the > effective queue enough for that? I assume there are still some internal limits, but the events are now processed using a workqueue which should provide a bit more headroom than the RTAS event buffer used for EPOW events. Maybe John (on cc:) can provide more insight into what the actual limits In either case, the possibility for huge memory hotplug/unplug situations still warrant the use of count+index I think, and since support for the new event delivery mechanism is negotiated using the same option bit as count+index, there's not really any reason not to use it when we can. For situations where we can't (CPU/PCI), it might give us a bit of breathing room there as well. > > > --- > > hw/ppc/spapr.c | 10 ++-- > > hw/ppc/spapr_events.c | 148 ++++++++++++++++++++++++++++++++++++++----------- > > include/hw/ppc/spapr.h | 3 +- > > 3 files changed, 120 insertions(+), 41 deletions(-) > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index d80a6fa..2037222 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > > hwaddr initrd_size, > > hwaddr kernel_size, > > bool little_endian, > > - const char *kernel_cmdline, > > - uint32_t epow_irq) > > + const char *kernel_cmdline) > > { > > void *fdt; > > uint32_t start_prop = cpu_to_be32(initrd_base); > > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > > _FDT((fdt_end_node(fdt))); > > > > /* event-sources */ > > - spapr_events_fdt_skel(fdt, epow_irq); > > + spapr_events_fdt_skel(fdt); > > > > /* /hypervisor node */ > > if (kvm_enabled()) { > > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine) > > } > > g_free(filename); > > > > - /* Set up EPOW events infrastructure */ > > + /* Set up RTAS event infrastructure */ > > spapr_events_init(spapr); > > > > /* Set up the RTC RTAS interfaces */ > > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine) > > /* Prepare the device tree */ > > spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, > > kernel_size, kernel_le, > > - kernel_cmdline, > > - spapr->check_exception_irq); > > + kernel_cmdline); > > assert(spapr->fdt_skel != NULL); > > > > /* used by RTAS */ > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > > index 4c7b6ae..f8bbec6 100644 > > --- a/hw/ppc/spapr_events.c > > +++ b/hw/ppc/spapr_events.c > > @@ -40,6 +40,7 @@ > > #include "hw/ppc/spapr_drc.h" > > #include "qemu/help_option.h" > > #include "qemu/bcd.h" > > +#include "hw/ppc/spapr_ovec.h" > > #include <libfdt.h> > > > > struct rtas_error_log { > > @@ -206,28 +207,104 @@ struct hp_log_full { > > struct rtas_event_log_v6_hp hp; > > } QEMU_PACKED; > > > > -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 > > -#define EVENT_MASK_EPOW 0x40000000 > > -#define EVENT_MASK_HOTPLUG 0x10000000 > > -#define EVENT_MASK_IO 0x08000000 > > +typedef enum EventClassIndex { > > + EVENT_CLASS_INTERNAL_ERRORS = 0, > > + EVENT_CLASS_EPOW = 1, > > + EVENT_CLASS_RESERVED = 2, > > + EVENT_CLASS_HOT_PLUG = 3, > > + EVENT_CLASS_IO = 4, > > + EVENT_CLASS_MAX > > +} EventClassIndex; > > + > > +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) > > + > > +typedef struct EventSource { > > + const char *name; > > + int irq; > > + uint32_t mask; > > + bool enabled; > > +} EventSource; > > + > > +static EventSource event_source[EVENT_CLASS_MAX] = { > > + [EVENT_CLASS_INTERNAL_ERRORS] = { .name = "internal-errors", }, > > + [EVENT_CLASS_EPOW] = { .name = "epow-events", }, > > + [EVENT_CLASS_HOT_PLUG] = { .name = "hot-plug-events", }, > > + [EVENT_CLASS_IO] = { .name = "ibm,io-events", }, > > +}; > > + > > +static void rtas_event_source_register(EventClassIndex index, int irq) > > +{ > > + /* we only support 1 irq per event class at the moment */ > > + g_assert(!event_source[index].enabled); > > + event_source[index].irq = irq; > > + event_source[index].mask = EVENT_CLASS_MASK(index); > > + event_source[index].enabled = true; > > +} > > I really don't like adding a mutable global table. This should > probably be under the sPAPRMachineState. I think I started off going that route. Not sure what steered me in this direction, but will take another look at that approach. > > > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) > > +void spapr_events_fdt_skel(void *fdt) > > { > > - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; > > - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; > > + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; > > + int i, count = 0; > > > > _FDT((fdt_begin_node(fdt, "event-sources"))); > > > > + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { > > + /* TODO: what does 0 entail? */ > > It's just part of the interrupt specifier format expected by the > event-sources binding. It's not really important. > > > + uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 }; > > + > > + if (!event_source[i].enabled) { > > + continue; > > + } > > + > > + _FDT((fdt_begin_node(fdt, event_source[i].name))); > > + _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > > + _FDT((fdt_end_node(fdt))); > > + > > + irq_ranges[count++] = interrupts[0]; > > + irq_ranges[count++] = cpu_to_be32(1); > > + } > > + > > + /* TODO: confirm the count is the last expected element */ > > + irq_ranges[count] = cpu_to_be32(count); > > + count++; > > + > > _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); > > _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); > > _FDT((fdt_property(fdt, "interrupt-ranges", > > - irq_ranges, sizeof(irq_ranges)))); > > + irq_ranges, count * sizeof(uint32_t)))); > > > > - _FDT((fdt_begin_node(fdt, "epow-events"))); > > - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > > _FDT((fdt_end_node(fdt))); > > +} > > > > - _FDT((fdt_end_node(fdt))); > > +static const EventSource *rtas_event_log_to_source(int log_type) > > +{ > > + const EventSource *source; > > + > > + switch (log_type) { > > + case RTAS_LOG_TYPE_HOTPLUG: > > + source = &event_source[EVENT_CLASS_HOT_PLUG]; > > + if (event_source[EVENT_CLASS_HOT_PLUG].enabled) { > > + break; > > + } > > This should probably be using the flag you already have in the > MachineState, rather than a global. > > > + /* fall back to epow for legacy hotplug interrupt source */ > > + case RTAS_LOG_TYPE_EPOW: > > + source = &event_source[EVENT_CLASS_EPOW]; > > + break; > > + default: > > + source = NULL; > > + } > > + > > + return source; > > +} > > + > > +static int rtas_event_log_to_irq(int log_type) > > +{ > > + const EventSource *source = rtas_event_log_to_source(log_type); > > + > > + g_assert(source); > > + g_assert(source->enabled); > > + > > + return source->irq; > > } > > > > static void rtas_event_log_queue(int log_type, void *data, bool exception) > > @@ -248,19 +325,14 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, > > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > sPAPREventLogEntry *entry = NULL; > > > > - /* we only queue EPOW events atm. */ > > - if ((event_mask & EVENT_MASK_EPOW) == 0) { > > - return NULL; > > - } > > - > > QTAILQ_FOREACH(entry, &spapr->pending_events, next) { > > + const EventSource *source = rtas_event_log_to_source(entry->log_type); > > + > > if (entry->exception != exception) { > > continue; > > } > > > > - /* EPOW and hotplug events are surfaced in the same manner */ > > - if (entry->log_type == RTAS_LOG_TYPE_EPOW || > > - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { > > + if (source->mask & event_mask) { > > break; > > } > > } > > @@ -277,19 +349,14 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) > > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > sPAPREventLogEntry *entry = NULL; > > > > - /* we only queue EPOW events atm. */ > > - if ((event_mask & EVENT_MASK_EPOW) == 0) { > > - return false; > > - } > > - > > QTAILQ_FOREACH(entry, &spapr->pending_events, next) { > > + const EventSource *source = rtas_event_log_to_source(entry->log_type); > > + > > if (entry->exception != exception) { > > continue; > > } > > > > - /* EPOW and hotplug events are surfaced in the same manner */ > > - if (entry->log_type == RTAS_LOG_TYPE_EPOW || > > - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { > > + if (source->mask & event_mask) { > > return true; > > } > > } > > @@ -377,7 +444,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) > > > > rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); > > > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > > + qemu_irq_pulse(xics_get_qirq(spapr->xics, > > + rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW))); > > } > > > > static void spapr_hotplug_set_signalled(uint32_t drc_index) > > @@ -459,7 +527,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, > > > > rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); > > > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > > + qemu_irq_pulse(xics_get_qirq(spapr->xics, > > + rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG))); > > } > > > > void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) > > @@ -505,6 +574,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > uint64_t xinfo; > > sPAPREventLogEntry *event; > > struct rtas_error_log *hdr; > > + int i; > > > > if ((nargs < 6) || (nargs > 7) || nret != 1) { > > rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); > > @@ -541,8 +611,11 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > * do the latter here, since our code relies on edge-triggered > > * interrupts. > > */ > > - if (rtas_event_log_contains(mask, true)) { > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > > + for (i = 0; i < EVENT_CLASS_MAX; i++) { > > + if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { > > + g_assert(event_source[i].enabled); > > + qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq)); > > + } > > } > > > > return; > > @@ -594,8 +667,17 @@ out_no_events: > > void spapr_events_init(sPAPRMachineState *spapr) > > { > > QTAILQ_INIT(&spapr->pending_events); > > - spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false, > > - &error_fatal); > > + > > + rtas_event_source_register(EVENT_CLASS_EPOW, > > + xics_spapr_alloc(spapr->xics, 0, 0, false, > > + &error_fatal)); > > + > > + if (spapr->use_hotplug_event_source) { > > + rtas_event_source_register(EVENT_CLASS_HOT_PLUG, > > + xics_spapr_alloc(spapr->xics, 0, 0, false, > > + &error_fatal)); > > + } > > + > > spapr->epow_notifier.notify = spapr_powerdown_req; > > qemu_register_powerdown_notifier(&spapr->epow_notifier); > > spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > > index d1a4a14..2295ac6 100644 > > --- a/include/hw/ppc/spapr.h > > +++ b/include/hw/ppc/spapr.h > > @@ -71,7 +71,6 @@ struct sPAPRMachineState { > > sPAPROptionVector *ov5_cas; > > bool cas_reboot; > > > > - uint32_t check_exception_irq; > > Notifier epow_notifier; > > QTAILQ_HEAD(, sPAPREventLogEntry) pending_events; > > bool use_hotplug_event_source; > > @@ -579,7 +578,7 @@ struct sPAPREventLogEntry { > > }; > > > > void spapr_events_init(sPAPRMachineState *sm); > > -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); > > +void spapr_events_fdt_skel(void *fdt); > > int spapr_h_cas_compose_response(sPAPRMachineState *sm, > > target_ulong addr, target_ulong size, > > bool cpu_update, > > -- > David Gibson | I'll have my music baroque, and my code > david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ > | _way_ _around_! > http://www.ozlabs.org/~dgibson
Quoting Bharata B Rao (2016-10-14 03:46:20) > On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote: > > Hotplug events were previously delivered using an EPOW interrupt > > and were queued by linux guests into a circular buffer. For traditional > > EPOW events like shutdown/resets, this isn't an issue, but for hotplug > > events there are cases where this buffer can be exhausted, resulting > > in the loss of hotplug events, resets, etc. > > > > Newer-style hotplug event are delivered using a dedicated event source. > > We enable this in supported guests by adding standard an additional > > event source in the guest device-tree via /event-sources, and, if > > the guest advertises support for the newer-style hotplug events, > > using the corresponding interrupt to signal the available of > > hotplug/unplug events. > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> > > --- > > hw/ppc/spapr.c | 10 ++-- > > hw/ppc/spapr_events.c | 148 ++++++++++++++++++++++++++++++++++++++----------- > > include/hw/ppc/spapr.h | 3 +- > > 3 files changed, 120 insertions(+), 41 deletions(-) > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index d80a6fa..2037222 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > > hwaddr initrd_size, > > hwaddr kernel_size, > > bool little_endian, > > - const char *kernel_cmdline, > > - uint32_t epow_irq) > > + const char *kernel_cmdline) > > { > > void *fdt; > > uint32_t start_prop = cpu_to_be32(initrd_base); > > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > > _FDT((fdt_end_node(fdt))); > > > > /* event-sources */ > > - spapr_events_fdt_skel(fdt, epow_irq); > > + spapr_events_fdt_skel(fdt); > > > > /* /hypervisor node */ > > if (kvm_enabled()) { > > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine) > > } > > g_free(filename); > > > > - /* Set up EPOW events infrastructure */ > > + /* Set up RTAS event infrastructure */ > > spapr_events_init(spapr); > > > > /* Set up the RTC RTAS interfaces */ > > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine) > > /* Prepare the device tree */ > > spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, > > kernel_size, kernel_le, > > - kernel_cmdline, > > - spapr->check_exception_irq); > > + kernel_cmdline); > > assert(spapr->fdt_skel != NULL); > > > > /* used by RTAS */ > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > > index 4c7b6ae..f8bbec6 100644 > > --- a/hw/ppc/spapr_events.c > > +++ b/hw/ppc/spapr_events.c > > @@ -40,6 +40,7 @@ > > #include "hw/ppc/spapr_drc.h" > > #include "qemu/help_option.h" > > #include "qemu/bcd.h" > > +#include "hw/ppc/spapr_ovec.h" > > #include <libfdt.h> > > > > struct rtas_error_log { > > @@ -206,28 +207,104 @@ struct hp_log_full { > > struct rtas_event_log_v6_hp hp; > > } QEMU_PACKED; > > > > -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 > > -#define EVENT_MASK_EPOW 0x40000000 > > -#define EVENT_MASK_HOTPLUG 0x10000000 > > -#define EVENT_MASK_IO 0x08000000 > > +typedef enum EventClassIndex { > > + EVENT_CLASS_INTERNAL_ERRORS = 0, > > + EVENT_CLASS_EPOW = 1, > > + EVENT_CLASS_RESERVED = 2, > > + EVENT_CLASS_HOT_PLUG = 3, > > + EVENT_CLASS_IO = 4, > > + EVENT_CLASS_MAX > > +} EventClassIndex; > > + > > +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) > > + > > +typedef struct EventSource { > > + const char *name; > > + int irq; > > + uint32_t mask; > > + bool enabled; > > +} EventSource; > > + > > +static EventSource event_source[EVENT_CLASS_MAX] = { > > + [EVENT_CLASS_INTERNAL_ERRORS] = { .name = "internal-errors", }, > > + [EVENT_CLASS_EPOW] = { .name = "epow-events", }, > > + [EVENT_CLASS_HOT_PLUG] = { .name = "hot-plug-events", }, > > + [EVENT_CLASS_IO] = { .name = "ibm,io-events", }, > > +}; > > + > > +static void rtas_event_source_register(EventClassIndex index, int irq) > > +{ > > + /* we only support 1 irq per event class at the moment */ > > + g_assert(!event_source[index].enabled); > > + event_source[index].irq = irq; > > + event_source[index].mask = EVENT_CLASS_MASK(index); > > + event_source[index].enabled = true; > > +} > > > > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) > > +void spapr_events_fdt_skel(void *fdt) > > { > > - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; > > - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; > > + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; > > + int i, count = 0; > > > > _FDT((fdt_begin_node(fdt, "event-sources"))); > > > > + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { > > + /* TODO: what does 0 entail? */ > > + uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 }; > > + > > + if (!event_source[i].enabled) { > > + continue; > > + } > > + > > + _FDT((fdt_begin_node(fdt, event_source[i].name))); > > + _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > > + _FDT((fdt_end_node(fdt))); > > + > > + irq_ranges[count++] = interrupts[0]; > > + irq_ranges[count++] = cpu_to_be32(1); > > + } > > + > > + /* TODO: confirm the count is the last expected element */ > > + irq_ranges[count] = cpu_to_be32(count); > > + count++; > > + > > _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); > > _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); > > _FDT((fdt_property(fdt, "interrupt-ranges", > > - irq_ranges, sizeof(irq_ranges)))); > > + irq_ranges, count * sizeof(uint32_t)))); > > > > - _FDT((fdt_begin_node(fdt, "epow-events"))); > > - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > > _FDT((fdt_end_node(fdt))); > > +} > > > > - _FDT((fdt_end_node(fdt))); > > +static const EventSource *rtas_event_log_to_source(int log_type) > > +{ > > + const EventSource *source; > > + > > + switch (log_type) { > > + case RTAS_LOG_TYPE_HOTPLUG: > > + source = &event_source[EVENT_CLASS_HOT_PLUG]; > > + if (event_source[EVENT_CLASS_HOT_PLUG].enabled) { > > + break; > > + } > > In addition to the above .enabled check, shouldn't you be checking if > the guest indeed supports the dedicated hotplug interrupt source before > returning the source ? Yes, I believe you're right. I'd been relying legacy-hotplug-events=true to test the old signalling mechanism (which leaves this event source disabled), but haven't tried PCI/CPU since adding the ovec stuff, so didn't notice those were broken with legacy-hotplug-events=false. Will fix it up in next submission. > > This I believe is the reason for the CPU hotplug failures I that mentioned > in reply to your 11/11 thread. I am on 4.7.x kernel which probably doesn't > support hotplug interrupt source, but QEMU ends up registering and raising > such an interrupt. > > Regards, > Bharata.
On Fri, Oct 14, 2016 at 01:44:17PM -0500, Michael Roth wrote: > Quoting David Gibson (2016-10-13 23:56:43) > > On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote: > > > Hotplug events were previously delivered using an EPOW interrupt > > > and were queued by linux guests into a circular buffer. For traditional > > > EPOW events like shutdown/resets, this isn't an issue, but for hotplug > > > events there are cases where this buffer can be exhausted, resulting > > > in the loss of hotplug events, resets, etc. > > > > > > Newer-style hotplug event are delivered using a dedicated event source. > > > We enable this in supported guests by adding standard an additional > > > event source in the guest device-tree via /event-sources, and, if > > > the guest advertises support for the newer-style hotplug events, > > > using the corresponding interrupt to signal the available of > > > hotplug/unplug events. > > > > > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> > > > > So.. are you saying that as well as allowing new event types, the new > > special hotplug event souce effectively allows for a bigger queue? > > > > Does that mean that we didn't even necessarily need the base+length > > unplug events, because we could now have sent the many single-LMB > > unplug requests that were necessary? Or does it not increase the > > effective queue enough for that? > > I assume there are still some internal limits, but the events > are now processed using a workqueue which should provide a bit more > headroom than the RTAS event buffer used for EPOW events. Maybe > John (on cc:) can provide more insight into what the actual limits > > In either case, the possibility for huge memory hotplug/unplug > situations still warrant the use of count+index I think, and since > support for the new event delivery mechanism is negotiated using the > same option bit as count+index, there's not really any reason not to > use it when we can. For situations where we can't (CPU/PCI), it > might give us a bit of breathing room there as well. Ok, makes sense. Thanks for the extra information. > > > > > > --- > > > hw/ppc/spapr.c | 10 ++-- > > > hw/ppc/spapr_events.c | 148 ++++++++++++++++++++++++++++++++++++++----------- > > > include/hw/ppc/spapr.h | 3 +- > > > 3 files changed, 120 insertions(+), 41 deletions(-) > > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > > index d80a6fa..2037222 100644 > > > --- a/hw/ppc/spapr.c > > > +++ b/hw/ppc/spapr.c > > > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > > > hwaddr initrd_size, > > > hwaddr kernel_size, > > > bool little_endian, > > > - const char *kernel_cmdline, > > > - uint32_t epow_irq) > > > + const char *kernel_cmdline) > > > { > > > void *fdt; > > > uint32_t start_prop = cpu_to_be32(initrd_base); > > > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > > > _FDT((fdt_end_node(fdt))); > > > > > > /* event-sources */ > > > - spapr_events_fdt_skel(fdt, epow_irq); > > > + spapr_events_fdt_skel(fdt); > > > > > > /* /hypervisor node */ > > > if (kvm_enabled()) { > > > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine) > > > } > > > g_free(filename); > > > > > > - /* Set up EPOW events infrastructure */ > > > + /* Set up RTAS event infrastructure */ > > > spapr_events_init(spapr); > > > > > > /* Set up the RTC RTAS interfaces */ > > > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine) > > > /* Prepare the device tree */ > > > spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, > > > kernel_size, kernel_le, > > > - kernel_cmdline, > > > - spapr->check_exception_irq); > > > + kernel_cmdline); > > > assert(spapr->fdt_skel != NULL); > > > > > > /* used by RTAS */ > > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > > > index 4c7b6ae..f8bbec6 100644 > > > --- a/hw/ppc/spapr_events.c > > > +++ b/hw/ppc/spapr_events.c > > > @@ -40,6 +40,7 @@ > > > #include "hw/ppc/spapr_drc.h" > > > #include "qemu/help_option.h" > > > #include "qemu/bcd.h" > > > +#include "hw/ppc/spapr_ovec.h" > > > #include <libfdt.h> > > > > > > struct rtas_error_log { > > > @@ -206,28 +207,104 @@ struct hp_log_full { > > > struct rtas_event_log_v6_hp hp; > > > } QEMU_PACKED; > > > > > > -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 > > > -#define EVENT_MASK_EPOW 0x40000000 > > > -#define EVENT_MASK_HOTPLUG 0x10000000 > > > -#define EVENT_MASK_IO 0x08000000 > > > +typedef enum EventClassIndex { > > > + EVENT_CLASS_INTERNAL_ERRORS = 0, > > > + EVENT_CLASS_EPOW = 1, > > > + EVENT_CLASS_RESERVED = 2, > > > + EVENT_CLASS_HOT_PLUG = 3, > > > + EVENT_CLASS_IO = 4, > > > + EVENT_CLASS_MAX > > > +} EventClassIndex; > > > + > > > +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) > > > + > > > +typedef struct EventSource { > > > + const char *name; > > > + int irq; > > > + uint32_t mask; > > > + bool enabled; > > > +} EventSource; > > > + > > > +static EventSource event_source[EVENT_CLASS_MAX] = { > > > + [EVENT_CLASS_INTERNAL_ERRORS] = { .name = "internal-errors", }, > > > + [EVENT_CLASS_EPOW] = { .name = "epow-events", }, > > > + [EVENT_CLASS_HOT_PLUG] = { .name = "hot-plug-events", }, > > > + [EVENT_CLASS_IO] = { .name = "ibm,io-events", }, > > > +}; > > > + > > > +static void rtas_event_source_register(EventClassIndex index, int irq) > > > +{ > > > + /* we only support 1 irq per event class at the moment */ > > > + g_assert(!event_source[index].enabled); > > > + event_source[index].irq = irq; > > > + event_source[index].mask = EVENT_CLASS_MASK(index); > > > + event_source[index].enabled = true; > > > +} > > > > I really don't like adding a mutable global table. This should > > probably be under the sPAPRMachineState. > > I think I started off going that route. Not sure what steered me in this > direction, but will take another look at that approach. > > > > > > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) > > > +void spapr_events_fdt_skel(void *fdt) > > > { > > > - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; > > > - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; > > > + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; > > > + int i, count = 0; > > > > > > _FDT((fdt_begin_node(fdt, "event-sources"))); > > > > > > + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { > > > + /* TODO: what does 0 entail? */ > > > > It's just part of the interrupt specifier format expected by the > > event-sources binding. It's not really important. > > > > > + uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 }; > > > + > > > + if (!event_source[i].enabled) { > > > + continue; > > > + } > > > + > > > + _FDT((fdt_begin_node(fdt, event_source[i].name))); > > > + _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > > > + _FDT((fdt_end_node(fdt))); > > > + > > > + irq_ranges[count++] = interrupts[0]; > > > + irq_ranges[count++] = cpu_to_be32(1); > > > + } > > > + > > > + /* TODO: confirm the count is the last expected element */ > > > + irq_ranges[count] = cpu_to_be32(count); > > > + count++; > > > + > > > _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); > > > _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); > > > _FDT((fdt_property(fdt, "interrupt-ranges", > > > - irq_ranges, sizeof(irq_ranges)))); > > > + irq_ranges, count * sizeof(uint32_t)))); > > > > > > - _FDT((fdt_begin_node(fdt, "epow-events"))); > > > - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); > > > _FDT((fdt_end_node(fdt))); > > > +} > > > > > > - _FDT((fdt_end_node(fdt))); > > > +static const EventSource *rtas_event_log_to_source(int log_type) > > > +{ > > > + const EventSource *source; > > > + > > > + switch (log_type) { > > > + case RTAS_LOG_TYPE_HOTPLUG: > > > + source = &event_source[EVENT_CLASS_HOT_PLUG]; > > > + if (event_source[EVENT_CLASS_HOT_PLUG].enabled) { > > > + break; > > > + } > > > > This should probably be using the flag you already have in the > > MachineState, rather than a global. > > > > > + /* fall back to epow for legacy hotplug interrupt source */ > > > + case RTAS_LOG_TYPE_EPOW: > > > + source = &event_source[EVENT_CLASS_EPOW]; > > > + break; > > > + default: > > > + source = NULL; > > > + } > > > + > > > + return source; > > > +} > > > + > > > +static int rtas_event_log_to_irq(int log_type) > > > +{ > > > + const EventSource *source = rtas_event_log_to_source(log_type); > > > + > > > + g_assert(source); > > > + g_assert(source->enabled); > > > + > > > + return source->irq; > > > } > > > > > > static void rtas_event_log_queue(int log_type, void *data, bool exception) > > > @@ -248,19 +325,14 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, > > > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > > sPAPREventLogEntry *entry = NULL; > > > > > > - /* we only queue EPOW events atm. */ > > > - if ((event_mask & EVENT_MASK_EPOW) == 0) { > > > - return NULL; > > > - } > > > - > > > QTAILQ_FOREACH(entry, &spapr->pending_events, next) { > > > + const EventSource *source = rtas_event_log_to_source(entry->log_type); > > > + > > > if (entry->exception != exception) { > > > continue; > > > } > > > > > > - /* EPOW and hotplug events are surfaced in the same manner */ > > > - if (entry->log_type == RTAS_LOG_TYPE_EPOW || > > > - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { > > > + if (source->mask & event_mask) { > > > break; > > > } > > > } > > > @@ -277,19 +349,14 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) > > > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > > sPAPREventLogEntry *entry = NULL; > > > > > > - /* we only queue EPOW events atm. */ > > > - if ((event_mask & EVENT_MASK_EPOW) == 0) { > > > - return false; > > > - } > > > - > > > QTAILQ_FOREACH(entry, &spapr->pending_events, next) { > > > + const EventSource *source = rtas_event_log_to_source(entry->log_type); > > > + > > > if (entry->exception != exception) { > > > continue; > > > } > > > > > > - /* EPOW and hotplug events are surfaced in the same manner */ > > > - if (entry->log_type == RTAS_LOG_TYPE_EPOW || > > > - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { > > > + if (source->mask & event_mask) { > > > return true; > > > } > > > } > > > @@ -377,7 +444,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) > > > > > > rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); > > > > > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > > > + qemu_irq_pulse(xics_get_qirq(spapr->xics, > > > + rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW))); > > > } > > > > > > static void spapr_hotplug_set_signalled(uint32_t drc_index) > > > @@ -459,7 +527,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, > > > > > > rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); > > > > > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > > > + qemu_irq_pulse(xics_get_qirq(spapr->xics, > > > + rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG))); > > > } > > > > > > void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) > > > @@ -505,6 +574,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > > uint64_t xinfo; > > > sPAPREventLogEntry *event; > > > struct rtas_error_log *hdr; > > > + int i; > > > > > > if ((nargs < 6) || (nargs > 7) || nret != 1) { > > > rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); > > > @@ -541,8 +611,11 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > > * do the latter here, since our code relies on edge-triggered > > > * interrupts. > > > */ > > > - if (rtas_event_log_contains(mask, true)) { > > > - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); > > > + for (i = 0; i < EVENT_CLASS_MAX; i++) { > > > + if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { > > > + g_assert(event_source[i].enabled); > > > + qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq)); > > > + } > > > } > > > > > > return; > > > @@ -594,8 +667,17 @@ out_no_events: > > > void spapr_events_init(sPAPRMachineState *spapr) > > > { > > > QTAILQ_INIT(&spapr->pending_events); > > > - spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false, > > > - &error_fatal); > > > + > > > + rtas_event_source_register(EVENT_CLASS_EPOW, > > > + xics_spapr_alloc(spapr->xics, 0, 0, false, > > > + &error_fatal)); > > > + > > > + if (spapr->use_hotplug_event_source) { > > > + rtas_event_source_register(EVENT_CLASS_HOT_PLUG, > > > + xics_spapr_alloc(spapr->xics, 0, 0, false, > > > + &error_fatal)); > > > + } > > > + > > > spapr->epow_notifier.notify = spapr_powerdown_req; > > > qemu_register_powerdown_notifier(&spapr->epow_notifier); > > > spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", > > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > > > index d1a4a14..2295ac6 100644 > > > --- a/include/hw/ppc/spapr.h > > > +++ b/include/hw/ppc/spapr.h > > > @@ -71,7 +71,6 @@ struct sPAPRMachineState { > > > sPAPROptionVector *ov5_cas; > > > bool cas_reboot; > > > > > > - uint32_t check_exception_irq; > > > Notifier epow_notifier; > > > QTAILQ_HEAD(, sPAPREventLogEntry) pending_events; > > > bool use_hotplug_event_source; > > > @@ -579,7 +578,7 @@ struct sPAPREventLogEntry { > > > }; > > > > > > void spapr_events_init(sPAPRMachineState *sm); > > > -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); > > > +void spapr_events_fdt_skel(void *fdt); > > > int spapr_h_cas_compose_response(sPAPRMachineState *sm, > > > target_ulong addr, target_ulong size, > > > bool cpu_update, > > >
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d80a6fa..2037222 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, hwaddr initrd_size, hwaddr kernel_size, bool little_endian, - const char *kernel_cmdline, - uint32_t epow_irq) + const char *kernel_cmdline) { void *fdt; uint32_t start_prop = cpu_to_be32(initrd_base); @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_end_node(fdt))); /* event-sources */ - spapr_events_fdt_skel(fdt, epow_irq); + spapr_events_fdt_skel(fdt); /* /hypervisor node */ if (kvm_enabled()) { @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine) } g_free(filename); - /* Set up EPOW events infrastructure */ + /* Set up RTAS event infrastructure */ spapr_events_init(spapr); /* Set up the RTC RTAS interfaces */ @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine) /* Prepare the device tree */ spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, kernel_size, kernel_le, - kernel_cmdline, - spapr->check_exception_irq); + kernel_cmdline); assert(spapr->fdt_skel != NULL); /* used by RTAS */ diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 4c7b6ae..f8bbec6 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -40,6 +40,7 @@ #include "hw/ppc/spapr_drc.h" #include "qemu/help_option.h" #include "qemu/bcd.h" +#include "hw/ppc/spapr_ovec.h" #include <libfdt.h> struct rtas_error_log { @@ -206,28 +207,104 @@ struct hp_log_full { struct rtas_event_log_v6_hp hp; } QEMU_PACKED; -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 -#define EVENT_MASK_EPOW 0x40000000 -#define EVENT_MASK_HOTPLUG 0x10000000 -#define EVENT_MASK_IO 0x08000000 +typedef enum EventClassIndex { + EVENT_CLASS_INTERNAL_ERRORS = 0, + EVENT_CLASS_EPOW = 1, + EVENT_CLASS_RESERVED = 2, + EVENT_CLASS_HOT_PLUG = 3, + EVENT_CLASS_IO = 4, + EVENT_CLASS_MAX +} EventClassIndex; + +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) + +typedef struct EventSource { + const char *name; + int irq; + uint32_t mask; + bool enabled; +} EventSource; + +static EventSource event_source[EVENT_CLASS_MAX] = { + [EVENT_CLASS_INTERNAL_ERRORS] = { .name = "internal-errors", }, + [EVENT_CLASS_EPOW] = { .name = "epow-events", }, + [EVENT_CLASS_HOT_PLUG] = { .name = "hot-plug-events", }, + [EVENT_CLASS_IO] = { .name = "ibm,io-events", }, +}; + +static void rtas_event_source_register(EventClassIndex index, int irq) +{ + /* we only support 1 irq per event class at the moment */ + g_assert(!event_source[index].enabled); + event_source[index].irq = irq; + event_source[index].mask = EVENT_CLASS_MASK(index); + event_source[index].enabled = true; +} -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) +void spapr_events_fdt_skel(void *fdt) { - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; + int i, count = 0; _FDT((fdt_begin_node(fdt, "event-sources"))); + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { + /* TODO: what does 0 entail? */ + uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 }; + + if (!event_source[i].enabled) { + continue; + } + + _FDT((fdt_begin_node(fdt, event_source[i].name))); + _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); + _FDT((fdt_end_node(fdt))); + + irq_ranges[count++] = interrupts[0]; + irq_ranges[count++] = cpu_to_be32(1); + } + + /* TODO: confirm the count is the last expected element */ + irq_ranges[count] = cpu_to_be32(count); + count++; + _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); _FDT((fdt_property(fdt, "interrupt-ranges", - irq_ranges, sizeof(irq_ranges)))); + irq_ranges, count * sizeof(uint32_t)))); - _FDT((fdt_begin_node(fdt, "epow-events"))); - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); _FDT((fdt_end_node(fdt))); +} - _FDT((fdt_end_node(fdt))); +static const EventSource *rtas_event_log_to_source(int log_type) +{ + const EventSource *source; + + switch (log_type) { + case RTAS_LOG_TYPE_HOTPLUG: + source = &event_source[EVENT_CLASS_HOT_PLUG]; + if (event_source[EVENT_CLASS_HOT_PLUG].enabled) { + break; + } + /* fall back to epow for legacy hotplug interrupt source */ + case RTAS_LOG_TYPE_EPOW: + source = &event_source[EVENT_CLASS_EPOW]; + break; + default: + source = NULL; + } + + return source; +} + +static int rtas_event_log_to_irq(int log_type) +{ + const EventSource *source = rtas_event_log_to_source(log_type); + + g_assert(source); + g_assert(source->enabled); + + return source->irq; } static void rtas_event_log_queue(int log_type, void *data, bool exception) @@ -248,19 +325,14 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; - /* we only queue EPOW events atm. */ - if ((event_mask & EVENT_MASK_EPOW) == 0) { - return NULL; - } - QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const EventSource *source = rtas_event_log_to_source(entry->log_type); + if (entry->exception != exception) { continue; } - /* EPOW and hotplug events are surfaced in the same manner */ - if (entry->log_type == RTAS_LOG_TYPE_EPOW || - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + if (source->mask & event_mask) { break; } } @@ -277,19 +349,14 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; - /* we only queue EPOW events atm. */ - if ((event_mask & EVENT_MASK_EPOW) == 0) { - return false; - } - QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const EventSource *source = rtas_event_log_to_source(entry->log_type); + if (entry->exception != exception) { continue; } - /* EPOW and hotplug events are surfaced in the same manner */ - if (entry->log_type == RTAS_LOG_TYPE_EPOW || - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + if (source->mask & event_mask) { return true; } } @@ -377,7 +444,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, + rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW))); } static void spapr_hotplug_set_signalled(uint32_t drc_index) @@ -459,7 +527,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, + rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG))); } void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) @@ -505,6 +574,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint64_t xinfo; sPAPREventLogEntry *event; struct rtas_error_log *hdr; + int i; if ((nargs < 6) || (nargs > 7) || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -541,8 +611,11 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * do the latter here, since our code relies on edge-triggered * interrupts. */ - if (rtas_event_log_contains(mask, true)) { - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + for (i = 0; i < EVENT_CLASS_MAX; i++) { + if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { + g_assert(event_source[i].enabled); + qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq)); + } } return; @@ -594,8 +667,17 @@ out_no_events: void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); - spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false, - &error_fatal); + + rtas_event_source_register(EVENT_CLASS_EPOW, + xics_spapr_alloc(spapr->xics, 0, 0, false, + &error_fatal)); + + if (spapr->use_hotplug_event_source) { + rtas_event_source_register(EVENT_CLASS_HOT_PLUG, + xics_spapr_alloc(spapr->xics, 0, 0, false, + &error_fatal)); + } + spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index d1a4a14..2295ac6 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -71,7 +71,6 @@ struct sPAPRMachineState { sPAPROptionVector *ov5_cas; bool cas_reboot; - uint32_t check_exception_irq; Notifier epow_notifier; QTAILQ_HEAD(, sPAPREventLogEntry) pending_events; bool use_hotplug_event_source; @@ -579,7 +578,7 @@ struct sPAPREventLogEntry { }; void spapr_events_init(sPAPRMachineState *sm); -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); +void spapr_events_fdt_skel(void *fdt); int spapr_h_cas_compose_response(sPAPRMachineState *sm, target_ulong addr, target_ulong size, bool cpu_update,
Hotplug events were previously delivered using an EPOW interrupt and were queued by linux guests into a circular buffer. For traditional EPOW events like shutdown/resets, this isn't an issue, but for hotplug events there are cases where this buffer can be exhausted, resulting in the loss of hotplug events, resets, etc. Newer-style hotplug event are delivered using a dedicated event source. We enable this in supported guests by adding standard an additional event source in the guest device-tree via /event-sources, and, if the guest advertises support for the newer-style hotplug events, using the corresponding interrupt to signal the available of hotplug/unplug events. Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> --- hw/ppc/spapr.c | 10 ++-- hw/ppc/spapr_events.c | 148 ++++++++++++++++++++++++++++++++++++++----------- include/hw/ppc/spapr.h | 3 +- 3 files changed, 120 insertions(+), 41 deletions(-)