[08/11] spapr_events: add support for dedicated hotplug event source

Message ID 1476314039-9520-9-git-send-email-mdroth@linux.vnet.ibm.com
State New

Commit Message

Michael Roth Oct. 12, 2016, 11:13 p.m. UTC
Hotplug events were previously delivered using an EPOW interrupt
and were queued by linux guests into a circular buffer. For traditional
EPOW events like shutdown/resets, this isn't an issue, but for hotplug
events there are cases where this buffer can be exhausted, resulting
in the loss of hotplug events, resets, etc.

Newer-style hotplug events are delivered using a dedicated event source.
We enable this in supported guests by adding an additional event source
in the guest device-tree via /event-sources and, if the guest advertises
support for the newer-style hotplug events, using the corresponding
interrupt to signal the availability of hotplug/unplug events.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         |  10 ++--
 hw/ppc/spapr_events.c  | 148 ++++++++++++++++++++++++++++++++++++++-----------
 include/hw/ppc/spapr.h |   3 +-
 3 files changed, 120 insertions(+), 41 deletions(-)
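
For reference, the patch replaces the old fixed EVENT_MASK_* constants with a
per-class mask derived from the class index: bit 0 (the most significant bit)
of the check-exception mask selects internal errors, bit 1 EPOW, bit 3 hotplug
and bit 4 IO, which reproduces the previous constant values. Below is a
minimal standalone sketch of that mapping; the enum and macro mirror the patch
(written here with an unsigned constant), and the main() is purely
illustrative, not part of the patch:

    #include <stdio.h>
    #include <stdint.h>

    /* Event classes as introduced by the patch; each maps to one bit of
     * the 32-bit mask a guest passes to the check-exception RTAS call. */
    typedef enum EventClassIndex {
        EVENT_CLASS_INTERNAL_ERRORS = 0,
        EVENT_CLASS_EPOW            = 1,
        EVENT_CLASS_RESERVED        = 2,
        EVENT_CLASS_HOT_PLUG        = 3,
        EVENT_CLASS_IO              = 4,
        EVENT_CLASS_MAX
    } EventClassIndex;

    /* Class index i corresponds to bit (31 - i), counting bit 0 as the MSB,
     * so index 1 (EPOW) gives 0x40000000 and index 3 (hotplug) 0x10000000,
     * matching the EVENT_MASK_* values the patch removes. */
    #define EVENT_CLASS_MASK(index) (1u << (31 - (index)))

    int main(void)
    {
        /* Hypothetical guest mask selecting only the EPOW and hotplug queues. */
        uint32_t guest_mask = EVENT_CLASS_MASK(EVENT_CLASS_EPOW) |
                              EVENT_CLASS_MASK(EVENT_CLASS_HOT_PLUG);
        int i;

        for (i = 0; i < EVENT_CLASS_MAX; i++) {
            printf("class %d: mask 0x%08x selected=%d\n", i,
                   EVENT_CLASS_MASK(i), !!(EVENT_CLASS_MASK(i) & guest_mask));
        }
        return 0;
    }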

Comments

David Gibson Oct. 14, 2016, 4:56 a.m. UTC | #1
On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote:
> Hotplug events were previously delivered using an EPOW interrupt
> and were queued by linux guests into a circular buffer. For traditional
> EPOW events like shutdown/resets, this isn't an issue, but for hotplug
> events there are cases where this buffer can be exhausted, resulting
> in the loss of hotplug events, resets, etc.
> 
> Newer-style hotplug event are delivered using a dedicated event source.
> We enable this in supported guests by adding standard an additional
> event source in the guest device-tree via /event-sources, and, if
> the guest advertises support for the newer-style hotplug events,
> using the corresponding interrupt to signal the available of
> hotplug/unplug events.
> 
> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>

So.. are you saying that as well as allowing new event types, the new
special hotplug event source effectively allows for a bigger queue?

Does that mean that we didn't even necessarily need the base+length
unplug events, because we could now have sent the many single-LMB
unplug requests that were necessary?  Or does it not increase the
effective queue enough for that?

> ---
>  hw/ppc/spapr.c         |  10 ++--
>  hw/ppc/spapr_events.c  | 148 ++++++++++++++++++++++++++++++++++++++-----------
>  include/hw/ppc/spapr.h |   3 +-
>  3 files changed, 120 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d80a6fa..2037222 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>                                     hwaddr initrd_size,
>                                     hwaddr kernel_size,
>                                     bool little_endian,
> -                                   const char *kernel_cmdline,
> -                                   uint32_t epow_irq)
> +                                   const char *kernel_cmdline)
>  {
>      void *fdt;
>      uint32_t start_prop = cpu_to_be32(initrd_base);
> @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      _FDT((fdt_end_node(fdt)));
>  
>      /* event-sources */
> -    spapr_events_fdt_skel(fdt, epow_irq);
> +    spapr_events_fdt_skel(fdt);
>  
>      /* /hypervisor node */
>      if (kvm_enabled()) {
> @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine)
>      }
>      g_free(filename);
>  
> -    /* Set up EPOW events infrastructure */
> +    /* Set up RTAS event infrastructure */
>      spapr_events_init(spapr);
>  
>      /* Set up the RTC RTAS interfaces */
> @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine)
>      /* Prepare the device tree */
>      spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
>                                              kernel_size, kernel_le,
> -                                            kernel_cmdline,
> -                                            spapr->check_exception_irq);
> +                                            kernel_cmdline);
>      assert(spapr->fdt_skel != NULL);
>  
>      /* used by RTAS */
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 4c7b6ae..f8bbec6 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -40,6 +40,7 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "qemu/help_option.h"
>  #include "qemu/bcd.h"
> +#include "hw/ppc/spapr_ovec.h"
>  #include <libfdt.h>
>  
>  struct rtas_error_log {
> @@ -206,28 +207,104 @@ struct hp_log_full {
>      struct rtas_event_log_v6_hp hp;
>  } QEMU_PACKED;
>  
> -#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
> -#define EVENT_MASK_EPOW                      0x40000000
> -#define EVENT_MASK_HOTPLUG                   0x10000000
> -#define EVENT_MASK_IO                        0x08000000
> +typedef enum EventClassIndex {
> +    EVENT_CLASS_INTERNAL_ERRORS     = 0,
> +    EVENT_CLASS_EPOW                = 1,
> +    EVENT_CLASS_RESERVED            = 2,
> +    EVENT_CLASS_HOT_PLUG            = 3,
> +    EVENT_CLASS_IO                  = 4,
> +    EVENT_CLASS_MAX
> +} EventClassIndex;
> +
> +#define EVENT_CLASS_MASK(index) (1 << (31 - index))
> +
> +typedef struct EventSource {
> +    const char *name;
> +    int irq;
> +    uint32_t mask;
> +    bool enabled;
> +} EventSource;
> +
> +static EventSource event_source[EVENT_CLASS_MAX] = {
> +    [EVENT_CLASS_INTERNAL_ERRORS]       = { .name = "internal-errors", },
> +    [EVENT_CLASS_EPOW]                  = { .name = "epow-events", },
> +    [EVENT_CLASS_HOT_PLUG]              = { .name = "hot-plug-events", },
> +    [EVENT_CLASS_IO]                    = { .name = "ibm,io-events", },
> +};
> +
> +static void rtas_event_source_register(EventClassIndex index, int irq)
> +{
> +    /* we only support 1 irq per event class at the moment */
> +    g_assert(!event_source[index].enabled);
> +    event_source[index].irq = irq;
> +    event_source[index].mask = EVENT_CLASS_MASK(index);
> +    event_source[index].enabled = true;
> +}

I really don't like adding a mutable global table.  This should
probably be under the sPAPRMachineState.
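
One possible shape of that suggestion, purely as a hypothetical sketch (the
field and parameter names here are made up, not from the patch; EventSource,
EventClassIndex and EVENT_CLASS_MASK are the definitions in the diff above):

    /* Table lives in the machine state instead of a file-scope global. */
    struct sPAPRMachineState {
        /* ... existing fields ... */
        EventSource event_sources[EVENT_CLASS_MAX];
    };

    static void rtas_event_source_register(sPAPRMachineState *spapr,
                                           EventClassIndex index, int irq)
    {
        EventSource *source = &spapr->event_sources[index];

        /* still only one irq per event class */
        g_assert(!source->enabled);
        source->irq = irq;
        source->mask = EVENT_CLASS_MASK(index);
        source->enabled = true;
    }

Callers such as spapr_events_init() already have the sPAPRMachineState
pointer, so threading it through would be mostly mechanical.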

> -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
> +void spapr_events_fdt_skel(void *fdt)
>  {
> -    uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
> -    uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
> +    uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
> +    int i, count = 0;
>  
>      _FDT((fdt_begin_node(fdt, "event-sources")));
>  
> +    for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
> +        /* TODO: what does 0 entail? */

It's just part of the interrupt specifier format expected by the
event-sources binding.  It's not really important.
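
Concretely, with #interrupt-cells = <2> on the event-sources node, each child
ends up with a two-cell specifier whose first cell is the interrupt number and
whose second cell is the extra cell the binding expects, left as 0 here. An
illustrative sketch of the resulting layout, with placeholder interrupt
numbers A and B (interrupt-ranges, also emitted by the patch, is omitted):

    /*
     * event-sources {
     *     interrupt-controller;
     *     #interrupt-cells = <2>;
     *     epow-events {
     *         interrupts = <A 0>;
     *     };
     *     hot-plug-events {
     *         interrupts = <B 0>;
     *     };
     * };
     */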

> +        uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 };
> +
> +        if (!event_source[i].enabled) {
> +            continue;
> +        }
> +
> +        _FDT((fdt_begin_node(fdt, event_source[i].name)));
> +        _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> +        _FDT((fdt_end_node(fdt)));
> +
> +        irq_ranges[count++] = interrupts[0];
> +        irq_ranges[count++] = cpu_to_be32(1);
> +    }
> +
> +    /* TODO: confirm the count is the last expected element */
> +    irq_ranges[count] = cpu_to_be32(count);
> +    count++;
> +
>      _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
>      _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
>      _FDT((fdt_property(fdt, "interrupt-ranges",
> -                       irq_ranges, sizeof(irq_ranges))));
> +                       irq_ranges, count * sizeof(uint32_t))));
>  
> -    _FDT((fdt_begin_node(fdt, "epow-events")));
> -    _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
>      _FDT((fdt_end_node(fdt)));
> +}
>  
> -    _FDT((fdt_end_node(fdt)));
> +static const EventSource *rtas_event_log_to_source(int log_type)
> +{
> +    const EventSource *source;
> +
> +    switch (log_type) {
> +        case RTAS_LOG_TYPE_HOTPLUG:
> +            source = &event_source[EVENT_CLASS_HOT_PLUG];
> +            if (event_source[EVENT_CLASS_HOT_PLUG].enabled) {
> +                break;
> +            }

This should probably be using the flag you already have in the
MachineState, rather than a global.
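
That flag is the use_hotplug_event_source bool visible in the spapr.h hunk
below. A hedged sketch of what the fallback could look like keyed off it (the
machine-state pointer would need to be passed in or looked up; the global
table from the patch is kept here only for brevity):

    static const EventSource *rtas_event_log_to_source(sPAPRMachineState *spapr,
                                                       int log_type)
    {
        switch (log_type) {
        case RTAS_LOG_TYPE_HOTPLUG:
            if (spapr->use_hotplug_event_source) {
                return &event_source[EVENT_CLASS_HOT_PLUG];
            }
            /* legacy guests get hotplug logs via the EPOW source */
            /* fall through */
        case RTAS_LOG_TYPE_EPOW:
            return &event_source[EVENT_CLASS_EPOW];
        default:
            return NULL;
        }
    }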

> +            /* fall back to epow for legacy hotplug interrupt source */
> +        case RTAS_LOG_TYPE_EPOW:
> +            source = &event_source[EVENT_CLASS_EPOW];
> +            break;
> +        default:
> +            source = NULL;
> +    }
> +
> +    return source;
> +}
> +
> +static int rtas_event_log_to_irq(int log_type)
> +{
> +    const EventSource *source = rtas_event_log_to_source(log_type);
> +
> +    g_assert(source);
> +    g_assert(source->enabled);
> +
> +    return source->irq;
>  }
>  
>  static void rtas_event_log_queue(int log_type, void *data, bool exception)
> @@ -248,19 +325,14 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
>      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>      sPAPREventLogEntry *entry = NULL;
>  
> -    /* we only queue EPOW events atm. */
> -    if ((event_mask & EVENT_MASK_EPOW) == 0) {
> -        return NULL;
> -    }
> -
>      QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
> +        const EventSource *source = rtas_event_log_to_source(entry->log_type);
> +
>          if (entry->exception != exception) {
>              continue;
>          }
>  
> -        /* EPOW and hotplug events are surfaced in the same manner */
> -        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
> -            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
> +        if (source->mask & event_mask) {
>              break;
>          }
>      }
> @@ -277,19 +349,14 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
>      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>      sPAPREventLogEntry *entry = NULL;
>  
> -    /* we only queue EPOW events atm. */
> -    if ((event_mask & EVENT_MASK_EPOW) == 0) {
> -        return false;
> -    }
> -
>      QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
> +        const EventSource *source = rtas_event_log_to_source(entry->log_type);
> +
>          if (entry->exception != exception) {
>              continue;
>          }
>  
> -        /* EPOW and hotplug events are surfaced in the same manner */
> -        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
> -            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
> +        if (source->mask & event_mask) {
>              return true;
>          }
>      }
> @@ -377,7 +444,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
>  
>      rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
>  
> -    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> +    qemu_irq_pulse(xics_get_qirq(spapr->xics,
> +                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW)));
>  }
>  
>  static void spapr_hotplug_set_signalled(uint32_t drc_index)
> @@ -459,7 +527,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>  
>      rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
>  
> -    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> +    qemu_irq_pulse(xics_get_qirq(spapr->xics,
> +                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG)));
>  }
>  
>  void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
> @@ -505,6 +574,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>      uint64_t xinfo;
>      sPAPREventLogEntry *event;
>      struct rtas_error_log *hdr;
> +    int i;
>  
>      if ((nargs < 6) || (nargs > 7) || nret != 1) {
>          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> @@ -541,8 +611,11 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>       * do the latter here, since our code relies on edge-triggered
>       * interrupts.
>       */
> -    if (rtas_event_log_contains(mask, true)) {
> -        qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> +    for (i = 0; i < EVENT_CLASS_MAX; i++) {
> +        if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) {
> +            g_assert(event_source[i].enabled);
> +            qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq));
> +        }
>      }
>  
>      return;
> @@ -594,8 +667,17 @@ out_no_events:
>  void spapr_events_init(sPAPRMachineState *spapr)
>  {
>      QTAILQ_INIT(&spapr->pending_events);
> -    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false,
> -                                            &error_fatal);
> +
> +    rtas_event_source_register(EVENT_CLASS_EPOW,
> +                               xics_spapr_alloc(spapr->xics, 0, 0, false,
> +                                                &error_fatal));
> +
> +    if (spapr->use_hotplug_event_source) {
> +        rtas_event_source_register(EVENT_CLASS_HOT_PLUG,
> +                                   xics_spapr_alloc(spapr->xics, 0, 0, false,
> +                                                    &error_fatal));
> +    }
> +
>      spapr->epow_notifier.notify = spapr_powerdown_req;
>      qemu_register_powerdown_notifier(&spapr->epow_notifier);
>      spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index d1a4a14..2295ac6 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -71,7 +71,6 @@ struct sPAPRMachineState {
>      sPAPROptionVector *ov5_cas;
>      bool cas_reboot;
>  
> -    uint32_t check_exception_irq;
>      Notifier epow_notifier;
>      QTAILQ_HEAD(, sPAPREventLogEntry) pending_events;
>      bool use_hotplug_event_source;
> @@ -579,7 +578,7 @@ struct sPAPREventLogEntry {
>  };
>  
>  void spapr_events_init(sPAPRMachineState *sm);
> -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
> +void spapr_events_fdt_skel(void *fdt);
>  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
>                                   target_ulong addr, target_ulong size,
>                                   bool cpu_update,
Bharata B Rao Oct. 14, 2016, 8:46 a.m. UTC | #2
On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote:
> Hotplug events were previously delivered using an EPOW interrupt
> and were queued by linux guests into a circular buffer. For traditional
> EPOW events like shutdown/resets, this isn't an issue, but for hotplug
> events there are cases where this buffer can be exhausted, resulting
> in the loss of hotplug events, resets, etc.
> 
> Newer-style hotplug event are delivered using a dedicated event source.
> We enable this in supported guests by adding standard an additional
> event source in the guest device-tree via /event-sources, and, if
> the guest advertises support for the newer-style hotplug events,
> using the corresponding interrupt to signal the available of
> hotplug/unplug events.
> 
> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         |  10 ++--
>  hw/ppc/spapr_events.c  | 148 ++++++++++++++++++++++++++++++++++++++-----------
>  include/hw/ppc/spapr.h |   3 +-
>  3 files changed, 120 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d80a6fa..2037222 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>                                     hwaddr initrd_size,
>                                     hwaddr kernel_size,
>                                     bool little_endian,
> -                                   const char *kernel_cmdline,
> -                                   uint32_t epow_irq)
> +                                   const char *kernel_cmdline)
>  {
>      void *fdt;
>      uint32_t start_prop = cpu_to_be32(initrd_base);
> @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      _FDT((fdt_end_node(fdt)));
> 
>      /* event-sources */
> -    spapr_events_fdt_skel(fdt, epow_irq);
> +    spapr_events_fdt_skel(fdt);
> 
>      /* /hypervisor node */
>      if (kvm_enabled()) {
> @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine)
>      }
>      g_free(filename);
> 
> -    /* Set up EPOW events infrastructure */
> +    /* Set up RTAS event infrastructure */
>      spapr_events_init(spapr);
> 
>      /* Set up the RTC RTAS interfaces */
> @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine)
>      /* Prepare the device tree */
>      spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
>                                              kernel_size, kernel_le,
> -                                            kernel_cmdline,
> -                                            spapr->check_exception_irq);
> +                                            kernel_cmdline);
>      assert(spapr->fdt_skel != NULL);
> 
>      /* used by RTAS */
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 4c7b6ae..f8bbec6 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -40,6 +40,7 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "qemu/help_option.h"
>  #include "qemu/bcd.h"
> +#include "hw/ppc/spapr_ovec.h"
>  #include <libfdt.h>
> 
>  struct rtas_error_log {
> @@ -206,28 +207,104 @@ struct hp_log_full {
>      struct rtas_event_log_v6_hp hp;
>  } QEMU_PACKED;
> 
> -#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
> -#define EVENT_MASK_EPOW                      0x40000000
> -#define EVENT_MASK_HOTPLUG                   0x10000000
> -#define EVENT_MASK_IO                        0x08000000
> +typedef enum EventClassIndex {
> +    EVENT_CLASS_INTERNAL_ERRORS     = 0,
> +    EVENT_CLASS_EPOW                = 1,
> +    EVENT_CLASS_RESERVED            = 2,
> +    EVENT_CLASS_HOT_PLUG            = 3,
> +    EVENT_CLASS_IO                  = 4,
> +    EVENT_CLASS_MAX
> +} EventClassIndex;
> +
> +#define EVENT_CLASS_MASK(index) (1 << (31 - index))
> +
> +typedef struct EventSource {
> +    const char *name;
> +    int irq;
> +    uint32_t mask;
> +    bool enabled;
> +} EventSource;
> +
> +static EventSource event_source[EVENT_CLASS_MAX] = {
> +    [EVENT_CLASS_INTERNAL_ERRORS]       = { .name = "internal-errors", },
> +    [EVENT_CLASS_EPOW]                  = { .name = "epow-events", },
> +    [EVENT_CLASS_HOT_PLUG]              = { .name = "hot-plug-events", },
> +    [EVENT_CLASS_IO]                    = { .name = "ibm,io-events", },
> +};
> +
> +static void rtas_event_source_register(EventClassIndex index, int irq)
> +{
> +    /* we only support 1 irq per event class at the moment */
> +    g_assert(!event_source[index].enabled);
> +    event_source[index].irq = irq;
> +    event_source[index].mask = EVENT_CLASS_MASK(index);
> +    event_source[index].enabled = true;
> +}
> 
> -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
> +void spapr_events_fdt_skel(void *fdt)
>  {
> -    uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
> -    uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
> +    uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
> +    int i, count = 0;
> 
>      _FDT((fdt_begin_node(fdt, "event-sources")));
> 
> +    for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
> +        /* TODO: what does 0 entail? */
> +        uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 };
> +
> +        if (!event_source[i].enabled) {
> +            continue;
> +        }
> +
> +        _FDT((fdt_begin_node(fdt, event_source[i].name)));
> +        _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> +        _FDT((fdt_end_node(fdt)));
> +
> +        irq_ranges[count++] = interrupts[0];
> +        irq_ranges[count++] = cpu_to_be32(1);
> +    }
> +
> +    /* TODO: confirm the count is the last expected element */
> +    irq_ranges[count] = cpu_to_be32(count);
> +    count++;
> +
>      _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
>      _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
>      _FDT((fdt_property(fdt, "interrupt-ranges",
> -                       irq_ranges, sizeof(irq_ranges))));
> +                       irq_ranges, count * sizeof(uint32_t))));
> 
> -    _FDT((fdt_begin_node(fdt, "epow-events")));
> -    _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
>      _FDT((fdt_end_node(fdt)));
> +}
> 
> -    _FDT((fdt_end_node(fdt)));
> +static const EventSource *rtas_event_log_to_source(int log_type)
> +{
> +    const EventSource *source;
> +
> +    switch (log_type) {
> +        case RTAS_LOG_TYPE_HOTPLUG:
> +            source = &event_source[EVENT_CLASS_HOT_PLUG];
> +            if (event_source[EVENT_CLASS_HOT_PLUG].enabled) {
> +                break;
> +            }

In addition to the above .enabled check, shouldn't you be checking if
the guest indeed supports the dedicated hotplug interrupt source before
returning the source?

This I believe is the reason for the CPU hotplug failures that I mentioned
in reply to your 11/11 thread. I am on a 4.7.x kernel which probably doesn't
support the hotplug interrupt source, but QEMU ends up registering and raising
such an interrupt.

Regards,
Bharata.
Michael Roth Oct. 14, 2016, 6:44 p.m. UTC | #3
Quoting David Gibson (2016-10-13 23:56:43)
> On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote:
> > Hotplug events were previously delivered using an EPOW interrupt
> > and were queued by linux guests into a circular buffer. For traditional
> > EPOW events like shutdown/resets, this isn't an issue, but for hotplug
> > events there are cases where this buffer can be exhausted, resulting
> > in the loss of hotplug events, resets, etc.
> > 
> > Newer-style hotplug event are delivered using a dedicated event source.
> > We enable this in supported guests by adding standard an additional
> > event source in the guest device-tree via /event-sources, and, if
> > the guest advertises support for the newer-style hotplug events,
> > using the corresponding interrupt to signal the available of
> > hotplug/unplug events.
> > 
> > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> 
> So.. are you saying that as well as allowing new event types, the new
> special hotplug event souce effectively allows for a bigger queue?
> 
> Does that mean that we didn't even necessarily need the base+length
> unplug events, because we could now have sent the many single-LMB
> unplug requests that were necessary?  Or does it not increase the
> effective queue enough for that?

I assume there are still some internal limits, but the events
are now processed using a workqueue which should provide a bit more
headroom than the RTAS event buffer used for EPOW events. Maybe
John (on cc:) can provide more insight into what the actual limits are.

In either case, the possibility of huge memory hotplug/unplug
situations still warrants the use of count+index I think, and since
support for the new event delivery mechanism is negotiated using the
same option bit as count+index, there's not really any reason not to
use it when we can. For situations where we can't (CPU/PCI), it
might give us a bit of breathing room there as well.

> 
> > ---
> >  hw/ppc/spapr.c         |  10 ++--
> >  hw/ppc/spapr_events.c  | 148 ++++++++++++++++++++++++++++++++++++++-----------
> >  include/hw/ppc/spapr.h |   3 +-
> >  3 files changed, 120 insertions(+), 41 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index d80a6fa..2037222 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> >                                     hwaddr initrd_size,
> >                                     hwaddr kernel_size,
> >                                     bool little_endian,
> > -                                   const char *kernel_cmdline,
> > -                                   uint32_t epow_irq)
> > +                                   const char *kernel_cmdline)
> >  {
> >      void *fdt;
> >      uint32_t start_prop = cpu_to_be32(initrd_base);
> > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> >      _FDT((fdt_end_node(fdt)));
> >  
> >      /* event-sources */
> > -    spapr_events_fdt_skel(fdt, epow_irq);
> > +    spapr_events_fdt_skel(fdt);
> >  
> >      /* /hypervisor node */
> >      if (kvm_enabled()) {
> > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine)
> >      }
> >      g_free(filename);
> >  
> > -    /* Set up EPOW events infrastructure */
> > +    /* Set up RTAS event infrastructure */
> >      spapr_events_init(spapr);
> >  
> >      /* Set up the RTC RTAS interfaces */
> > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine)
> >      /* Prepare the device tree */
> >      spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
> >                                              kernel_size, kernel_le,
> > -                                            kernel_cmdline,
> > -                                            spapr->check_exception_irq);
> > +                                            kernel_cmdline);
> >      assert(spapr->fdt_skel != NULL);
> >  
> >      /* used by RTAS */
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index 4c7b6ae..f8bbec6 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -40,6 +40,7 @@
> >  #include "hw/ppc/spapr_drc.h"
> >  #include "qemu/help_option.h"
> >  #include "qemu/bcd.h"
> > +#include "hw/ppc/spapr_ovec.h"
> >  #include <libfdt.h>
> >  
> >  struct rtas_error_log {
> > @@ -206,28 +207,104 @@ struct hp_log_full {
> >      struct rtas_event_log_v6_hp hp;
> >  } QEMU_PACKED;
> >  
> > -#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
> > -#define EVENT_MASK_EPOW                      0x40000000
> > -#define EVENT_MASK_HOTPLUG                   0x10000000
> > -#define EVENT_MASK_IO                        0x08000000
> > +typedef enum EventClassIndex {
> > +    EVENT_CLASS_INTERNAL_ERRORS     = 0,
> > +    EVENT_CLASS_EPOW                = 1,
> > +    EVENT_CLASS_RESERVED            = 2,
> > +    EVENT_CLASS_HOT_PLUG            = 3,
> > +    EVENT_CLASS_IO                  = 4,
> > +    EVENT_CLASS_MAX
> > +} EventClassIndex;
> > +
> > +#define EVENT_CLASS_MASK(index) (1 << (31 - index))
> > +
> > +typedef struct EventSource {
> > +    const char *name;
> > +    int irq;
> > +    uint32_t mask;
> > +    bool enabled;
> > +} EventSource;
> > +
> > +static EventSource event_source[EVENT_CLASS_MAX] = {
> > +    [EVENT_CLASS_INTERNAL_ERRORS]       = { .name = "internal-errors", },
> > +    [EVENT_CLASS_EPOW]                  = { .name = "epow-events", },
> > +    [EVENT_CLASS_HOT_PLUG]              = { .name = "hot-plug-events", },
> > +    [EVENT_CLASS_IO]                    = { .name = "ibm,io-events", },
> > +};
> > +
> > +static void rtas_event_source_register(EventClassIndex index, int irq)
> > +{
> > +    /* we only support 1 irq per event class at the moment */
> > +    g_assert(!event_source[index].enabled);
> > +    event_source[index].irq = irq;
> > +    event_source[index].mask = EVENT_CLASS_MASK(index);
> > +    event_source[index].enabled = true;
> > +}
> 
> I really don't like adding a mutable global table.  This should
> probably be under the sPAPRMachineState.

I think I started off going that route. Not sure what steered me in this
direction, but will take another look at that approach.

> 
> > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
> > +void spapr_events_fdt_skel(void *fdt)
> >  {
> > -    uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
> > -    uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
> > +    uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
> > +    int i, count = 0;
> >  
> >      _FDT((fdt_begin_node(fdt, "event-sources")));
> >  
> > +    for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
> > +        /* TODO: what does 0 entail? */
> 
> It's just part of the interrupt specifier format expected by the
> event-sources binding.  It's not really important.
> 
> > +        uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 };
> > +
> > +        if (!event_source[i].enabled) {
> > +            continue;
> > +        }
> > +
> > +        _FDT((fdt_begin_node(fdt, event_source[i].name)));
> > +        _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> > +        _FDT((fdt_end_node(fdt)));
> > +
> > +        irq_ranges[count++] = interrupts[0];
> > +        irq_ranges[count++] = cpu_to_be32(1);
> > +    }
> > +
> > +    /* TODO: confirm the count is the last expected element */
> > +    irq_ranges[count] = cpu_to_be32(count);
> > +    count++;
> > +
> >      _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
> >      _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
> >      _FDT((fdt_property(fdt, "interrupt-ranges",
> > -                       irq_ranges, sizeof(irq_ranges))));
> > +                       irq_ranges, count * sizeof(uint32_t))));
> >  
> > -    _FDT((fdt_begin_node(fdt, "epow-events")));
> > -    _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> >      _FDT((fdt_end_node(fdt)));
> > +}
> >  
> > -    _FDT((fdt_end_node(fdt)));
> > +static const EventSource *rtas_event_log_to_source(int log_type)
> > +{
> > +    const EventSource *source;
> > +
> > +    switch (log_type) {
> > +        case RTAS_LOG_TYPE_HOTPLUG:
> > +            source = &event_source[EVENT_CLASS_HOT_PLUG];
> > +            if (event_source[EVENT_CLASS_HOT_PLUG].enabled) {
> > +                break;
> > +            }
> 
> This should probably be using the flag you already have in the
> MachineState, rather than a global.
> 
> > +            /* fall back to epow for legacy hotplug interrupt source */
> > +        case RTAS_LOG_TYPE_EPOW:
> > +            source = &event_source[EVENT_CLASS_EPOW];
> > +            break;
> > +        default:
> > +            source = NULL;
> > +    }
> > +
> > +    return source;
> > +}
> > +
> > +static int rtas_event_log_to_irq(int log_type)
> > +{
> > +    const EventSource *source = rtas_event_log_to_source(log_type);
> > +
> > +    g_assert(source);
> > +    g_assert(source->enabled);
> > +
> > +    return source->irq;
> >  }
> >  
> >  static void rtas_event_log_queue(int log_type, void *data, bool exception)
> > @@ -248,19 +325,14 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
> >      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >      sPAPREventLogEntry *entry = NULL;
> >  
> > -    /* we only queue EPOW events atm. */
> > -    if ((event_mask & EVENT_MASK_EPOW) == 0) {
> > -        return NULL;
> > -    }
> > -
> >      QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
> > +        const EventSource *source = rtas_event_log_to_source(entry->log_type);
> > +
> >          if (entry->exception != exception) {
> >              continue;
> >          }
> >  
> > -        /* EPOW and hotplug events are surfaced in the same manner */
> > -        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
> > -            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
> > +        if (source->mask & event_mask) {
> >              break;
> >          }
> >      }
> > @@ -277,19 +349,14 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
> >      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >      sPAPREventLogEntry *entry = NULL;
> >  
> > -    /* we only queue EPOW events atm. */
> > -    if ((event_mask & EVENT_MASK_EPOW) == 0) {
> > -        return false;
> > -    }
> > -
> >      QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
> > +        const EventSource *source = rtas_event_log_to_source(entry->log_type);
> > +
> >          if (entry->exception != exception) {
> >              continue;
> >          }
> >  
> > -        /* EPOW and hotplug events are surfaced in the same manner */
> > -        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
> > -            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
> > +        if (source->mask & event_mask) {
> >              return true;
> >          }
> >      }
> > @@ -377,7 +444,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
> >  
> >      rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
> >  
> > -    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> > +    qemu_irq_pulse(xics_get_qirq(spapr->xics,
> > +                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW)));
> >  }
> >  
> >  static void spapr_hotplug_set_signalled(uint32_t drc_index)
> > @@ -459,7 +527,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
> >  
> >      rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
> >  
> > -    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> > +    qemu_irq_pulse(xics_get_qirq(spapr->xics,
> > +                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG)));
> >  }
> >  
> >  void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
> > @@ -505,6 +574,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >      uint64_t xinfo;
> >      sPAPREventLogEntry *event;
> >      struct rtas_error_log *hdr;
> > +    int i;
> >  
> >      if ((nargs < 6) || (nargs > 7) || nret != 1) {
> >          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> > @@ -541,8 +611,11 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >       * do the latter here, since our code relies on edge-triggered
> >       * interrupts.
> >       */
> > -    if (rtas_event_log_contains(mask, true)) {
> > -        qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> > +    for (i = 0; i < EVENT_CLASS_MAX; i++) {
> > +        if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) {
> > +            g_assert(event_source[i].enabled);
> > +            qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq));
> > +        }
> >      }
> >  
> >      return;
> > @@ -594,8 +667,17 @@ out_no_events:
> >  void spapr_events_init(sPAPRMachineState *spapr)
> >  {
> >      QTAILQ_INIT(&spapr->pending_events);
> > -    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false,
> > -                                            &error_fatal);
> > +
> > +    rtas_event_source_register(EVENT_CLASS_EPOW,
> > +                               xics_spapr_alloc(spapr->xics, 0, 0, false,
> > +                                                &error_fatal));
> > +
> > +    if (spapr->use_hotplug_event_source) {
> > +        rtas_event_source_register(EVENT_CLASS_HOT_PLUG,
> > +                                   xics_spapr_alloc(spapr->xics, 0, 0, false,
> > +                                                    &error_fatal));
> > +    }
> > +
> >      spapr->epow_notifier.notify = spapr_powerdown_req;
> >      qemu_register_powerdown_notifier(&spapr->epow_notifier);
> >      spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index d1a4a14..2295ac6 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -71,7 +71,6 @@ struct sPAPRMachineState {
> >      sPAPROptionVector *ov5_cas;
> >      bool cas_reboot;
> >  
> > -    uint32_t check_exception_irq;
> >      Notifier epow_notifier;
> >      QTAILQ_HEAD(, sPAPREventLogEntry) pending_events;
> >      bool use_hotplug_event_source;
> > @@ -579,7 +578,7 @@ struct sPAPREventLogEntry {
> >  };
> >  
> >  void spapr_events_init(sPAPRMachineState *sm);
> > -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
> > +void spapr_events_fdt_skel(void *fdt);
> >  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
> >                                   target_ulong addr, target_ulong size,
> >                                   bool cpu_update,
> 
> -- 
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                 | _way_ _around_!
> http://www.ozlabs.org/~dgibson
Michael Roth Oct. 14, 2016, 6:51 p.m. UTC | #4
Quoting Bharata B Rao (2016-10-14 03:46:20)
> On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote:
> > Hotplug events were previously delivered using an EPOW interrupt
> > and were queued by linux guests into a circular buffer. For traditional
> > EPOW events like shutdown/resets, this isn't an issue, but for hotplug
> > events there are cases where this buffer can be exhausted, resulting
> > in the loss of hotplug events, resets, etc.
> > 
> > Newer-style hotplug event are delivered using a dedicated event source.
> > We enable this in supported guests by adding standard an additional
> > event source in the guest device-tree via /event-sources, and, if
> > the guest advertises support for the newer-style hotplug events,
> > using the corresponding interrupt to signal the available of
> > hotplug/unplug events.
> > 
> > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > ---
> >  hw/ppc/spapr.c         |  10 ++--
> >  hw/ppc/spapr_events.c  | 148 ++++++++++++++++++++++++++++++++++++++-----------
> >  include/hw/ppc/spapr.h |   3 +-
> >  3 files changed, 120 insertions(+), 41 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index d80a6fa..2037222 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> >                                     hwaddr initrd_size,
> >                                     hwaddr kernel_size,
> >                                     bool little_endian,
> > -                                   const char *kernel_cmdline,
> > -                                   uint32_t epow_irq)
> > +                                   const char *kernel_cmdline)
> >  {
> >      void *fdt;
> >      uint32_t start_prop = cpu_to_be32(initrd_base);
> > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> >      _FDT((fdt_end_node(fdt)));
> > 
> >      /* event-sources */
> > -    spapr_events_fdt_skel(fdt, epow_irq);
> > +    spapr_events_fdt_skel(fdt);
> > 
> >      /* /hypervisor node */
> >      if (kvm_enabled()) {
> > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine)
> >      }
> >      g_free(filename);
> > 
> > -    /* Set up EPOW events infrastructure */
> > +    /* Set up RTAS event infrastructure */
> >      spapr_events_init(spapr);
> > 
> >      /* Set up the RTC RTAS interfaces */
> > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine)
> >      /* Prepare the device tree */
> >      spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
> >                                              kernel_size, kernel_le,
> > -                                            kernel_cmdline,
> > -                                            spapr->check_exception_irq);
> > +                                            kernel_cmdline);
> >      assert(spapr->fdt_skel != NULL);
> > 
> >      /* used by RTAS */
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index 4c7b6ae..f8bbec6 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -40,6 +40,7 @@
> >  #include "hw/ppc/spapr_drc.h"
> >  #include "qemu/help_option.h"
> >  #include "qemu/bcd.h"
> > +#include "hw/ppc/spapr_ovec.h"
> >  #include <libfdt.h>
> > 
> >  struct rtas_error_log {
> > @@ -206,28 +207,104 @@ struct hp_log_full {
> >      struct rtas_event_log_v6_hp hp;
> >  } QEMU_PACKED;
> > 
> > -#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
> > -#define EVENT_MASK_EPOW                      0x40000000
> > -#define EVENT_MASK_HOTPLUG                   0x10000000
> > -#define EVENT_MASK_IO                        0x08000000
> > +typedef enum EventClassIndex {
> > +    EVENT_CLASS_INTERNAL_ERRORS     = 0,
> > +    EVENT_CLASS_EPOW                = 1,
> > +    EVENT_CLASS_RESERVED            = 2,
> > +    EVENT_CLASS_HOT_PLUG            = 3,
> > +    EVENT_CLASS_IO                  = 4,
> > +    EVENT_CLASS_MAX
> > +} EventClassIndex;
> > +
> > +#define EVENT_CLASS_MASK(index) (1 << (31 - index))
> > +
> > +typedef struct EventSource {
> > +    const char *name;
> > +    int irq;
> > +    uint32_t mask;
> > +    bool enabled;
> > +} EventSource;
> > +
> > +static EventSource event_source[EVENT_CLASS_MAX] = {
> > +    [EVENT_CLASS_INTERNAL_ERRORS]       = { .name = "internal-errors", },
> > +    [EVENT_CLASS_EPOW]                  = { .name = "epow-events", },
> > +    [EVENT_CLASS_HOT_PLUG]              = { .name = "hot-plug-events", },
> > +    [EVENT_CLASS_IO]                    = { .name = "ibm,io-events", },
> > +};
> > +
> > +static void rtas_event_source_register(EventClassIndex index, int irq)
> > +{
> > +    /* we only support 1 irq per event class at the moment */
> > +    g_assert(!event_source[index].enabled);
> > +    event_source[index].irq = irq;
> > +    event_source[index].mask = EVENT_CLASS_MASK(index);
> > +    event_source[index].enabled = true;
> > +}
> > 
> > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
> > +void spapr_events_fdt_skel(void *fdt)
> >  {
> > -    uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
> > -    uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
> > +    uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
> > +    int i, count = 0;
> > 
> >      _FDT((fdt_begin_node(fdt, "event-sources")));
> > 
> > +    for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
> > +        /* TODO: what does 0 entail? */
> > +        uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 };
> > +
> > +        if (!event_source[i].enabled) {
> > +            continue;
> > +        }
> > +
> > +        _FDT((fdt_begin_node(fdt, event_source[i].name)));
> > +        _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> > +        _FDT((fdt_end_node(fdt)));
> > +
> > +        irq_ranges[count++] = interrupts[0];
> > +        irq_ranges[count++] = cpu_to_be32(1);
> > +    }
> > +
> > +    /* TODO: confirm the count is the last expected element */
> > +    irq_ranges[count] = cpu_to_be32(count);
> > +    count++;
> > +
> >      _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
> >      _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
> >      _FDT((fdt_property(fdt, "interrupt-ranges",
> > -                       irq_ranges, sizeof(irq_ranges))));
> > +                       irq_ranges, count * sizeof(uint32_t))));
> > 
> > -    _FDT((fdt_begin_node(fdt, "epow-events")));
> > -    _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> >      _FDT((fdt_end_node(fdt)));
> > +}
> > 
> > -    _FDT((fdt_end_node(fdt)));
> > +static const EventSource *rtas_event_log_to_source(int log_type)
> > +{
> > +    const EventSource *source;
> > +
> > +    switch (log_type) {
> > +        case RTAS_LOG_TYPE_HOTPLUG:
> > +            source = &event_source[EVENT_CLASS_HOT_PLUG];
> > +            if (event_source[EVENT_CLASS_HOT_PLUG].enabled) {
> > +                break;
> > +            }
> 
> In addition to the above .enabled check, shouldn't you be checking if
> the guest indeed supports the dedicated hotplug interrupt source before
> returning the source ?

Yes, I believe you're right. I'd been relying on legacy-hotplug-events=true
to test the old signalling mechanism (which leaves this event source
disabled), but haven't tried PCI/CPU since adding the ovec stuff, so
didn't notice those were broken with legacy-hotplug-events=false.
Will fix it up in next submission.
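
A hedged sketch of the kind of check being discussed, i.e. only treating the
dedicated source as usable once the guest has actually negotiated it at CAS
time. This assumes the option-vector helpers from earlier in the series
(spapr_ovec_test) and an OV5 bit for the hotplug event source; the exact names
may differ in the next submission:

    static bool spapr_hotplug_event_source_usable(sPAPRMachineState *spapr)
    {
        /* enabled on the QEMU side *and* advertised by the guest via CAS */
        return spapr->use_hotplug_event_source &&
               spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT);
    }

rtas_event_log_to_source() could then fall back to the EPOW source whenever
this returns false, which would cover guests like the 4.7 kernel mentioned
above.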

> 
> This I believe is the reason for the CPU hotplug failures I that mentioned
> in reply to your 11/11 thread. I am on 4.7.x kernel which probably doesn't
> support hotplug interrupt source, but QEMU ends up registering and raising
> such an interrupt.
> 
> Regards,
> Bharata.
David Gibson Oct. 16, 2016, 11:39 p.m. UTC | #5
On Fri, Oct 14, 2016 at 01:44:17PM -0500, Michael Roth wrote:
> Quoting David Gibson (2016-10-13 23:56:43)
> > On Wed, Oct 12, 2016 at 06:13:56PM -0500, Michael Roth wrote:
> > > Hotplug events were previously delivered using an EPOW interrupt
> > > and were queued by linux guests into a circular buffer. For traditional
> > > EPOW events like shutdown/resets, this isn't an issue, but for hotplug
> > > events there are cases where this buffer can be exhausted, resulting
> > > in the loss of hotplug events, resets, etc.
> > > 
> > > Newer-style hotplug event are delivered using a dedicated event source.
> > > We enable this in supported guests by adding standard an additional
> > > event source in the guest device-tree via /event-sources, and, if
> > > the guest advertises support for the newer-style hotplug events,
> > > using the corresponding interrupt to signal the available of
> > > hotplug/unplug events.
> > > 
> > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> > 
> > So.. are you saying that as well as allowing new event types, the new
> > special hotplug event souce effectively allows for a bigger queue?
> > 
> > Does that mean that we didn't even necessarily need the base+length
> > unplug events, because we could now have sent the many single-LMB
> > unplug requests that were necessary?  Or does it not increase the
> > effective queue enough for that?
> 
> I assume there are still some internal limits, but the events
> are now processed using a workqueue which should provide a bit more
> headroom than the RTAS event buffer used for EPOW events. Maybe
> John (on cc:) can provide more insight into what the actual limits
> 
> In either case, the possibility for huge memory hotplug/unplug
> situations still warrant the use of count+index I think, and since
> support for the new event delivery mechanism is negotiated using the
> same option bit as count+index, there's not really any reason not to
> use it when we can. For situations where we can't (CPU/PCI), it
> might give us a bit of breathing room there as well.

Ok, makes sense.  Thanks for the extra information.

> 
> > 
> > > ---
> > >  hw/ppc/spapr.c         |  10 ++--
> > >  hw/ppc/spapr_events.c  | 148 ++++++++++++++++++++++++++++++++++++++-----------
> > >  include/hw/ppc/spapr.h |   3 +-
> > >  3 files changed, 120 insertions(+), 41 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index d80a6fa..2037222 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -275,8 +275,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> > >                                     hwaddr initrd_size,
> > >                                     hwaddr kernel_size,
> > >                                     bool little_endian,
> > > -                                   const char *kernel_cmdline,
> > > -                                   uint32_t epow_irq)
> > > +                                   const char *kernel_cmdline)
> > >  {
> > >      void *fdt;
> > >      uint32_t start_prop = cpu_to_be32(initrd_base);
> > > @@ -437,7 +436,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> > >      _FDT((fdt_end_node(fdt)));
> > >  
> > >      /* event-sources */
> > > -    spapr_events_fdt_skel(fdt, epow_irq);
> > > +    spapr_events_fdt_skel(fdt);
> > >  
> > >      /* /hypervisor node */
> > >      if (kvm_enabled()) {
> > > @@ -1944,7 +1943,7 @@ static void ppc_spapr_init(MachineState *machine)
> > >      }
> > >      g_free(filename);
> > >  
> > > -    /* Set up EPOW events infrastructure */
> > > +    /* Set up RTAS event infrastructure */
> > >      spapr_events_init(spapr);
> > >  
> > >      /* Set up the RTC RTAS interfaces */
> > > @@ -2076,8 +2075,7 @@ static void ppc_spapr_init(MachineState *machine)
> > >      /* Prepare the device tree */
> > >      spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
> > >                                              kernel_size, kernel_le,
> > > -                                            kernel_cmdline,
> > > -                                            spapr->check_exception_irq);
> > > +                                            kernel_cmdline);
> > >      assert(spapr->fdt_skel != NULL);
> > >  
> > >      /* used by RTAS */
> > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > > index 4c7b6ae..f8bbec6 100644
> > > --- a/hw/ppc/spapr_events.c
> > > +++ b/hw/ppc/spapr_events.c
> > > @@ -40,6 +40,7 @@
> > >  #include "hw/ppc/spapr_drc.h"
> > >  #include "qemu/help_option.h"
> > >  #include "qemu/bcd.h"
> > > +#include "hw/ppc/spapr_ovec.h"
> > >  #include <libfdt.h>
> > >  
> > >  struct rtas_error_log {
> > > @@ -206,28 +207,104 @@ struct hp_log_full {
> > >      struct rtas_event_log_v6_hp hp;
> > >  } QEMU_PACKED;
> > >  
> > > -#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
> > > -#define EVENT_MASK_EPOW                      0x40000000
> > > -#define EVENT_MASK_HOTPLUG                   0x10000000
> > > -#define EVENT_MASK_IO                        0x08000000
> > > +typedef enum EventClassIndex {
> > > +    EVENT_CLASS_INTERNAL_ERRORS     = 0,
> > > +    EVENT_CLASS_EPOW                = 1,
> > > +    EVENT_CLASS_RESERVED            = 2,
> > > +    EVENT_CLASS_HOT_PLUG            = 3,
> > > +    EVENT_CLASS_IO                  = 4,
> > > +    EVENT_CLASS_MAX
> > > +} EventClassIndex;
> > > +
> > > +#define EVENT_CLASS_MASK(index) (1 << (31 - index))
> > > +
> > > +typedef struct EventSource {
> > > +    const char *name;
> > > +    int irq;
> > > +    uint32_t mask;
> > > +    bool enabled;
> > > +} EventSource;
> > > +
> > > +static EventSource event_source[EVENT_CLASS_MAX] = {
> > > +    [EVENT_CLASS_INTERNAL_ERRORS]       = { .name = "internal-errors", },
> > > +    [EVENT_CLASS_EPOW]                  = { .name = "epow-events", },
> > > +    [EVENT_CLASS_HOT_PLUG]              = { .name = "hot-plug-events", },
> > > +    [EVENT_CLASS_IO]                    = { .name = "ibm,io-events", },
> > > +};
> > > +
> > > +static void rtas_event_source_register(EventClassIndex index, int irq)
> > > +{
> > > +    /* we only support 1 irq per event class at the moment */
> > > +    g_assert(!event_source[index].enabled);
> > > +    event_source[index].irq = irq;
> > > +    event_source[index].mask = EVENT_CLASS_MASK(index);
> > > +    event_source[index].enabled = true;
> > > +}
> > 
> > I really don't like adding a mutable global table.  This should
> > probably be under the sPAPRMachineState.
> 
> I think I started off going that route. Not sure what steered me in this
> direction, but will take another look at that approach.
> 
> > 
> > > -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
> > > +void spapr_events_fdt_skel(void *fdt)
> > >  {
> > > -    uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
> > > -    uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
> > > +    uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
> > > +    int i, count = 0;
> > >  
> > >      _FDT((fdt_begin_node(fdt, "event-sources")));
> > >  
> > > +    for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
> > > +        /* TODO: what does 0 entail? */
> > 
> > It's just part of the interrupt specifier format expected by the
> > event-sources binding.  It's not really important.
> > 
> > > +        uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 };
> > > +
> > > +        if (!event_source[i].enabled) {
> > > +            continue;
> > > +        }
> > > +
> > > +        _FDT((fdt_begin_node(fdt, event_source[i].name)));
> > > +        _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> > > +        _FDT((fdt_end_node(fdt)));
> > > +
> > > +        irq_ranges[count++] = interrupts[0];
> > > +        irq_ranges[count++] = cpu_to_be32(1);
> > > +    }
> > > +
> > > +    /* TODO: confirm the count is the last expected element */
> > > +    irq_ranges[count] = cpu_to_be32(count);
> > > +    count++;
> > > +
> > >      _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
> > >      _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
> > >      _FDT((fdt_property(fdt, "interrupt-ranges",
> > > -                       irq_ranges, sizeof(irq_ranges))));
> > > +                       irq_ranges, count * sizeof(uint32_t))));
> > >  
> > > -    _FDT((fdt_begin_node(fdt, "epow-events")));
> > > -    _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
> > >      _FDT((fdt_end_node(fdt)));
> > > +}
> > >  
> > > -    _FDT((fdt_end_node(fdt)));
> > > +static const EventSource *rtas_event_log_to_source(int log_type)
> > > +{
> > > +    const EventSource *source;
> > > +
> > > +    switch (log_type) {
> > > +        case RTAS_LOG_TYPE_HOTPLUG:
> > > +            source = &event_source[EVENT_CLASS_HOT_PLUG];
> > > +            if (event_source[EVENT_CLASS_HOT_PLUG].enabled) {
> > > +                break;
> > > +            }
> > 
> > This should probably be using the flag you already have in the
> > MachineState, rather than a global.
> > 
> > > +            /* fall back to epow for legacy hotplug interrupt source */
> > > +        case RTAS_LOG_TYPE_EPOW:
> > > +            source = &event_source[EVENT_CLASS_EPOW];
> > > +            break;
> > > +        default:
> > > +            source = NULL;
> > > +    }
> > > +
> > > +    return source;
> > > +}
> > > +
> > > +static int rtas_event_log_to_irq(int log_type)
> > > +{
> > > +    const EventSource *source = rtas_event_log_to_source(log_type);
> > > +
> > > +    g_assert(source);
> > > +    g_assert(source->enabled);
> > > +
> > > +    return source->irq;
> > >  }
> > >  
> > >  static void rtas_event_log_queue(int log_type, void *data, bool exception)
> > > @@ -248,19 +325,14 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
> > >      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > >      sPAPREventLogEntry *entry = NULL;
> > >  
> > > -    /* we only queue EPOW events atm. */
> > > -    if ((event_mask & EVENT_MASK_EPOW) == 0) {
> > > -        return NULL;
> > > -    }
> > > -
> > >      QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
> > > +        const EventSource *source = rtas_event_log_to_source(entry->log_type);
> > > +
> > >          if (entry->exception != exception) {
> > >              continue;
> > >          }
> > >  
> > > -        /* EPOW and hotplug events are surfaced in the same manner */
> > > -        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
> > > -            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
> > > +        if (source->mask & event_mask) {
> > >              break;
> > >          }
> > >      }
> > > @@ -277,19 +349,14 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
> > >      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > >      sPAPREventLogEntry *entry = NULL;
> > >  
> > > -    /* we only queue EPOW events atm. */
> > > -    if ((event_mask & EVENT_MASK_EPOW) == 0) {
> > > -        return false;
> > > -    }
> > > -
> > >      QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
> > > +        const EventSource *source = rtas_event_log_to_source(entry->log_type);
> > > +
> > >          if (entry->exception != exception) {
> > >              continue;
> > >          }
> > >  
> > > -        /* EPOW and hotplug events are surfaced in the same manner */
> > > -        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
> > > -            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
> > > +        if (source->mask & event_mask) {
> > >              return true;
> > >          }
> > >      }
> > > @@ -377,7 +444,8 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
> > >  
> > >      rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
> > >  
> > > -    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> > > +    qemu_irq_pulse(xics_get_qirq(spapr->xics,
> > > +                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW)));
> > >  }
> > >  
> > >  static void spapr_hotplug_set_signalled(uint32_t drc_index)
> > > @@ -459,7 +527,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
> > >  
> > >      rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
> > >  
> > > -    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> > > +    qemu_irq_pulse(xics_get_qirq(spapr->xics,
> > > +                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG)));
> > >  }
> > >  
> > >  void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
> > > @@ -505,6 +574,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> > >      uint64_t xinfo;
> > >      sPAPREventLogEntry *event;
> > >      struct rtas_error_log *hdr;
> > > +    int i;
> > >  
> > >      if ((nargs < 6) || (nargs > 7) || nret != 1) {
> > >          rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> > > @@ -541,8 +611,11 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> > >       * do the latter here, since our code relies on edge-triggered
> > >       * interrupts.
> > >       */
> > > -    if (rtas_event_log_contains(mask, true)) {
> > > -        qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
> > > +    for (i = 0; i < EVENT_CLASS_MAX; i++) {
> > > +        if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) {
> > > +            g_assert(event_source[i].enabled);
> > > +            qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq));
> > > +        }
> > >      }
> > >  
> > >      return;
> > > @@ -594,8 +667,17 @@ out_no_events:
> > >  void spapr_events_init(sPAPRMachineState *spapr)
> > >  {
> > >      QTAILQ_INIT(&spapr->pending_events);
> > > -    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false,
> > > -                                            &error_fatal);
> > > +
> > > +    rtas_event_source_register(EVENT_CLASS_EPOW,
> > > +                               xics_spapr_alloc(spapr->xics, 0, 0, false,
> > > +                                                &error_fatal));
> > > +
> > > +    if (spapr->use_hotplug_event_source) {
> > > +        rtas_event_source_register(EVENT_CLASS_HOT_PLUG,
> > > +                                   xics_spapr_alloc(spapr->xics, 0, 0, false,
> > > +                                                    &error_fatal));
> > > +    }
> > > +
> > >      spapr->epow_notifier.notify = spapr_powerdown_req;
> > >      qemu_register_powerdown_notifier(&spapr->epow_notifier);
> > >      spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
> > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > > index d1a4a14..2295ac6 100644
> > > --- a/include/hw/ppc/spapr.h
> > > +++ b/include/hw/ppc/spapr.h
> > > @@ -71,7 +71,6 @@ struct sPAPRMachineState {
> > >      sPAPROptionVector *ov5_cas;
> > >      bool cas_reboot;
> > >  
> > > -    uint32_t check_exception_irq;
> > >      Notifier epow_notifier;
> > >      QTAILQ_HEAD(, sPAPREventLogEntry) pending_events;
> > >      bool use_hotplug_event_source;
> > > @@ -579,7 +578,7 @@ struct sPAPREventLogEntry {
> > >  };
> > >  
> > >  void spapr_events_init(sPAPRMachineState *sm);
> > > -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
> > > +void spapr_events_fdt_skel(void *fdt);
> > >  int spapr_h_cas_compose_response(sPAPRMachineState *sm,
> > >                                   target_ulong addr, target_ulong size,
> > >                                   bool cpu_update,
> > 
>
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d80a6fa..2037222 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -275,8 +275,7 @@  static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                    hwaddr initrd_size,
                                    hwaddr kernel_size,
                                    bool little_endian,
-                                   const char *kernel_cmdline,
-                                   uint32_t epow_irq)
+                                   const char *kernel_cmdline)
 {
     void *fdt;
     uint32_t start_prop = cpu_to_be32(initrd_base);
@@ -437,7 +436,7 @@  static void *spapr_create_fdt_skel(hwaddr initrd_base,
     _FDT((fdt_end_node(fdt)));
 
     /* event-sources */
-    spapr_events_fdt_skel(fdt, epow_irq);
+    spapr_events_fdt_skel(fdt);
 
     /* /hypervisor node */
     if (kvm_enabled()) {
@@ -1944,7 +1943,7 @@  static void ppc_spapr_init(MachineState *machine)
     }
     g_free(filename);
 
-    /* Set up EPOW events infrastructure */
+    /* Set up RTAS event infrastructure */
     spapr_events_init(spapr);
 
     /* Set up the RTC RTAS interfaces */
@@ -2076,8 +2075,7 @@  static void ppc_spapr_init(MachineState *machine)
     /* Prepare the device tree */
     spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
                                             kernel_size, kernel_le,
-                                            kernel_cmdline,
-                                            spapr->check_exception_irq);
+                                            kernel_cmdline);
     assert(spapr->fdt_skel != NULL);
 
     /* used by RTAS */
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 4c7b6ae..f8bbec6 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@ 
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
 
 struct rtas_error_log {
@@ -206,28 +207,104 @@  struct hp_log_full {
     struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
-#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
-#define EVENT_MASK_EPOW                      0x40000000
-#define EVENT_MASK_HOTPLUG                   0x10000000
-#define EVENT_MASK_IO                        0x08000000
+typedef enum EventClassIndex {
+    EVENT_CLASS_INTERNAL_ERRORS     = 0,
+    EVENT_CLASS_EPOW                = 1,
+    EVENT_CLASS_RESERVED            = 2,
+    EVENT_CLASS_HOT_PLUG            = 3,
+    EVENT_CLASS_IO                  = 4,
+    EVENT_CLASS_MAX
+} EventClassIndex;
+
+#define EVENT_CLASS_MASK(index) (1 << (31 - index))
+
+typedef struct EventSource {
+    const char *name;
+    int irq;
+    uint32_t mask;
+    bool enabled;
+} EventSource;
+
+static EventSource event_source[EVENT_CLASS_MAX] = {
+    [EVENT_CLASS_INTERNAL_ERRORS]       = { .name = "internal-errors", },
+    [EVENT_CLASS_EPOW]                  = { .name = "epow-events", },
+    [EVENT_CLASS_HOT_PLUG]              = { .name = "hot-plug-events", },
+    [EVENT_CLASS_IO]                    = { .name = "ibm,io-events", },
+};
+
+static void rtas_event_source_register(EventClassIndex index, int irq)
+{
+    /* we only support 1 irq per event class at the moment */
+    g_assert(!event_source[index].enabled);
+    event_source[index].irq = irq;
+    event_source[index].mask = EVENT_CLASS_MASK(index);
+    event_source[index].enabled = true;
+}
 
-void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
+void spapr_events_fdt_skel(void *fdt)
 {
-    uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
-    uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
+    uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
+    int i, count = 0;
 
     _FDT((fdt_begin_node(fdt, "event-sources")));
 
+    for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
+        /* 2-cell interrupt specifier: <irq number, trigger sense>; 0 = edge-triggered */
+        uint32_t interrupts[] = { cpu_to_be32(event_source[i].irq), 0 };
+
+        if (!event_source[i].enabled) {
+            continue;
+        }
+
+        _FDT((fdt_begin_node(fdt, event_source[i].name)));
+        _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
+        _FDT((fdt_end_node(fdt)));
+
+        irq_ranges[count++] = interrupts[0];
+        irq_ranges[count++] = cpu_to_be32(1);
+    }
+
+    /* FIXME: confirm "interrupt-ranges" wants this trailing cell count after the <irq, 1> pairs */
+    irq_ranges[count] = cpu_to_be32(count);
+    count++;
+
     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
     _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
     _FDT((fdt_property(fdt, "interrupt-ranges",
-                       irq_ranges, sizeof(irq_ranges))));
+                       irq_ranges, count * sizeof(uint32_t))));
 
-    _FDT((fdt_begin_node(fdt, "epow-events")));
-    _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
     _FDT((fdt_end_node(fdt)));
+}
 
-    _FDT((fdt_end_node(fdt)));
+static const EventSource *rtas_event_log_to_source(int log_type)
+{
+    const EventSource *source;
+
+    switch (log_type) {
+        case RTAS_LOG_TYPE_HOTPLUG:
+            source = &event_source[EVENT_CLASS_HOT_PLUG];
+            if (event_source[EVENT_CLASS_HOT_PLUG].enabled) {
+                break;
+            }
+            /* fall back to epow for legacy hotplug interrupt source */
+        case RTAS_LOG_TYPE_EPOW:
+            source = &event_source[EVENT_CLASS_EPOW];
+            break;
+        default:
+            source = NULL;
+    }
+
+    return source;
+}
+
+static int rtas_event_log_to_irq(int log_type)
+{
+    const EventSource *source = rtas_event_log_to_source(log_type);
+
+    g_assert(source);
+    g_assert(source->enabled);
+
+    return source->irq;
 }
 
 static void rtas_event_log_queue(int log_type, void *data, bool exception)
@@ -248,19 +325,14 @@  static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
 
-    /* we only queue EPOW events atm. */
-    if ((event_mask & EVENT_MASK_EPOW) == 0) {
-        return NULL;
-    }
-
     QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
+        const EventSource *source = rtas_event_log_to_source(entry->log_type);
+
         if (entry->exception != exception) {
             continue;
         }
 
-        /* EPOW and hotplug events are surfaced in the same manner */
-        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
-            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
+        if (source->mask & event_mask) {
             break;
         }
     }
@@ -277,19 +349,14 @@  static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
 
-    /* we only queue EPOW events atm. */
-    if ((event_mask & EVENT_MASK_EPOW) == 0) {
-        return false;
-    }
-
     QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
+        const EventSource *source = rtas_event_log_to_source(entry->log_type);
+
         if (entry->exception != exception) {
             continue;
         }
 
-        /* EPOW and hotplug events are surfaced in the same manner */
-        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
-            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
+        if (source->mask & event_mask) {
             return true;
         }
     }
@@ -377,7 +444,8 @@  static void spapr_powerdown_req(Notifier *n, void *opaque)
 
     rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
+    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_EPOW)));
 }
 
 static void spapr_hotplug_set_signalled(uint32_t drc_index)
@@ -459,7 +527,8 @@  static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
 
     rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
+    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+                                 rtas_event_log_to_irq(RTAS_LOG_TYPE_HOTPLUG)));
 }
 
 void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
@@ -505,6 +574,7 @@  static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     uint64_t xinfo;
     sPAPREventLogEntry *event;
     struct rtas_error_log *hdr;
+    int i;
 
     if ((nargs < 6) || (nargs > 7) || nret != 1) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -541,8 +611,11 @@  static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
      * do the latter here, since our code relies on edge-triggered
      * interrupts.
      */
-    if (rtas_event_log_contains(mask, true)) {
-        qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
+    for (i = 0; i < EVENT_CLASS_MAX; i++) {
+        if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) {
+            g_assert(event_source[i].enabled);
+            qemu_irq_pulse(xics_get_qirq(spapr->xics, event_source[i].irq));
+        }
     }
 
     return;
@@ -594,8 +667,17 @@  out_no_events:
 void spapr_events_init(sPAPRMachineState *spapr)
 {
     QTAILQ_INIT(&spapr->pending_events);
-    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false,
-                                            &error_fatal);
+
+    rtas_event_source_register(EVENT_CLASS_EPOW,
+                               xics_spapr_alloc(spapr->xics, 0, 0, false,
+                                                &error_fatal));
+
+    if (spapr->use_hotplug_event_source) {
+        rtas_event_source_register(EVENT_CLASS_HOT_PLUG,
+                                   xics_spapr_alloc(spapr->xics, 0, 0, false,
+                                                    &error_fatal));
+    }
+
     spapr->epow_notifier.notify = spapr_powerdown_req;
     qemu_register_powerdown_notifier(&spapr->epow_notifier);
     spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index d1a4a14..2295ac6 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -71,7 +71,6 @@  struct sPAPRMachineState {
     sPAPROptionVector *ov5_cas;
     bool cas_reboot;
 
-    uint32_t check_exception_irq;
     Notifier epow_notifier;
     QTAILQ_HEAD(, sPAPREventLogEntry) pending_events;
     bool use_hotplug_event_source;
@@ -579,7 +578,7 @@  struct sPAPREventLogEntry {
 };
 
 void spapr_events_init(sPAPRMachineState *sm);
-void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
+void spapr_events_fdt_skel(void *fdt);
 int spapr_h_cas_compose_response(sPAPRMachineState *sm,
                                  target_ulong addr, target_ulong size,
                                  bool cpu_update,