diff mbox series

[v8,1/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls

Message ID 155591657807.20338.12115795588476734752.stgit@aravinda
State New
Headers show
Series target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests | expand

Commit Message

Aravinda Prasad April 22, 2019, 7:02 a.m. UTC
This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed. This patch implements the
releasing part of the error-log while subsequent patch
(which builds error log) handles the locking part.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         |   18 ++++++++++++++
 hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |    9 ++++++-
 3 files changed, 87 insertions(+), 1 deletion(-)

Comments

David Gibson April 23, 2019, 6:45 a.m. UTC | #1
On Mon, Apr 22, 2019 at 12:32:58PM +0530, Aravinda Prasad wrote:
> This patch adds support in QEMU to handle "ibm,nmi-register"
> and "ibm,nmi-interlock" RTAS calls.
> 
> The machine check notification address is saved when the
> OS issues "ibm,nmi-register" RTAS call.
> 
> This patch also handles the case when multiple processors
> experience machine check at or about the same time by
> handling "ibm,nmi-interlock" call. In such cases, as per
> PAPR, subsequent processors serialize waiting for the first
> processor to issue the "ibm,nmi-interlock" call. The second
> processor that also received a machine check error waits
> till the first processor is done reading the error log.
> The first processor issues "ibm,nmi-interlock" call
> when the error log is consumed. This patch implements the
> releasing part of the error-log while subsequent patch
> (which builds error log) handles the locking part.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Although I wonder if it needs to be moved later in the series to avoid
advertising the availability of the RTAS calls to the guest before all
the prereq patches are in place to make them work properly.

> ---
>  hw/ppc/spapr.c         |   18 ++++++++++++++
>  hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    9 ++++++-
>  3 files changed, 87 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index c56939a..6642cb5 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1805,6 +1805,11 @@ static void spapr_machine_reset(void)
>      first_ppc_cpu->env.gpr[5] = 0;
>  
>      spapr->cas_reboot = false;
> +
> +    spapr->guest_machine_check_addr = -1;
> +
> +    /* Signal all vCPUs waiting on this condition */
> +    qemu_cond_broadcast(&spapr->mc_delivery_cond);
>  }
>  
>  static void spapr_create_nvram(SpaprMachineState *spapr)
> @@ -2095,6 +2100,16 @@ static const VMStateDescription vmstate_spapr_dtb = {
>      },
>  };
>  
> +static const VMStateDescription vmstate_spapr_machine_check = {
> +    .name = "spapr_machine_check",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static const VMStateDescription vmstate_spapr = {
>      .name = "spapr",
>      .version_id = 3,
> @@ -2127,6 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_dtb,
>          &vmstate_spapr_cap_large_decr,
>          &vmstate_spapr_cap_ccf_assist,
> +        &vmstate_spapr_machine_check,
>          NULL
>      }
>  };
> @@ -3068,6 +3084,8 @@ static void spapr_machine_init(MachineState *machine)
>  
>          kvmppc_spapr_enable_inkernel_multitce();
>      }
> +
> +    qemu_cond_init(&spapr->mc_delivery_cond);
>  }
>  
>  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index ee24212..c2f3991 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -348,6 +348,39 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      rtas_st(rets, 1, 100);
>  }
>  
> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> +                                  SpaprMachineState *spapr,
> +                                  uint32_t token, uint32_t nargs,
> +                                  target_ulong args,
> +                                  uint32_t nret, target_ulong rets)
> +{
> +    uint64_t rtas_addr = spapr_get_rtas_addr();
> +
> +    if (!rtas_addr) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +        return;
> +    }
> +
> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> +                                   SpaprMachineState *spapr,
> +                                   uint32_t token, uint32_t nargs,
> +                                   target_ulong args,
> +                                   uint32_t nret, target_ulong rets)
> +{
> +    if (!spapr->guest_machine_check_addr) {
> +        /* NMI register not called */
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +    } else {
> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +    }
> +}
> +
> +
>  static struct rtas_call {
>      const char *name;
>      spapr_rtas_fn fn;
> @@ -466,6 +499,30 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
>      }
>  }
>  
> +uint64_t spapr_get_rtas_addr(void)
> +{
> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +    int rtas_node;
> +    const struct fdt_property *rtas_addr_prop;
> +    void *fdt = spapr->fdt_blob;
> +    uint32_t rtas_addr;
> +
> +    /* fetch rtas addr from fdt */
> +    rtas_node = fdt_path_offset(fdt, "/rtas");
> +    if (rtas_node == 0) {
> +        return 0;
> +    }
> +
> +    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
> +    if (!rtas_addr_prop) {
> +        return 0;
> +    }
> +
> +    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);
> +    return (uint64_t)rtas_addr;
> +}
> +
> +
>  static void core_rtas_register_types(void)
>  {
>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
> @@ -489,6 +546,10 @@ static void core_rtas_register_types(void)
>                          rtas_set_power_level);
>      spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
>                          rtas_get_power_level);
> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
> +                        rtas_ibm_nmi_register);
> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
> +                        rtas_ibm_nmi_interlock);
>  }
>  
>  type_init(core_rtas_register_types)
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7e32f30..ec6f33e 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -187,6 +187,10 @@ struct SpaprMachineState {
>       * occurs during the unplug process. */
>      QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
>  
> +    /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
> +    target_ulong guest_machine_check_addr;
> +    QemuCond mc_delivery_cond;
> +
>      /*< public >*/
>      char *kvm_type;
>      char *host_model;
> @@ -623,8 +627,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>  
> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>  
>  /* RTAS ibm,get-system-parameter token values */
>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
> @@ -874,4 +880,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>  #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
>  
>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
> +uint64_t spapr_get_rtas_addr(void);
>  #endif /* HW_SPAPR_H */
>
Aravinda Prasad April 25, 2019, 4:56 a.m. UTC | #2
On Tuesday 23 April 2019 12:15 PM, David Gibson wrote:
> On Mon, Apr 22, 2019 at 12:32:58PM +0530, Aravinda Prasad wrote:
>> This patch adds support in QEMU to handle "ibm,nmi-register"
>> and "ibm,nmi-interlock" RTAS calls.
>>
>> The machine check notification address is saved when the
>> OS issues "ibm,nmi-register" RTAS call.
>>
>> This patch also handles the case when multiple processors
>> experience machine check at or about the same time by
>> handling "ibm,nmi-interlock" call. In such cases, as per
>> PAPR, subsequent processors serialize waiting for the first
>> processor to issue the "ibm,nmi-interlock" call. The second
>> processor that also received a machine check error waits
>> till the first processor is done reading the error log.
>> The first processor issues "ibm,nmi-interlock" call
>> when the error log is consumed. This patch implements the
>> releasing part of the error-log while subsequent patch
>> (which builds error log) handles the locking part.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Although I wonder if it needs to be moved later in the series to avoid
> advertising the availability of the RTAS calls to the guest before all
> the prereq patches are in place to make them work properly.

Patch 3 and 4 uses "guest_machine_check_addr", which is set in this
patch. If we push this beyond patch 4, then I feel the use of
"guest_machine_check_addr" looks odd in patch 3 and 4.

Regards,
Aravinda

> 
>> ---
>>  hw/ppc/spapr.c         |   18 ++++++++++++++
>>  hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    9 ++++++-
>>  3 files changed, 87 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index c56939a..6642cb5 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1805,6 +1805,11 @@ static void spapr_machine_reset(void)
>>      first_ppc_cpu->env.gpr[5] = 0;
>>  
>>      spapr->cas_reboot = false;
>> +
>> +    spapr->guest_machine_check_addr = -1;
>> +
>> +    /* Signal all vCPUs waiting on this condition */
>> +    qemu_cond_broadcast(&spapr->mc_delivery_cond);
>>  }
>>  
>>  static void spapr_create_nvram(SpaprMachineState *spapr)
>> @@ -2095,6 +2100,16 @@ static const VMStateDescription vmstate_spapr_dtb = {
>>      },
>>  };
>>  
>> +static const VMStateDescription vmstate_spapr_machine_check = {
>> +    .name = "spapr_machine_check",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>>  static const VMStateDescription vmstate_spapr = {
>>      .name = "spapr",
>>      .version_id = 3,
>> @@ -2127,6 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
>>          &vmstate_spapr_dtb,
>>          &vmstate_spapr_cap_large_decr,
>>          &vmstate_spapr_cap_ccf_assist,
>> +        &vmstate_spapr_machine_check,
>>          NULL
>>      }
>>  };
>> @@ -3068,6 +3084,8 @@ static void spapr_machine_init(MachineState *machine)
>>  
>>          kvmppc_spapr_enable_inkernel_multitce();
>>      }
>> +
>> +    qemu_cond_init(&spapr->mc_delivery_cond);
>>  }
>>  
>>  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index ee24212..c2f3991 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -348,6 +348,39 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>      rtas_st(rets, 1, 100);
>>  }
>>  
>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>> +                                  SpaprMachineState *spapr,
>> +                                  uint32_t token, uint32_t nargs,
>> +                                  target_ulong args,
>> +                                  uint32_t nret, target_ulong rets)
>> +{
>> +    uint64_t rtas_addr = spapr_get_rtas_addr();
>> +
>> +    if (!rtas_addr) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +        return;
>> +    }
>> +
>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +
>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> +                                   SpaprMachineState *spapr,
>> +                                   uint32_t token, uint32_t nargs,
>> +                                   target_ulong args,
>> +                                   uint32_t nret, target_ulong rets)
>> +{
>> +    if (!spapr->guest_machine_check_addr) {
>> +        /* NMI register not called */
>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>> +    } else {
>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +    }
>> +}
>> +
>> +
>>  static struct rtas_call {
>>      const char *name;
>>      spapr_rtas_fn fn;
>> @@ -466,6 +499,30 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
>>      }
>>  }
>>  
>> +uint64_t spapr_get_rtas_addr(void)
>> +{
>> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> +    int rtas_node;
>> +    const struct fdt_property *rtas_addr_prop;
>> +    void *fdt = spapr->fdt_blob;
>> +    uint32_t rtas_addr;
>> +
>> +    /* fetch rtas addr from fdt */
>> +    rtas_node = fdt_path_offset(fdt, "/rtas");
>> +    if (rtas_node == 0) {
>> +        return 0;
>> +    }
>> +
>> +    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
>> +    if (!rtas_addr_prop) {
>> +        return 0;
>> +    }
>> +
>> +    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);
>> +    return (uint64_t)rtas_addr;
>> +}
>> +
>> +
>>  static void core_rtas_register_types(void)
>>  {
>>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
>> @@ -489,6 +546,10 @@ static void core_rtas_register_types(void)
>>                          rtas_set_power_level);
>>      spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
>>                          rtas_get_power_level);
>> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
>> +                        rtas_ibm_nmi_register);
>> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>> +                        rtas_ibm_nmi_interlock);
>>  }
>>  
>>  type_init(core_rtas_register_types)
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 7e32f30..ec6f33e 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -187,6 +187,10 @@ struct SpaprMachineState {
>>       * occurs during the unplug process. */
>>      QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
>>  
>> +    /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
>> +    target_ulong guest_machine_check_addr;
>> +    QemuCond mc_delivery_cond;
>> +
>>      /*< public >*/
>>      char *kvm_type;
>>      char *host_model;
>> @@ -623,8 +627,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
>> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>>  
>> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>>  
>>  /* RTAS ibm,get-system-parameter token values */
>>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
>> @@ -874,4 +880,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>>  #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
>>  
>>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>> +uint64_t spapr_get_rtas_addr(void);
>>  #endif /* HW_SPAPR_H */
>>
>
Greg Kurz May 10, 2019, 9:06 a.m. UTC | #3
On Mon, 22 Apr 2019 12:32:58 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> This patch adds support in QEMU to handle "ibm,nmi-register"
> and "ibm,nmi-interlock" RTAS calls.
> 
> The machine check notification address is saved when the
> OS issues "ibm,nmi-register" RTAS call.
> 
> This patch also handles the case when multiple processors
> experience machine check at or about the same time by
> handling "ibm,nmi-interlock" call. In such cases, as per
> PAPR, subsequent processors serialize waiting for the first
> processor to issue the "ibm,nmi-interlock" call. The second
> processor that also received a machine check error waits
> till the first processor is done reading the error log.
> The first processor issues "ibm,nmi-interlock" call
> when the error log is consumed. This patch implements the
> releasing part of the error-log while subsequent patch
> (which builds error log) handles the locking part.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         |   18 ++++++++++++++
>  hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    9 ++++++-
>  3 files changed, 87 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index c56939a..6642cb5 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1805,6 +1805,11 @@ static void spapr_machine_reset(void)
>      first_ppc_cpu->env.gpr[5] = 0;
>  
>      spapr->cas_reboot = false;
> +
> +    spapr->guest_machine_check_addr = -1;
> +
> +    /* Signal all vCPUs waiting on this condition */
> +    qemu_cond_broadcast(&spapr->mc_delivery_cond);
>  }
>  
>  static void spapr_create_nvram(SpaprMachineState *spapr)
> @@ -2095,6 +2100,16 @@ static const VMStateDescription vmstate_spapr_dtb = {
>      },
>  };
>  
> +static const VMStateDescription vmstate_spapr_machine_check = {
> +    .name = "spapr_machine_check",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> +        VMSTATE_END_OF_LIST()
> +    },

This VMState descriptor is missing a .needed field because we only want
to migrate the subsection if the guest has called NMI register, ie.
spapr->guest_machine_check_addr != (target_ulong) -1.

> +};
> +
>  static const VMStateDescription vmstate_spapr = {
>      .name = "spapr",
>      .version_id = 3,
> @@ -2127,6 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_dtb,
>          &vmstate_spapr_cap_large_decr,
>          &vmstate_spapr_cap_ccf_assist,
> +        &vmstate_spapr_machine_check,
>          NULL
>      }
>  };
> @@ -3068,6 +3084,8 @@ static void spapr_machine_init(MachineState *machine)
>  
>          kvmppc_spapr_enable_inkernel_multitce();
>      }
> +
> +    qemu_cond_init(&spapr->mc_delivery_cond);
>  }
>  
>  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index ee24212..c2f3991 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -348,6 +348,39 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      rtas_st(rets, 1, 100);
>  }
>  
> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> +                                  SpaprMachineState *spapr,
> +                                  uint32_t token, uint32_t nargs,
> +                                  target_ulong args,
> +                                  uint32_t nret, target_ulong rets)
> +{
> +    uint64_t rtas_addr = spapr_get_rtas_addr();
> +
> +    if (!rtas_addr) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +        return;
> +    }
> +
> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> +                                   SpaprMachineState *spapr,
> +                                   uint32_t token, uint32_t nargs,
> +                                   target_ulong args,
> +                                   uint32_t nret, target_ulong rets)
> +{
> +    if (!spapr->guest_machine_check_addr) {

Hmm... the default value is -1. It looks like the check should rather be:

    if (spapr->guest_machine_check_addr == (target_ulong) -1) {


> +        /* NMI register not called */
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +    } else {
> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +    }
> +}
> +
> +
>  static struct rtas_call {
>      const char *name;
>      spapr_rtas_fn fn;
> @@ -466,6 +499,30 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
>      }
>  }
>  
> +uint64_t spapr_get_rtas_addr(void)

Shouldn't this be hwaddr instead of uint64_t ?

> +{
> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +    int rtas_node;
> +    const struct fdt_property *rtas_addr_prop;
> +    void *fdt = spapr->fdt_blob;
> +    uint32_t rtas_addr;
> +
> +    /* fetch rtas addr from fdt */
> +    rtas_node = fdt_path_offset(fdt, "/rtas");
> +    if (rtas_node == 0) {
> +        return 0;
> +    }
> +
> +    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
> +    if (!rtas_addr_prop) {

Just for curiosity: this is ok for linux, but what about other OSes (eg. AIX) ?

> +        return 0;
> +    }
> +
> +    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);

Also this assumes the OS called RTAS instantiate-rtas, but some other
OS might have called RTAS instantiate-rtas-64 instead. I guess it is
ok for now because SLOF only provides the 32-bit variant, but a
comment would certainly help IMHO.

> +    return (uint64_t)rtas_addr;
> +}
> +
> +
>  static void core_rtas_register_types(void)
>  {
>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
> @@ -489,6 +546,10 @@ static void core_rtas_register_types(void)
>                          rtas_set_power_level);
>      spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
>                          rtas_get_power_level);
> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
> +                        rtas_ibm_nmi_register);
> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
> +                        rtas_ibm_nmi_interlock);
>  }
>  
>  type_init(core_rtas_register_types)
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7e32f30..ec6f33e 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -187,6 +187,10 @@ struct SpaprMachineState {
>       * occurs during the unplug process. */
>      QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
>  
> +    /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
> +    target_ulong guest_machine_check_addr;
> +    QemuCond mc_delivery_cond;
> +
>      /*< public >*/
>      char *kvm_type;
>      char *host_model;
> @@ -623,8 +627,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>  
> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>  
>  /* RTAS ibm,get-system-parameter token values */
>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
> @@ -874,4 +880,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>  #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
>  
>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
> +uint64_t spapr_get_rtas_addr(void);
>  #endif /* HW_SPAPR_H */
> 
>
David Gibson May 10, 2019, 9:54 a.m. UTC | #4
On Fri, May 10, 2019 at 11:06:04AM +0200, Greg Kurz wrote:
> On Mon, 22 Apr 2019 12:32:58 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
> > This patch adds support in QEMU to handle "ibm,nmi-register"
> > and "ibm,nmi-interlock" RTAS calls.
> > 
> > The machine check notification address is saved when the
> > OS issues "ibm,nmi-register" RTAS call.
> > 
> > This patch also handles the case when multiple processors
> > experience machine check at or about the same time by
> > handling "ibm,nmi-interlock" call. In such cases, as per
> > PAPR, subsequent processors serialize waiting for the first
> > processor to issue the "ibm,nmi-interlock" call. The second
> > processor that also received a machine check error waits
> > till the first processor is done reading the error log.
> > The first processor issues "ibm,nmi-interlock" call
> > when the error log is consumed. This patch implements the
> > releasing part of the error-log while subsequent patch
> > (which builds error log) handles the locking part.
> > 
> > Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> > ---
> >  hw/ppc/spapr.c         |   18 ++++++++++++++
> >  hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  include/hw/ppc/spapr.h |    9 ++++++-
> >  3 files changed, 87 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index c56939a..6642cb5 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1805,6 +1805,11 @@ static void spapr_machine_reset(void)
> >      first_ppc_cpu->env.gpr[5] = 0;
> >  
> >      spapr->cas_reboot = false;
> > +
> > +    spapr->guest_machine_check_addr = -1;
> > +
> > +    /* Signal all vCPUs waiting on this condition */
> > +    qemu_cond_broadcast(&spapr->mc_delivery_cond);
> >  }
> >  
> >  static void spapr_create_nvram(SpaprMachineState *spapr)
> > @@ -2095,6 +2100,16 @@ static const VMStateDescription vmstate_spapr_dtb = {
> >      },
> >  };
> >  
> > +static const VMStateDescription vmstate_spapr_machine_check = {
> > +    .name = "spapr_machine_check",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> > +        VMSTATE_END_OF_LIST()
> > +    },
> 
> This VMState descriptor is missing a .needed field because we only want
> to migrate the subsection if the guest has called NMI register, ie.
> spapr->guest_machine_check_addr != (target_ulong) -1.
> 
> > +};
> > +
> >  static const VMStateDescription vmstate_spapr = {
> >      .name = "spapr",
> >      .version_id = 3,
> > @@ -2127,6 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
> >          &vmstate_spapr_dtb,
> >          &vmstate_spapr_cap_large_decr,
> >          &vmstate_spapr_cap_ccf_assist,
> > +        &vmstate_spapr_machine_check,
> >          NULL
> >      }
> >  };
> > @@ -3068,6 +3084,8 @@ static void spapr_machine_init(MachineState *machine)
> >  
> >          kvmppc_spapr_enable_inkernel_multitce();
> >      }
> > +
> > +    qemu_cond_init(&spapr->mc_delivery_cond);
> >  }
> >  
> >  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
> > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> > index ee24212..c2f3991 100644
> > --- a/hw/ppc/spapr_rtas.c
> > +++ b/hw/ppc/spapr_rtas.c
> > @@ -348,6 +348,39 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >      rtas_st(rets, 1, 100);
> >  }
> >  
> > +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> > +                                  SpaprMachineState *spapr,
> > +                                  uint32_t token, uint32_t nargs,
> > +                                  target_ulong args,
> > +                                  uint32_t nret, target_ulong rets)
> > +{
> > +    uint64_t rtas_addr = spapr_get_rtas_addr();
> > +
> > +    if (!rtas_addr) {
> > +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> > +        return;
> > +    }
> > +
> > +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> > +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > +}
> > +
> > +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> > +                                   SpaprMachineState *spapr,
> > +                                   uint32_t token, uint32_t nargs,
> > +                                   target_ulong args,
> > +                                   uint32_t nret, target_ulong rets)
> > +{
> > +    if (!spapr->guest_machine_check_addr) {
> 
> Hmm... the default value is -1. It looks like the check should rather be:
> 
>     if (spapr->guest_machine_check_addr == (target_ulong) -1) {
> 
> 
> > +        /* NMI register not called */
> > +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> > +    } else {
> > +        qemu_cond_signal(&spapr->mc_delivery_cond);
> > +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > +    }
> > +}
> > +
> > +
> >  static struct rtas_call {
> >      const char *name;
> >      spapr_rtas_fn fn;
> > @@ -466,6 +499,30 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
> >      }
> >  }
> >  
> > +uint64_t spapr_get_rtas_addr(void)
> 
> Shouldn't this be hwaddr instead of uint64_t ?
> 
> > +{
> > +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > +    int rtas_node;
> > +    const struct fdt_property *rtas_addr_prop;
> > +    void *fdt = spapr->fdt_blob;
> > +    uint32_t rtas_addr;
> > +
> > +    /* fetch rtas addr from fdt */
> > +    rtas_node = fdt_path_offset(fdt, "/rtas");
> > +    if (rtas_node == 0) {
> > +        return 0;
> > +    }
> > +
> > +    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
> > +    if (!rtas_addr_prop) {
> 
> Just for curiosity: this is ok for linux, but what about other OSes (eg. AIX) ?
> 
> > +        return 0;
> > +    }
> > +
> > +    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);
> 
> Also this assumes the OS called RTAS instantiate-rtas, but some other
> OS might have called RTAS instantiate-rtas-64 instead. I guess it is
> ok for now because SLOF only provides the 32-bit variant, but a
> comment would certainly help IMHO.

I have a feeling kvm-unit-tests may not call instantiate-rtas at all.
Greg Kurz May 10, 2019, 2:33 p.m. UTC | #5
On Fri, 10 May 2019 11:06:04 +0200
Greg Kurz <groug@kaod.org> wrote:

> On Mon, 22 Apr 2019 12:32:58 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
> > This patch adds support in QEMU to handle "ibm,nmi-register"
> > and "ibm,nmi-interlock" RTAS calls.
> > 
> > The machine check notification address is saved when the
> > OS issues "ibm,nmi-register" RTAS call.
> > 
> > This patch also handles the case when multiple processors
> > experience machine check at or about the same time by
> > handling "ibm,nmi-interlock" call. In such cases, as per
> > PAPR, subsequent processors serialize waiting for the first
> > processor to issue the "ibm,nmi-interlock" call. The second
> > processor that also received a machine check error waits
> > till the first processor is done reading the error log.
> > The first processor issues "ibm,nmi-interlock" call
> > when the error log is consumed. This patch implements the
> > releasing part of the error-log while subsequent patch
> > (which builds error log) handles the locking part.
> > 
> > Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> > ---
> >  hw/ppc/spapr.c         |   18 ++++++++++++++
> >  hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  include/hw/ppc/spapr.h |    9 ++++++-
> >  3 files changed, 87 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index c56939a..6642cb5 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1805,6 +1805,11 @@ static void spapr_machine_reset(void)
> >      first_ppc_cpu->env.gpr[5] = 0;
> >  
> >      spapr->cas_reboot = false;
> > +
> > +    spapr->guest_machine_check_addr = -1;
> > +
> > +    /* Signal all vCPUs waiting on this condition */
> > +    qemu_cond_broadcast(&spapr->mc_delivery_cond);
> >  }
> >  
> >  static void spapr_create_nvram(SpaprMachineState *spapr)
> > @@ -2095,6 +2100,16 @@ static const VMStateDescription vmstate_spapr_dtb = {
> >      },
> >  };
> >  
> > +static const VMStateDescription vmstate_spapr_machine_check = {
> > +    .name = "spapr_machine_check",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),

Also this should use VMSTATE_UINTTL()

> > +        VMSTATE_END_OF_LIST()
> > +    },  
> 
> This VMState descriptor is missing a .needed field because we only want
> to migrate the subsection if the guest has called NMI register, ie.
> spapr->guest_machine_check_addr != (target_ulong) -1.
> 
> > +};
> > +
> >  static const VMStateDescription vmstate_spapr = {765cf442a8afe8e5c8c6896b5072066df5129077
> >      .name = "spapr",
> >      .version_id = 3,
> > @@ -2127,6 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
> >          &vmstate_spapr_dtb,
> >          &vmstate_spapr_cap_large_decr,
> >          &vmstate_spapr_cap_ccf_assist,
> > +        &vmstate_spapr_machine_check,
> >          NULL
> >      }
> >  };
> > @@ -3068,6 +3084,8 @@ static void spapr_machine_init(MachineState *machine)
> >  
> >          kvmppc_spapr_enable_inkernel_multitce();
> >      }
> > +
> > +    qemu_cond_init(&spapr->mc_delivery_cond);
> >  }
> >  
> >  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
> > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> > index ee24212..c2f3991 100644
> > --- a/hw/ppc/spapr_rtas.c
> > +++ b/hw/ppc/spapr_rtas.c
> > @@ -348,6 +348,39 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >      rtas_st(rets, 1, 100);
> >  }
> >  
> > +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> > +                                  SpaprMachineState *spapr,
> > +                                  uint32_t token, uint32_t nargs,
> > +                                  target_ulong args,
> > +                                  uint32_t nret, target_ulong rets)
> > +{
> > +    uint64_t rtas_addr = spapr_get_rtas_addr();
> > +
> > +    if (!rtas_addr) {
> > +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> > +        return;
> > +    }
> > +
> > +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> > +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > +}
> > +
> > +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> > +                                   SpaprMachineState *spapr,
> > +                                   uint32_t token, uint32_t nargs,
> > +                                   target_ulong args,
> > +                                   uint32_t nret, target_ulong rets)
> > +{
> > +    if (!spapr->guest_machine_check_addr) {  
> 
> Hmm... the default value is -1. It looks like the check should rather be:
> 
>     if (spapr->guest_machine_check_addr == (target_ulong) -1) {
> 
> 
> > +        /* NMI register not called */
> > +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> > +    } else {
> > +        qemu_cond_signal(&spapr->mc_delivery_cond);
> > +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > +    }
> > +}
> > +
> > +
> >  static struct rtas_call {
> >      const char *name;
> >      spapr_rtas_fn fn;
> > @@ -466,6 +499,30 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
> >      }
> >  }
> >  
> > +uint64_t spapr_get_rtas_addr(void)  
> 
> Shouldn't this be hwaddr instead of uint64_t ?
> 
> > +{
> > +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > +    int rtas_node;
> > +    const struct fdt_property *rtas_addr_prop;
> > +    void *fdt = spapr->fdt_blob;
> > +    uint32_t rtas_addr;
> > +
> > +    /* fetch rtas addr from fdt */
> > +    rtas_node = fdt_path_offset(fdt, "/rtas");
> > +    if (rtas_node == 0) {
> > +        return 0;
> > +    }
> > +
> > +    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
> > +    if (!rtas_addr_prop) {  
> 
> Just for curiosity: this is ok for linux, but what about other OSes (eg. AIX) ?
> 
> > +        return 0;
> > +    }
> > +
> > +    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);  
> 
> Also this assumes the OS called RTAS instantiate-rtas, but some other
> OS might have called RTAS instantiate-rtas-64 instead. I guess it is
> ok for now because SLOF only provides the 32-bit variant, but a
> comment would certainly help IMHO.
> 
> > +    return (uint64_t)rtas_addr;
> > +}
> > +
> > +
> >  static void core_rtas_register_types(void)
> >  {
> >      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
> > @@ -489,6 +546,10 @@ static void core_rtas_register_types(void)
> >                          rtas_set_power_level);
> >      spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
> >                          rtas_get_power_level);
> > +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
> > +                        rtas_ibm_nmi_register);
> > +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
> > +                        rtas_ibm_nmi_interlock);
> >  }
> >  
> >  type_init(core_rtas_register_types)
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 7e32f30..ec6f33e 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -187,6 +187,10 @@ struct SpaprMachineState {
> >       * occurs during the unplug process. */
> >      QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
> >  
> > +    /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
> > +    target_ulong guest_machine_check_addr;
> > +    QemuCond mc_delivery_cond;
> > +
> >      /*< public >*/
> >      char *kvm_type;
> >      char *host_model;
> > @@ -623,8 +627,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
> >  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
> >  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
> >  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
> > +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
> > +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
> >  
> > -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
> > +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
> >  
> >  /* RTAS ibm,get-system-parameter token values */
> >  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
> > @@ -874,4 +880,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
> >  #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
> >  
> >  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
> > +uint64_t spapr_get_rtas_addr(void);
> >  #endif /* HW_SPAPR_H */
> > 
> >   
> 
>
Aravinda Prasad May 13, 2019, 4:53 a.m. UTC | #6
On Friday 10 May 2019 02:36 PM, Greg Kurz wrote:
> On Mon, 22 Apr 2019 12:32:58 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>> This patch adds support in QEMU to handle "ibm,nmi-register"
>> and "ibm,nmi-interlock" RTAS calls.
>>
>> The machine check notification address is saved when the
>> OS issues "ibm,nmi-register" RTAS call.
>>
>> This patch also handles the case when multiple processors
>> experience machine check at or about the same time by
>> handling "ibm,nmi-interlock" call. In such cases, as per
>> PAPR, subsequent processors serialize waiting for the first
>> processor to issue the "ibm,nmi-interlock" call. The second
>> processor that also received a machine check error waits
>> till the first processor is done reading the error log.
>> The first processor issues "ibm,nmi-interlock" call
>> when the error log is consumed. This patch implements the
>> releasing part of the error-log while subsequent patch
>> (which builds error log) handles the locking part.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr.c         |   18 ++++++++++++++
>>  hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    9 ++++++-
>>  3 files changed, 87 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index c56939a..6642cb5 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1805,6 +1805,11 @@ static void spapr_machine_reset(void)
>>      first_ppc_cpu->env.gpr[5] = 0;
>>  
>>      spapr->cas_reboot = false;
>> +
>> +    spapr->guest_machine_check_addr = -1;
>> +
>> +    /* Signal all vCPUs waiting on this condition */
>> +    qemu_cond_broadcast(&spapr->mc_delivery_cond);
>>  }
>>  
>>  static void spapr_create_nvram(SpaprMachineState *spapr)
>> @@ -2095,6 +2100,16 @@ static const VMStateDescription vmstate_spapr_dtb = {
>>      },
>>  };
>>  
>> +static const VMStateDescription vmstate_spapr_machine_check = {
>> +    .name = "spapr_machine_check",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
>> +        VMSTATE_END_OF_LIST()
>> +    },
> 
> This VMState descriptor is missing a .needed field because we only want
> to migrate the subsection if the guest has called NMI register, ie.
> spapr->guest_machine_check_addr != (target_ulong) -1.

Ok.. let me check.

> 
>> +};
>> +
>>  static const VMStateDescription vmstate_spapr = {
>>      .name = "spapr",
>>      .version_id = 3,
>> @@ -2127,6 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
>>          &vmstate_spapr_dtb,
>>          &vmstate_spapr_cap_large_decr,
>>          &vmstate_spapr_cap_ccf_assist,
>> +        &vmstate_spapr_machine_check,
>>          NULL
>>      }
>>  };
>> @@ -3068,6 +3084,8 @@ static void spapr_machine_init(MachineState *machine)
>>  
>>          kvmppc_spapr_enable_inkernel_multitce();
>>      }
>> +
>> +    qemu_cond_init(&spapr->mc_delivery_cond);
>>  }
>>  
>>  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index ee24212..c2f3991 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -348,6 +348,39 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>      rtas_st(rets, 1, 100);
>>  }
>>  
>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>> +                                  SpaprMachineState *spapr,
>> +                                  uint32_t token, uint32_t nargs,
>> +                                  target_ulong args,
>> +                                  uint32_t nret, target_ulong rets)
>> +{
>> +    uint64_t rtas_addr = spapr_get_rtas_addr();
>> +
>> +    if (!rtas_addr) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +        return;
>> +    }
>> +
>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +
>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> +                                   SpaprMachineState *spapr,
>> +                                   uint32_t token, uint32_t nargs,
>> +                                   target_ulong args,
>> +                                   uint32_t nret, target_ulong rets)
>> +{
>> +    if (!spapr->guest_machine_check_addr) {
> 
> Hmm... the default value is -1. It looks like the check should rather be:
> 
>     if (spapr->guest_machine_check_addr == (target_ulong) -1) {

ok..

> 
> 
>> +        /* NMI register not called */
>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>> +    } else {
>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +    }
>> +}
>> +
>> +
>>  static struct rtas_call {
>>      const char *name;
>>      spapr_rtas_fn fn;
>> @@ -466,6 +499,30 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
>>      }
>>  }
>>  
>> +uint64_t spapr_get_rtas_addr(void)
> 
> Shouldn't this be hwaddr instead of uint64_t ?

Yes, I will change it.

> 
>> +{
>> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> +    int rtas_node;
>> +    const struct fdt_property *rtas_addr_prop;
>> +    void *fdt = spapr->fdt_blob;
>> +    uint32_t rtas_addr;
>> +
>> +    /* fetch rtas addr from fdt */
>> +    rtas_node = fdt_path_offset(fdt, "/rtas");
>> +    if (rtas_node == 0) {
>> +        return 0;
>> +    }
>> +
>> +    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
>> +    if (!rtas_addr_prop) {
> 
> Just for curiosity: this is ok for linux, but what about other OSes (eg. AIX) ?

Really not sure! Need to check.

> 
>> +        return 0;
>> +    }
>> +
>> +    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);
> 
> Also this assumes the OS called RTAS instantiate-rtas, but some other
> OS might have called RTAS instantiate-rtas-64 instead. I guess it is
> ok for now because SLOF only provides the 32-bit variant, but a
> comment would certainly help IMHO.

Sure..

Regards,
Aravinda

> 
>> +    return (uint64_t)rtas_addr;
>> +}
>> +
>> +
>>  static void core_rtas_register_types(void)
>>  {
>>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
>> @@ -489,6 +546,10 @@ static void core_rtas_register_types(void)
>>                          rtas_set_power_level);
>>      spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
>>                          rtas_get_power_level);
>> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
>> +                        rtas_ibm_nmi_register);
>> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>> +                        rtas_ibm_nmi_interlock);
>>  }
>>  
>>  type_init(core_rtas_register_types)
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 7e32f30..ec6f33e 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -187,6 +187,10 @@ struct SpaprMachineState {
>>       * occurs during the unplug process. */
>>      QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
>>  
>> +    /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
>> +    target_ulong guest_machine_check_addr;
>> +    QemuCond mc_delivery_cond;
>> +
>>      /*< public >*/
>>      char *kvm_type;
>>      char *host_model;
>> @@ -623,8 +627,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
>> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>>  
>> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>>  
>>  /* RTAS ibm,get-system-parameter token values */
>>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
>> @@ -874,4 +880,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>>  #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
>>  
>>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>> +uint64_t spapr_get_rtas_addr(void);
>>  #endif /* HW_SPAPR_H */
>>
>>
>
Aravinda Prasad May 13, 2019, 4:57 a.m. UTC | #7
On Friday 10 May 2019 08:03 PM, Greg Kurz wrote:
> On Fri, 10 May 2019 11:06:04 +0200
> Greg Kurz <groug@kaod.org> wrote:
> 
>> On Mon, 22 Apr 2019 12:32:58 +0530
>> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
>>
>>> This patch adds support in QEMU to handle "ibm,nmi-register"
>>> and "ibm,nmi-interlock" RTAS calls.
>>>
>>> The machine check notification address is saved when the
>>> OS issues "ibm,nmi-register" RTAS call.
>>>
>>> This patch also handles the case when multiple processors
>>> experience machine check at or about the same time by
>>> handling "ibm,nmi-interlock" call. In such cases, as per
>>> PAPR, subsequent processors serialize waiting for the first
>>> processor to issue the "ibm,nmi-interlock" call. The second
>>> processor that also received a machine check error waits
>>> till the first processor is done reading the error log.
>>> The first processor issues "ibm,nmi-interlock" call
>>> when the error log is consumed. This patch implements the
>>> releasing part of the error-log while subsequent patch
>>> (which builds error log) handles the locking part.
>>>
>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>> ---
>>>  hw/ppc/spapr.c         |   18 ++++++++++++++
>>>  hw/ppc/spapr_rtas.c    |   61 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>  include/hw/ppc/spapr.h |    9 ++++++-
>>>  3 files changed, 87 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index c56939a..6642cb5 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -1805,6 +1805,11 @@ static void spapr_machine_reset(void)
>>>      first_ppc_cpu->env.gpr[5] = 0;
>>>  
>>>      spapr->cas_reboot = false;
>>> +
>>> +    spapr->guest_machine_check_addr = -1;
>>> +
>>> +    /* Signal all vCPUs waiting on this condition */
>>> +    qemu_cond_broadcast(&spapr->mc_delivery_cond);
>>>  }
>>>  
>>>  static void spapr_create_nvram(SpaprMachineState *spapr)
>>> @@ -2095,6 +2100,16 @@ static const VMStateDescription vmstate_spapr_dtb = {
>>>      },
>>>  };
>>>  
>>> +static const VMStateDescription vmstate_spapr_machine_check = {
>>> +    .name = "spapr_machine_check",
>>> +    .version_id = 1,
>>> +    .minimum_version_id = 1,
>>> +    .fields = (VMStateField[]) {
>>> +        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> 
> Also this should use VMSTATE_UINTTL()

sure..

Regards,
Aravinda

> 
>>> +        VMSTATE_END_OF_LIST()
>>> +    },  
>>
>> This VMState descriptor is missing a .needed field because we only want
>> to migrate the subsection if the guest has called NMI register, ie.
>> spapr->guest_machine_check_addr != (target_ulong) -1.
>>
>>> +};
>>> +
>>>  static const VMStateDescription vmstate_spapr = {765cf442a8afe8e5c8c6896b5072066df5129077
>>>      .name = "spapr",
>>>      .version_id = 3,
>>> @@ -2127,6 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
>>>          &vmstate_spapr_dtb,
>>>          &vmstate_spapr_cap_large_decr,
>>>          &vmstate_spapr_cap_ccf_assist,
>>> +        &vmstate_spapr_machine_check,
>>>          NULL
>>>      }
>>>  };
>>> @@ -3068,6 +3084,8 @@ static void spapr_machine_init(MachineState *machine)
>>>  
>>>          kvmppc_spapr_enable_inkernel_multitce();
>>>      }
>>> +
>>> +    qemu_cond_init(&spapr->mc_delivery_cond);
>>>  }
>>>  
>>>  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>>> index ee24212..c2f3991 100644
>>> --- a/hw/ppc/spapr_rtas.c
>>> +++ b/hw/ppc/spapr_rtas.c
>>> @@ -348,6 +348,39 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>      rtas_st(rets, 1, 100);
>>>  }
>>>  
>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>>> +                                  SpaprMachineState *spapr,
>>> +                                  uint32_t token, uint32_t nargs,
>>> +                                  target_ulong args,
>>> +                                  uint32_t nret, target_ulong rets)
>>> +{
>>> +    uint64_t rtas_addr = spapr_get_rtas_addr();
>>> +
>>> +    if (!rtas_addr) {
>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>> +        return;
>>> +    }
>>> +
>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>> +}
>>> +
>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>> +                                   SpaprMachineState *spapr,
>>> +                                   uint32_t token, uint32_t nargs,
>>> +                                   target_ulong args,
>>> +                                   uint32_t nret, target_ulong rets)
>>> +{
>>> +    if (!spapr->guest_machine_check_addr) {  
>>
>> Hmm... the default value is -1. It looks like the check should rather be:
>>
>>     if (spapr->guest_machine_check_addr == (target_ulong) -1) {
>>
>>
>>> +        /* NMI register not called */
>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>>> +    } else {
>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>>> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>> +    }
>>> +}
>>> +
>>> +
>>>  static struct rtas_call {
>>>      const char *name;
>>>      spapr_rtas_fn fn;
>>> @@ -466,6 +499,30 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
>>>      }
>>>  }
>>>  
>>> +uint64_t spapr_get_rtas_addr(void)  
>>
>> Shouldn't this be hwaddr instead of uint64_t ?
>>
>>> +{
>>> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>>> +    int rtas_node;
>>> +    const struct fdt_property *rtas_addr_prop;
>>> +    void *fdt = spapr->fdt_blob;
>>> +    uint32_t rtas_addr;
>>> +
>>> +    /* fetch rtas addr from fdt */
>>> +    rtas_node = fdt_path_offset(fdt, "/rtas");
>>> +    if (rtas_node == 0) {
>>> +        return 0;
>>> +    }
>>> +
>>> +    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
>>> +    if (!rtas_addr_prop) {  
>>
>> Just for curiosity: this is ok for linux, but what about other OSes (eg. AIX) ?
>>
>>> +        return 0;
>>> +    }
>>> +
>>> +    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);  
>>
>> Also this assumes the OS called RTAS instantiate-rtas, but some other
>> OS might have called RTAS instantiate-rtas-64 instead. I guess it is
>> ok for now because SLOF only provides the 32-bit variant, but a
>> comment would certainly help IMHO.
>>
>>> +    return (uint64_t)rtas_addr;
>>> +}
>>> +
>>> +
>>>  static void core_rtas_register_types(void)
>>>  {
>>>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
>>> @@ -489,6 +546,10 @@ static void core_rtas_register_types(void)
>>>                          rtas_set_power_level);
>>>      spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
>>>                          rtas_get_power_level);
>>> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
>>> +                        rtas_ibm_nmi_register);
>>> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>>> +                        rtas_ibm_nmi_interlock);
>>>  }
>>>  
>>>  type_init(core_rtas_register_types)
>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>> index 7e32f30..ec6f33e 100644
>>> --- a/include/hw/ppc/spapr.h
>>> +++ b/include/hw/ppc/spapr.h
>>> @@ -187,6 +187,10 @@ struct SpaprMachineState {
>>>       * occurs during the unplug process. */
>>>      QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
>>>  
>>> +    /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
>>> +    target_ulong guest_machine_check_addr;
>>> +    QemuCond mc_delivery_cond;
>>> +
>>>      /*< public >*/
>>>      char *kvm_type;
>>>      char *host_model;
>>> @@ -623,8 +627,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>>>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>>>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>>>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
>>> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
>>> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>>>  
>>> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
>>> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>>>  
>>>  /* RTAS ibm,get-system-parameter token values */
>>>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
>>> @@ -874,4 +880,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>>>  #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
>>>  
>>>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>>> +uint64_t spapr_get_rtas_addr(void);
>>>  #endif /* HW_SPAPR_H */
>>>
>>>   
>>
>>
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index c56939a..6642cb5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1805,6 +1805,11 @@  static void spapr_machine_reset(void)
     first_ppc_cpu->env.gpr[5] = 0;
 
     spapr->cas_reboot = false;
+
+    spapr->guest_machine_check_addr = -1;
+
+    /* Signal all vCPUs waiting on this condition */
+    qemu_cond_broadcast(&spapr->mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2095,6 +2100,16 @@  static const VMStateDescription vmstate_spapr_dtb = {
     },
 };
 
+static const VMStateDescription vmstate_spapr_machine_check = {
+    .name = "spapr_machine_check",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_spapr = {
     .name = "spapr",
     .version_id = 3,
@@ -2127,6 +2142,7 @@  static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_dtb,
         &vmstate_spapr_cap_large_decr,
         &vmstate_spapr_cap_ccf_assist,
+        &vmstate_spapr_machine_check,
         NULL
     }
 };
@@ -3068,6 +3084,8 @@  static void spapr_machine_init(MachineState *machine)
 
         kvmppc_spapr_enable_inkernel_multitce();
     }
+
+    qemu_cond_init(&spapr->mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index ee24212..c2f3991 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -348,6 +348,39 @@  static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
     rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+                                  SpaprMachineState *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args,
+                                  uint32_t nret, target_ulong rets)
+{
+    uint64_t rtas_addr = spapr_get_rtas_addr();
+
+    if (!rtas_addr) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    spapr->guest_machine_check_addr = rtas_ld(args, 1);
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+                                   SpaprMachineState *spapr,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+{
+    if (!spapr->guest_machine_check_addr) {
+        /* NMI register not called */
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+    } else {
+        qemu_cond_signal(&spapr->mc_delivery_cond);
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    }
+}
+
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -466,6 +499,30 @@  void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr)
     }
 }
 
+uint64_t spapr_get_rtas_addr(void)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    int rtas_node;
+    const struct fdt_property *rtas_addr_prop;
+    void *fdt = spapr->fdt_blob;
+    uint32_t rtas_addr;
+
+    /* fetch rtas addr from fdt */
+    rtas_node = fdt_path_offset(fdt, "/rtas");
+    if (rtas_node == 0) {
+        return 0;
+    }
+
+    rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", NULL);
+    if (!rtas_addr_prop) {
+        return 0;
+    }
+
+    rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);
+    return (uint64_t)rtas_addr;
+}
+
+
 static void core_rtas_register_types(void)
 {
     spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
@@ -489,6 +546,10 @@  static void core_rtas_register_types(void)
                         rtas_set_power_level);
     spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
                         rtas_get_power_level);
+    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+                        rtas_ibm_nmi_register);
+    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+                        rtas_ibm_nmi_interlock);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7e32f30..ec6f33e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -187,6 +187,10 @@  struct SpaprMachineState {
      * occurs during the unplug process. */
     QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+    /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+    target_ulong guest_machine_check_addr;
+    QemuCond mc_delivery_cond;
+
     /*< public >*/
     char *kvm_type;
     char *host_model;
@@ -623,8 +627,10 @@  target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
 #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
+#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
 
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
@@ -874,4 +880,5 @@  void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
 #define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
 
 void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
+uint64_t spapr_get_rtas_addr(void);
 #endif /* HW_SPAPR_H */