diff mbox series

[v12,2/6] ppc: spapr: Introduce FWNMI capability

Message ID 156715642090.27761.17328167484986424722.stgit@aravinda
State New
Headers show
Series target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests | expand

Commit Message

Aravinda Prasad Aug. 30, 2019, 9:13 a.m. UTC
Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
the KVM causes guest exit with NMI as exit reason
when it encounters a machine check exception on the
address belonging to a guest. Without this capability
enabled, KVM redirects machine check exceptions to
guest's 0x200 vector.

This patch also introduces fwnmi-mce capability to
deal with the case when a guest with the
KVM_CAP_PPC_FWNMI capability enabled is attempted
to migrate to a host that does not support this
capability.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         |    1 +
 hw/ppc/spapr_caps.c    |   29 +++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |    4 +++-
 target/ppc/kvm.c       |   22 ++++++++++++++++++++++
 target/ppc/kvm_ppc.h   |   11 +++++++++++
 5 files changed, 66 insertions(+), 1 deletion(-)

Comments

Greg Kurz Aug. 30, 2019, 1:58 p.m. UTC | #1
On Fri, 30 Aug 2019 14:43:40 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
> the KVM causes guest exit with NMI as exit reason
> when it encounters a machine check exception on the
> address belonging to a guest. Without this capability
> enabled, KVM redirects machine check exceptions to
> guest's 0x200 vector.
> 
> This patch also introduces fwnmi-mce capability to
> deal with the case when a guest with the
> KVM_CAP_PPC_FWNMI capability enabled is attempted
> to migrate to a host that does not support this
> capability.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         |    1 +
>  hw/ppc/spapr_caps.c    |   29 +++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    4 +++-
>  target/ppc/kvm.c       |   22 ++++++++++++++++++++++
>  target/ppc/kvm_ppc.h   |   11 +++++++++++
>  5 files changed, 66 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index ea56499..8288e8b 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4487,6 +4487,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>      spapr_caps_add_properties(smc, &error_abort);
>      smc->irq = &spapr_irq_dual;
>      smc->dr_phb_enabled = true;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 481dfd2..c11ff87 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -496,6 +496,25 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>      }
>  }
>  
> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> +                                Error **errp)
> +{
> +    if (!val) {
> +        return; /* Disabled by default */
> +    }
> +
> +    if (tcg_enabled()) {
> +        /*
> +         * TCG support may not be correct in some conditions (e.g., in case
> +         * of software injected faults like duplicate SLBs).
> +         */
> +        warn_report("Firmware Assisted Non-Maskable Interrupts not supported in TCG");
> +    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
> +        error_setg(errp,
> +"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
> +    }
> +}
> +
>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>      [SPAPR_CAP_HTM] = {
>          .name = "htm",
> @@ -595,6 +614,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_ccf_assist_apply,
>      },
> +    [SPAPR_CAP_FWNMI_MCE] = {
> +        .name = "fwnmi-mce",
> +        .description = "Handle fwnmi machine check exceptions",
> +        .index = SPAPR_CAP_FWNMI_MCE,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_fwnmi_mce_apply,
> +    },
>  };
>  
>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> @@ -734,6 +762,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
>  
>  void spapr_caps_init(SpaprMachineState *spapr)
>  {
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 03111fd..66049ac 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -79,8 +79,10 @@ typedef enum {
>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
>  /* Count Cache Flush Assist HW Instruction */
>  #define SPAPR_CAP_CCF_ASSIST            0x09
> +/* FWNMI machine check handling */
> +#define SPAPR_CAP_FWNMI_MCE             0x0A
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
>  
>  /*
>   * Capability Values
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 8c5b1f2..c055fc1 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -85,6 +85,7 @@ static int cap_ppc_safe_indirect_branch;
>  static int cap_ppc_count_cache_flush_assist;
>  static int cap_ppc_nested_kvm_hv;
>  static int cap_large_decr;
> +static int cap_ppc_fwnmi;
>  
>  static uint32_t debug_inst_opcode;
>  
> @@ -2055,6 +2056,22 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>      }
>  }
>  
> +int kvmppc_set_fwnmi(void)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> +    CPUState *cs = CPU(cpu);
> +    int ret;
> +
> +    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
> +    if (ret) {
> +        error_report("This KVM version does not support FWNMI");
> +        return ret;
> +    }
> +
> +    cap_ppc_fwnmi = 1;

Hmm... AFAICT the meaning of cap_ppc_fwnmi should just be "KVM knows about
FWNMI", not "FWNMI was successfully enabled in KVM". Your v10 used to set
cap_ppc_fwnmi in kvm_arch_init() just like the other guys... why this change ?

> +    return ret;
> +}
> +
>  int kvmppc_smt_threads(void)
>  {
>      return cap_ppc_smt ? cap_ppc_smt : 1;
> @@ -2355,6 +2372,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
>      return cap_mmu_hash_v3;
>  }
>  
> +bool kvmppc_has_cap_ppc_fwnmi(void)
> +{
> +    return cap_ppc_fwnmi;
> +}
> +
>  static bool kvmppc_power8_host(void)
>  {
>      bool ret = false;
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 98bd7d5..2990898 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -27,6 +27,8 @@ void kvmppc_enable_h_page_init(void);
>  void kvmppc_set_papr(PowerPCCPU *cpu);
>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
> +int kvmppc_set_fwnmi(void);
> +bool kvmppc_has_cap_ppc_fwnmi(void);
>  int kvmppc_smt_threads(void);
>  void kvmppc_hint_smt_possible(Error **errp);
>  int kvmppc_set_smt_threads(int smt);
> @@ -159,6 +161,15 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>  {
>  }
>  
> +static inline int kvmppc_set_fwnmi(void)
> +{

Missing return -1;

> +}
> +
> +static inline bool kvmppc_has_cap_ppc_fwnmi(void)
> +{
> +    return false;
> +}
> +
>  static inline int kvmppc_smt_threads(void)
>  {
>      return 1;
>
Aravinda Prasad Sept. 3, 2019, 7:22 a.m. UTC | #2
On Friday 30 August 2019 07:28 PM, Greg Kurz wrote:
> On Fri, 30 Aug 2019 14:43:40 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
>> the KVM causes guest exit with NMI as exit reason
>> when it encounters a machine check exception on the
>> address belonging to a guest. Without this capability
>> enabled, KVM redirects machine check exceptions to
>> guest's 0x200 vector.
>>
>> This patch also introduces fwnmi-mce capability to
>> deal with the case when a guest with the
>> KVM_CAP_PPC_FWNMI capability enabled is attempted
>> to migrate to a host that does not support this
>> capability.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr.c         |    1 +
>>  hw/ppc/spapr_caps.c    |   29 +++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    4 +++-
>>  target/ppc/kvm.c       |   22 ++++++++++++++++++++++
>>  target/ppc/kvm_ppc.h   |   11 +++++++++++
>>  5 files changed, 66 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index ea56499..8288e8b 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -4487,6 +4487,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>      spapr_caps_add_properties(smc, &error_abort);
>>      smc->irq = &spapr_irq_dual;
>>      smc->dr_phb_enabled = true;
>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>> index 481dfd2..c11ff87 100644
>> --- a/hw/ppc/spapr_caps.c
>> +++ b/hw/ppc/spapr_caps.c
>> @@ -496,6 +496,25 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>>      }
>>  }
>>  
>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
>> +                                Error **errp)
>> +{
>> +    if (!val) {
>> +        return; /* Disabled by default */
>> +    }
>> +
>> +    if (tcg_enabled()) {
>> +        /*
>> +         * TCG support may not be correct in some conditions (e.g., in case
>> +         * of software injected faults like duplicate SLBs).
>> +         */
>> +        warn_report("Firmware Assisted Non-Maskable Interrupts not supported in TCG");
>> +    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
>> +        error_setg(errp,
>> +"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
>> +    }
>> +}
>> +
>>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>      [SPAPR_CAP_HTM] = {
>>          .name = "htm",
>> @@ -595,6 +614,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>          .type = "bool",
>>          .apply = cap_ccf_assist_apply,
>>      },
>> +    [SPAPR_CAP_FWNMI_MCE] = {
>> +        .name = "fwnmi-mce",
>> +        .description = "Handle fwnmi machine check exceptions",
>> +        .index = SPAPR_CAP_FWNMI_MCE,
>> +        .get = spapr_cap_get_bool,
>> +        .set = spapr_cap_set_bool,
>> +        .type = "bool",
>> +        .apply = cap_fwnmi_mce_apply,
>> +    },
>>  };
>>  
>>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
>> @@ -734,6 +762,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
>>  
>>  void spapr_caps_init(SpaprMachineState *spapr)
>>  {
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 03111fd..66049ac 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -79,8 +79,10 @@ typedef enum {
>>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
>>  /* Count Cache Flush Assist HW Instruction */
>>  #define SPAPR_CAP_CCF_ASSIST            0x09
>> +/* FWNMI machine check handling */
>> +#define SPAPR_CAP_FWNMI_MCE             0x0A
>>  /* Num Caps */
>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
>>  
>>  /*
>>   * Capability Values
>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index 8c5b1f2..c055fc1 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -85,6 +85,7 @@ static int cap_ppc_safe_indirect_branch;
>>  static int cap_ppc_count_cache_flush_assist;
>>  static int cap_ppc_nested_kvm_hv;
>>  static int cap_large_decr;
>> +static int cap_ppc_fwnmi;
>>  
>>  static uint32_t debug_inst_opcode;
>>  
>> @@ -2055,6 +2056,22 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>>      }
>>  }
>>  
>> +int kvmppc_set_fwnmi(void)
>> +{
>> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>> +    CPUState *cs = CPU(cpu);
>> +    int ret;
>> +
>> +    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
>> +    if (ret) {
>> +        error_report("This KVM version does not support FWNMI");
>> +        return ret;
>> +    }
>> +
>> +    cap_ppc_fwnmi = 1;
> 
> Hmm... AFAICT the meaning of cap_ppc_fwnmi should just be "KVM knows about
> FWNMI", not "FWNMI was successfully enabled in KVM". Your v10 used to set
> cap_ppc_fwnmi in kvm_arch_init() just like the other guys... why this change ?

Even I thought that cap_* is for that, but cap_papr uses it the other
way. So I decided to use a similar convention for cap_ppc_fwnmi.

In v10, cap_ppc_fwnmi is set in kvm_arch_init() if FWNMI is available in
KVM and we try to enable FWNMI later in the boot phase when
rtas_ibm_nmi_register() is called. It's possible that
SPAPR_CAP_FWNMI_MCE is set and we may fail to enable FWNMI due to errors
in KVM or QEMU.

To avoid this, in v12, FWNMI is enabled if SPAPR_CAP_FWNMI_MCE is set
irrespective of rtas_ibm_nmi_register() is called or not. This way we
fail early if we are not able to enable FWNMI.

So, when SPAPR_CAP_FWNMI_MCE is set with FWNMI enabled in KVM, a machine
check error will always cause a guest exit. If the guest has registered
a handler by calling rtas_ibm_nmi_register(), then we will invoke the
guest registered handler else we will jump to 0x200 interrupt vector in
the guest.

With this change we don't need "KVM knows about FWNMI" information. So I
removed that part and used cap_ppc_fwnmi to track if FWNMI is enabled or
disabled in KVM. This also simplifies spapr_fwnmi_post_load() to avoid
enabling FWNMI multiple times (see patch 6).

> 
>> +    return ret;
>> +}
>> +
>>  int kvmppc_smt_threads(void)
>>  {
>>      return cap_ppc_smt ? cap_ppc_smt : 1;
>> @@ -2355,6 +2372,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
>>      return cap_mmu_hash_v3;
>>  }
>>  
>> +bool kvmppc_has_cap_ppc_fwnmi(void)
>> +{
>> +    return cap_ppc_fwnmi;
>> +}
>> +
>>  static bool kvmppc_power8_host(void)
>>  {
>>      bool ret = false;
>> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
>> index 98bd7d5..2990898 100644
>> --- a/target/ppc/kvm_ppc.h
>> +++ b/target/ppc/kvm_ppc.h
>> @@ -27,6 +27,8 @@ void kvmppc_enable_h_page_init(void);
>>  void kvmppc_set_papr(PowerPCCPU *cpu);
>>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
>> +int kvmppc_set_fwnmi(void);
>> +bool kvmppc_has_cap_ppc_fwnmi(void);
>>  int kvmppc_smt_threads(void);
>>  void kvmppc_hint_smt_possible(Error **errp);
>>  int kvmppc_set_smt_threads(int smt);
>> @@ -159,6 +161,15 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>>  {
>>  }
>>  
>> +static inline int kvmppc_set_fwnmi(void)
>> +{
> 
> Missing return -1;

Hmm.. missed it.

Regards,
Aravinda

> 
>> +}
>> +
>> +static inline bool kvmppc_has_cap_ppc_fwnmi(void)
>> +{
>> +    return false;
>> +}
>> +
>>  static inline int kvmppc_smt_threads(void)
>>  {
>>      return 1;
>>
>
Greg Kurz Sept. 3, 2019, 7:56 a.m. UTC | #3
On Tue, 3 Sep 2019 12:52:39 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> 
> 
> On Friday 30 August 2019 07:28 PM, Greg Kurz wrote:
> > On Fri, 30 Aug 2019 14:43:40 +0530
> > Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> > 
> >> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
> >> the KVM causes guest exit with NMI as exit reason
> >> when it encounters a machine check exception on the
> >> address belonging to a guest. Without this capability
> >> enabled, KVM redirects machine check exceptions to
> >> guest's 0x200 vector.
> >>
> >> This patch also introduces fwnmi-mce capability to
> >> deal with the case when a guest with the
> >> KVM_CAP_PPC_FWNMI capability enabled is attempted
> >> to migrate to a host that does not support this
> >> capability.
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr.c         |    1 +
> >>  hw/ppc/spapr_caps.c    |   29 +++++++++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    4 +++-
> >>  target/ppc/kvm.c       |   22 ++++++++++++++++++++++
> >>  target/ppc/kvm_ppc.h   |   11 +++++++++++
> >>  5 files changed, 66 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index ea56499..8288e8b 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -4487,6 +4487,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>      spapr_caps_add_properties(smc, &error_abort);
> >>      smc->irq = &spapr_irq_dual;
> >>      smc->dr_phb_enabled = true;
> >> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >> index 481dfd2..c11ff87 100644
> >> --- a/hw/ppc/spapr_caps.c
> >> +++ b/hw/ppc/spapr_caps.c
> >> @@ -496,6 +496,25 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> >>      }
> >>  }
> >>  
> >> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> >> +                                Error **errp)
> >> +{
> >> +    if (!val) {
> >> +        return; /* Disabled by default */
> >> +    }
> >> +
> >> +    if (tcg_enabled()) {
> >> +        /*
> >> +         * TCG support may not be correct in some conditions (e.g., in case
> >> +         * of software injected faults like duplicate SLBs).
> >> +         */
> >> +        warn_report("Firmware Assisted Non-Maskable Interrupts not supported in TCG");
> >> +    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
> >> +        error_setg(errp,
> >> +"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
> >> +    }
> >> +}
> >> +
> >>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >>      [SPAPR_CAP_HTM] = {
> >>          .name = "htm",
> >> @@ -595,6 +614,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >>          .type = "bool",
> >>          .apply = cap_ccf_assist_apply,
> >>      },
> >> +    [SPAPR_CAP_FWNMI_MCE] = {
> >> +        .name = "fwnmi-mce",
> >> +        .description = "Handle fwnmi machine check exceptions",
> >> +        .index = SPAPR_CAP_FWNMI_MCE,
> >> +        .get = spapr_cap_get_bool,
> >> +        .set = spapr_cap_set_bool,
> >> +        .type = "bool",
> >> +        .apply = cap_fwnmi_mce_apply,
> >> +    },
> >>  };
> >>  
> >>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> >> @@ -734,6 +762,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
> >>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> >>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> >>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> >> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
> >>  
> >>  void spapr_caps_init(SpaprMachineState *spapr)
> >>  {
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index 03111fd..66049ac 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -79,8 +79,10 @@ typedef enum {
> >>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
> >>  /* Count Cache Flush Assist HW Instruction */
> >>  #define SPAPR_CAP_CCF_ASSIST            0x09
> >> +/* FWNMI machine check handling */
> >> +#define SPAPR_CAP_FWNMI_MCE             0x0A
> >>  /* Num Caps */
> >> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> >> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
> >>  
> >>  /*
> >>   * Capability Values
> >> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> >> index 8c5b1f2..c055fc1 100644
> >> --- a/target/ppc/kvm.c
> >> +++ b/target/ppc/kvm.c
> >> @@ -85,6 +85,7 @@ static int cap_ppc_safe_indirect_branch;
> >>  static int cap_ppc_count_cache_flush_assist;
> >>  static int cap_ppc_nested_kvm_hv;
> >>  static int cap_large_decr;
> >> +static int cap_ppc_fwnmi;
> >>  
> >>  static uint32_t debug_inst_opcode;
> >>  
> >> @@ -2055,6 +2056,22 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
> >>      }
> >>  }
> >>  
> >> +int kvmppc_set_fwnmi(void)
> >> +{
> >> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> >> +    CPUState *cs = CPU(cpu);
> >> +    int ret;
> >> +
> >> +    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
> >> +    if (ret) {
> >> +        error_report("This KVM version does not support FWNMI");
> >> +        return ret;
> >> +    }
> >> +
> >> +    cap_ppc_fwnmi = 1;
> > 
> > Hmm... AFAICT the meaning of cap_ppc_fwnmi should just be "KVM knows about
> > FWNMI", not "FWNMI was successfully enabled in KVM". Your v10 used to set
> > cap_ppc_fwnmi in kvm_arch_init() just like the other guys... why this change ?
> 
> Even I thought that cap_* is for that, but cap_papr uses it the other
> way. So I decided to use a similar convention for cap_ppc_fwnmi.
> 
> In v10, cap_ppc_fwnmi is set in kvm_arch_init() if FWNMI is available in
> KVM and we try to enable FWNMI later in the boot phase when
> rtas_ibm_nmi_register() is called. It's possible that
> SPAPR_CAP_FWNMI_MCE is set and we may fail to enable FWNMI due to errors
> in KVM or QEMU.
> 
> To avoid this, in v12, FWNMI is enabled if SPAPR_CAP_FWNMI_MCE is set
> irrespective of rtas_ibm_nmi_register() is called or not. This way we
> fail early if we are not able to enable FWNMI.
> 

Ok. Then maybe you should add a comment in kvm_arch_init(), similar to the
cap_papr one ?

> So, when SPAPR_CAP_FWNMI_MCE is set with FWNMI enabled in KVM, a machine
> check error will always cause a guest exit. If the guest has registered
> a handler by calling rtas_ibm_nmi_register(), then we will invoke the
> guest registered handler else we will jump to 0x200 interrupt vector in
> the guest.
> 

Yeah, this seems to be handled in patch 4... which I haven't read in detail
yet.

+    if (spapr->guest_machine_check_addr == -1) {
+        /*
+         * This implies that we have hit a machine check between system
+         * reset and "ibm,nmi-register". Fall back to the old machine
+         * check behavior in such cases.
+         */
+        cs->exception_index = POWERPC_EXCP_MCHECK;
+        ppc_cpu_do_interrupt(cs);
+        return;
+    }

Thanks for the clarification.

> With this change we don't need "KVM knows about FWNMI" information. So I
> removed that part and used cap_ppc_fwnmi to track if FWNMI is enabled or
> disabled in KVM. This also simplifies spapr_fwnmi_post_load() to avoid
> enabling FWNMI multiple times (see patch 6).
> 
> > 
> >> +    return ret;
> >> +}
> >> +
> >>  int kvmppc_smt_threads(void)
> >>  {
> >>      return cap_ppc_smt ? cap_ppc_smt : 1;
> >> @@ -2355,6 +2372,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
> >>      return cap_mmu_hash_v3;
> >>  }
> >>  
> >> +bool kvmppc_has_cap_ppc_fwnmi(void)
> >> +{
> >> +    return cap_ppc_fwnmi;
> >> +}
> >> +
> >>  static bool kvmppc_power8_host(void)
> >>  {
> >>      bool ret = false;
> >> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> >> index 98bd7d5..2990898 100644
> >> --- a/target/ppc/kvm_ppc.h
> >> +++ b/target/ppc/kvm_ppc.h
> >> @@ -27,6 +27,8 @@ void kvmppc_enable_h_page_init(void);
> >>  void kvmppc_set_papr(PowerPCCPU *cpu);
> >>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
> >>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
> >> +int kvmppc_set_fwnmi(void);
> >> +bool kvmppc_has_cap_ppc_fwnmi(void);
> >>  int kvmppc_smt_threads(void);
> >>  void kvmppc_hint_smt_possible(Error **errp);
> >>  int kvmppc_set_smt_threads(int smt);
> >> @@ -159,6 +161,15 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
> >>  {
> >>  }
> >>  
> >> +static inline int kvmppc_set_fwnmi(void)
> >> +{
> > 
> > Missing return -1;
> 
> Hmm.. missed it.
> 
> Regards,
> Aravinda
> 
> > 
> >> +}
> >> +
> >> +static inline bool kvmppc_has_cap_ppc_fwnmi(void)
> >> +{
> >> +    return false;
> >> +}
> >> +
> >>  static inline int kvmppc_smt_threads(void)
> >>  {
> >>      return 1;
> >>
> > 
>
Aravinda Prasad Sept. 3, 2019, 8:27 a.m. UTC | #4
On Tuesday 03 September 2019 01:26 PM, Greg Kurz wrote:
> On Tue, 3 Sep 2019 12:52:39 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>>
>>
>> On Friday 30 August 2019 07:28 PM, Greg Kurz wrote:
>>> On Fri, 30 Aug 2019 14:43:40 +0530
>>> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
>>>
>>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
>>>> the KVM causes guest exit with NMI as exit reason
>>>> when it encounters a machine check exception on the
>>>> address belonging to a guest. Without this capability
>>>> enabled, KVM redirects machine check exceptions to
>>>> guest's 0x200 vector.
>>>>
>>>> This patch also introduces fwnmi-mce capability to
>>>> deal with the case when a guest with the
>>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
>>>> to migrate to a host that does not support this
>>>> capability.
>>>>
>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr.c         |    1 +
>>>>  hw/ppc/spapr_caps.c    |   29 +++++++++++++++++++++++++++++
>>>>  include/hw/ppc/spapr.h |    4 +++-
>>>>  target/ppc/kvm.c       |   22 ++++++++++++++++++++++
>>>>  target/ppc/kvm_ppc.h   |   11 +++++++++++
>>>>  5 files changed, 66 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index ea56499..8288e8b 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -4487,6 +4487,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>      smc->irq = &spapr_irq_dual;
>>>>      smc->dr_phb_enabled = true;
>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>>>> index 481dfd2..c11ff87 100644
>>>> --- a/hw/ppc/spapr_caps.c
>>>> +++ b/hw/ppc/spapr_caps.c
>>>> @@ -496,6 +496,25 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>>>>      }
>>>>  }
>>>>  
>>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
>>>> +                                Error **errp)
>>>> +{
>>>> +    if (!val) {
>>>> +        return; /* Disabled by default */
>>>> +    }
>>>> +
>>>> +    if (tcg_enabled()) {
>>>> +        /*
>>>> +         * TCG support may not be correct in some conditions (e.g., in case
>>>> +         * of software injected faults like duplicate SLBs).
>>>> +         */
>>>> +        warn_report("Firmware Assisted Non-Maskable Interrupts not supported in TCG");
>>>> +    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
>>>> +        error_setg(errp,
>>>> +"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
>>>> +    }
>>>> +}
>>>> +
>>>>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>>>      [SPAPR_CAP_HTM] = {
>>>>          .name = "htm",
>>>> @@ -595,6 +614,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>>>          .type = "bool",
>>>>          .apply = cap_ccf_assist_apply,
>>>>      },
>>>> +    [SPAPR_CAP_FWNMI_MCE] = {
>>>> +        .name = "fwnmi-mce",
>>>> +        .description = "Handle fwnmi machine check exceptions",
>>>> +        .index = SPAPR_CAP_FWNMI_MCE,
>>>> +        .get = spapr_cap_get_bool,
>>>> +        .set = spapr_cap_set_bool,
>>>> +        .type = "bool",
>>>> +        .apply = cap_fwnmi_mce_apply,
>>>> +    },
>>>>  };
>>>>  
>>>>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
>>>> @@ -734,6 +762,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>>>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>>>>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>>>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>>>> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
>>>>  
>>>>  void spapr_caps_init(SpaprMachineState *spapr)
>>>>  {
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index 03111fd..66049ac 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -79,8 +79,10 @@ typedef enum {
>>>>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
>>>>  /* Count Cache Flush Assist HW Instruction */
>>>>  #define SPAPR_CAP_CCF_ASSIST            0x09
>>>> +/* FWNMI machine check handling */
>>>> +#define SPAPR_CAP_FWNMI_MCE             0x0A
>>>>  /* Num Caps */
>>>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
>>>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
>>>>  
>>>>  /*
>>>>   * Capability Values
>>>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>>>> index 8c5b1f2..c055fc1 100644
>>>> --- a/target/ppc/kvm.c
>>>> +++ b/target/ppc/kvm.c
>>>> @@ -85,6 +85,7 @@ static int cap_ppc_safe_indirect_branch;
>>>>  static int cap_ppc_count_cache_flush_assist;
>>>>  static int cap_ppc_nested_kvm_hv;
>>>>  static int cap_large_decr;
>>>> +static int cap_ppc_fwnmi;
>>>>  
>>>>  static uint32_t debug_inst_opcode;
>>>>  
>>>> @@ -2055,6 +2056,22 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>>>>      }
>>>>  }
>>>>  
>>>> +int kvmppc_set_fwnmi(void)
>>>> +{
>>>> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>>>> +    CPUState *cs = CPU(cpu);
>>>> +    int ret;
>>>> +
>>>> +    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
>>>> +    if (ret) {
>>>> +        error_report("This KVM version does not support FWNMI");
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    cap_ppc_fwnmi = 1;
>>>
>>> Hmm... AFAICT the meaning of cap_ppc_fwnmi should just be "KVM knows about
>>> FWNMI", not "FWNMI was successfully enabled in KVM". Your v10 used to set
>>> cap_ppc_fwnmi in kvm_arch_init() just like the other guys... why this change ?
>>
>> Even I thought that cap_* is for that, but cap_papr uses it the other
>> way. So I decided to use a similar convention for cap_ppc_fwnmi.
>>
>> In v10, cap_ppc_fwnmi is set in kvm_arch_init() if FWNMI is available in
>> KVM and we try to enable FWNMI later in the boot phase when
>> rtas_ibm_nmi_register() is called. It's possible that
>> SPAPR_CAP_FWNMI_MCE is set and we may fail to enable FWNMI due to errors
>> in KVM or QEMU.
>>
>> To avoid this, in v12, FWNMI is enabled if SPAPR_CAP_FWNMI_MCE is set
>> irrespective of rtas_ibm_nmi_register() is called or not. This way we
>> fail early if we are not able to enable FWNMI.
>>
> 
> Ok. Then maybe you should add a comment in kvm_arch_init(), similar to the
> cap_papr one ?

Sure..

Regards,
Aravinda

> 
>> So, when SPAPR_CAP_FWNMI_MCE is set with FWNMI enabled in KVM, a machine
>> check error will always cause a guest exit. If the guest has registered
>> a handler by calling rtas_ibm_nmi_register(), then we will invoke the
>> guest registered handler else we will jump to 0x200 interrupt vector in
>> the guest.
>>
> 
> Yeah, this seems to be handled in patch 4... which I haven't read in detail
> yet.
> 
> +    if (spapr->guest_machine_check_addr == -1) {
> +        /*
> +         * This implies that we have hit a machine check between system
> +         * reset and "ibm,nmi-register". Fall back to the old machine
> +         * check behavior in such cases.
> +         */
> +        cs->exception_index = POWERPC_EXCP_MCHECK;
> +        ppc_cpu_do_interrupt(cs);
> +        return;
> +    }
> 
> Thanks for the clarification.
> 
>> With this change we don't need "KVM knows about FWNMI" information. So I
>> removed that part and used cap_ppc_fwnmi to track if FWNMI is enabled or
>> disabled in KVM. This also simplifies spapr_fwnmi_post_load() to avoid
>> enabling FWNMI multiple times (see patch 6).
>>
>>>
>>>> +    return ret;
>>>> +}
>>>> +
>>>>  int kvmppc_smt_threads(void)
>>>>  {
>>>>      return cap_ppc_smt ? cap_ppc_smt : 1;
>>>> @@ -2355,6 +2372,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
>>>>      return cap_mmu_hash_v3;
>>>>  }
>>>>  
>>>> +bool kvmppc_has_cap_ppc_fwnmi(void)
>>>> +{
>>>> +    return cap_ppc_fwnmi;
>>>> +}
>>>> +
>>>>  static bool kvmppc_power8_host(void)
>>>>  {
>>>>      bool ret = false;
>>>> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
>>>> index 98bd7d5..2990898 100644
>>>> --- a/target/ppc/kvm_ppc.h
>>>> +++ b/target/ppc/kvm_ppc.h
>>>> @@ -27,6 +27,8 @@ void kvmppc_enable_h_page_init(void);
>>>>  void kvmppc_set_papr(PowerPCCPU *cpu);
>>>>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>>>>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
>>>> +int kvmppc_set_fwnmi(void);
>>>> +bool kvmppc_has_cap_ppc_fwnmi(void);
>>>>  int kvmppc_smt_threads(void);
>>>>  void kvmppc_hint_smt_possible(Error **errp);
>>>>  int kvmppc_set_smt_threads(int smt);
>>>> @@ -159,6 +161,15 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>>>>  {
>>>>  }
>>>>  
>>>> +static inline int kvmppc_set_fwnmi(void)
>>>> +{
>>>
>>> Missing return -1;
>>
>> Hmm.. missed it.
>>
>> Regards,
>> Aravinda
>>
>>>
>>>> +}
>>>> +
>>>> +static inline bool kvmppc_has_cap_ppc_fwnmi(void)
>>>> +{
>>>> +    return false;
>>>> +}
>>>> +
>>>>  static inline int kvmppc_smt_threads(void)
>>>>  {
>>>>      return 1;
>>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ea56499..8288e8b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4487,6 +4487,7 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
     spapr_caps_add_properties(smc, &error_abort);
     smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2..c11ff87 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,25 @@  static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
     }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+                                Error **errp)
+{
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    if (tcg_enabled()) {
+        /*
+         * TCG support may not be correct in some conditions (e.g., in case
+         * of software injected faults like duplicate SLBs).
+         */
+        warn_report("Firmware Assisted Non-Maskable Interrupts not supported in TCG");
+    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
+        error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
+    }
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -595,6 +614,15 @@  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_ccf_assist_apply,
     },
+    [SPAPR_CAP_FWNMI_MCE] = {
+        .name = "fwnmi-mce",
+        .description = "Handle fwnmi machine check exceptions",
+        .index = SPAPR_CAP_FWNMI_MCE,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_fwnmi_mce_apply,
+    },
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +762,7 @@  SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 03111fd..66049ac 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@  typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER     0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST            0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE             0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 8c5b1f2..c055fc1 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -85,6 +85,7 @@  static int cap_ppc_safe_indirect_branch;
 static int cap_ppc_count_cache_flush_assist;
 static int cap_ppc_nested_kvm_hv;
 static int cap_large_decr;
+static int cap_ppc_fwnmi;
 
 static uint32_t debug_inst_opcode;
 
@@ -2055,6 +2056,22 @@  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
     }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+    CPUState *cs = CPU(cpu);
+    int ret;
+
+    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+    if (ret) {
+        error_report("This KVM version does not support FWNMI");
+        return ret;
+    }
+
+    cap_ppc_fwnmi = 1;
+    return ret;
+}
+
 int kvmppc_smt_threads(void)
 {
     return cap_ppc_smt ? cap_ppc_smt : 1;
@@ -2355,6 +2372,11 @@  bool kvmppc_has_cap_mmu_hash_v3(void)
     return cap_mmu_hash_v3;
 }
 
+bool kvmppc_has_cap_ppc_fwnmi(void)
+{
+    return cap_ppc_fwnmi;
+}
+
 static bool kvmppc_power8_host(void)
 {
     bool ret = false;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 98bd7d5..2990898 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,8 @@  void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
+bool kvmppc_has_cap_ppc_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_hint_smt_possible(Error **errp);
 int kvmppc_set_smt_threads(int smt);
@@ -159,6 +161,15 @@  static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
 {
 }
 
+static inline int kvmppc_set_fwnmi(void)
+{
+}
+
+static inline bool kvmppc_has_cap_ppc_fwnmi(void)
+{
+    return false;
+}
+
 static inline int kvmppc_smt_threads(void)
 {
     return 1;