diff mbox series

[v4,14/15] spapr: nested: Introduce cap-nested-papr for Nested PAPR API

Message ID 20240220083609.748325-15-harshpb@linux.ibm.com
State New
Headers show
Series Nested PAPR API (KVM on PowerVM) | expand

Commit Message

Harsh Prateek Bora Feb. 20, 2024, 8:36 a.m. UTC
Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
API for nested guests. This new API is to enable support for KVM on PowerVM
and the support in Linux kernel has already merged upstream.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
---
 include/hw/ppc/spapr.h |  6 ++++-
 hw/ppc/spapr.c         |  2 ++
 hw/ppc/spapr_caps.c    | 56 ++++++++++++++++++++++++++++++++++++++++++
 hw/ppc/spapr_nested.c  | 19 ++++++++++++--
 4 files changed, 80 insertions(+), 3 deletions(-)

Comments

Nicholas Piggin Feb. 27, 2024, 10:22 a.m. UTC | #1
On Tue Feb 20, 2024 at 6:36 PM AEST, Harsh Prateek Bora wrote:
> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
> API for nested guests. This new API is to enable support for KVM on PowerVM
> and the support in Linux kernel has already merged upstream.
>
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
> ---
>  include/hw/ppc/spapr.h |  6 ++++-
>  hw/ppc/spapr.c         |  2 ++
>  hw/ppc/spapr_caps.c    | 56 ++++++++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_nested.c  | 19 ++++++++++++--
>  4 files changed, 80 insertions(+), 3 deletions(-)
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 036a7db2bc..1b1d37123a 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -81,8 +81,10 @@ typedef enum {
>  #define SPAPR_CAP_RPT_INVALIDATE        0x0B
>  /* Support for AIL modes */
>  #define SPAPR_CAP_AIL_MODE_3            0x0C
> +/* Nested PAPR */
> +#define SPAPR_CAP_NESTED_PAPR           0x0D
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
>  
>  /*
>   * Capability Values
> @@ -994,6 +996,7 @@ extern const VMStateDescription vmstate_spapr_cap_sbbc;
>  extern const VMStateDescription vmstate_spapr_cap_ibs;
>  extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
>  extern const VMStateDescription vmstate_spapr_cap_large_decr;
>  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>  extern const VMStateDescription vmstate_spapr_cap_fwnmi;
> @@ -1041,5 +1044,6 @@ void spapr_watchdog_init(SpaprMachineState *spapr);
>  void spapr_register_nested_hv(void);
>  void spapr_unregister_nested_hv(void);
>  void spapr_register_nested_papr(void);
> +void spapr_unregister_nested_papr(void);
>  
>  #endif /* HW_SPAPR_H */
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 3453b30a57..cb556ae6a8 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2120,6 +2120,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_cap_fwnmi,
>          &vmstate_spapr_fwnmi,
>          &vmstate_spapr_cap_rpt_invalidate,
> +        &vmstate_spapr_cap_nested_papr,
>          NULL
>      }
>  };
> @@ -4688,6 +4689,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
>      smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 721ddad23b..9a29ce1872 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -487,12 +487,58 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
>              error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
>                                      "or use threads=1 with -smp\n");
>          }
> +        if (spapr->nested.api) {
> +            warn_report("nested.api already set as %d, re-init to kvm-hv",
> +                        spapr->nested.api);
> +        }

Does this warning trigger when you reset the machine?

It's trying to catch both caps enabled? I would make that an error and
fail and tell user to enable only one or the other.

(In a future patch I think we should try permit both to be enabled at
the same time, but for now restricting it is fine)

>          spapr->nested.api = NESTED_API_KVM_HV;
>          spapr_unregister_nested_hv(); /* reset across reboots */
>          spapr_register_nested_hv();
>      }
>  }
>  
> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
> +                                    uint8_t val, Error **errp)
> +{
> +    ERRP_GUARD();
> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> +    CPUPPCState *env = &cpu->env;
> +
> +    if (!val) {
> +        /* capability disabled by default */
> +        return;
> +    }
> +
> +    if (tcg_enabled()) {
> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
> +            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
> +            error_append_hint(errp,
> +                              "Try appending -machine cap-nested-papr=off\n");
> +            return;
> +        }
> +    } else if (kvm_enabled()) {
> +        /*
> +         * this gets executed in L1 qemu when L2 is launched,
> +         * needs kvm-hv support in L1 kernel.
> +         */
> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
> +            error_setg(errp,
> +                       "KVM implementation does not support Nested-HV");
> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
> +            error_setg(errp, "Error enabling Nested-HV with KVM");
> +        }
> +    }
> +    if (spapr->nested.api) {
> +        warn_report("nested.api already set as %d, re-init to nested-papr",
> +                    spapr->nested.api);
> +    }
> +    spapr->nested.api = NESTED_API_PAPR;
> +    spapr->nested.capabilities_set = false;
> +    spapr_unregister_nested_papr(); /* reset across reboots */
> +    spapr_register_nested_papr();
> +    spapr_nested_gsb_init();
> +}
> +
>  static void cap_large_decr_apply(SpaprMachineState *spapr,
>                                   uint8_t val, Error **errp)
>  {
> @@ -738,6 +784,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_nested_kvm_hv_apply,
>      },
> +    [SPAPR_CAP_NESTED_PAPR] = {
> +        .name = "nested-papr",
> +        .description = "Allow Nested HV (PAPR API)",
> +        .index = SPAPR_CAP_NESTED_PAPR,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_nested_papr_apply,
> +    },
>      [SPAPR_CAP_LARGE_DECREMENTER] = {
>          .name = "large-decr",
>          .description = "Allow Large Decrementer",
> @@ -922,6 +977,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>  SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>  SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>  SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
> index db1c59a8f5..6e6a90616e 100644
> --- a/hw/ppc/spapr_nested.c
> +++ b/hw/ppc/spapr_nested.c
> @@ -13,8 +13,6 @@
>  void spapr_nested_init(SpaprMachineState *spapr)
>  {
>      spapr->nested.api = 0;
> -    spapr->nested.capabilities_set = false;
> -    spapr_nested_gsb_init();
>  }
>  
>  uint8_t spapr_nested_api(SpaprMachineState *spapr)
> @@ -1821,6 +1819,18 @@ void spapr_register_nested_papr(void)
>      spapr_register_hypercall(H_GUEST_RUN_VCPU        , h_guest_run_vcpu);
>  }
>  
> +void spapr_unregister_nested_papr(void)
> +{
> +    spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES);
> +    spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES);
> +    spapr_unregister_hypercall(H_GUEST_CREATE);
> +    spapr_unregister_hypercall(H_GUEST_DELETE);
> +    spapr_unregister_hypercall(H_GUEST_CREATE_VCPU);
> +    spapr_unregister_hypercall(H_GUEST_SET_STATE);
> +    spapr_unregister_hypercall(H_GUEST_GET_STATE);
> +    spapr_unregister_hypercall(H_GUEST_RUN_VCPU);
> +}

Oh they all came at once here.

And... you're not doing the same thing with the register_hypercall I
guess because then you have function defined but not used warnings?
I would just add the unregister in the same patches that add the
register.

Thanks,
Nick
Harsh Prateek Bora March 5, 2024, 8:24 a.m. UTC | #2
On 2/27/24 15:52, Nicholas Piggin wrote:
> On Tue Feb 20, 2024 at 6:36 PM AEST, Harsh Prateek Bora wrote:
>> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
>> API for nested guests. This new API is to enable support for KVM on PowerVM
>> and the support in Linux kernel has already merged upstream.
>>
>> Signed-off-by: Michael Neuling <mikey@neuling.org>
>> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
>> ---
>>   include/hw/ppc/spapr.h |  6 ++++-
>>   hw/ppc/spapr.c         |  2 ++
>>   hw/ppc/spapr_caps.c    | 56 ++++++++++++++++++++++++++++++++++++++++++
>>   hw/ppc/spapr_nested.c  | 19 ++++++++++++--
>>   4 files changed, 80 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 036a7db2bc..1b1d37123a 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -81,8 +81,10 @@ typedef enum {
>>   #define SPAPR_CAP_RPT_INVALIDATE        0x0B
>>   /* Support for AIL modes */
>>   #define SPAPR_CAP_AIL_MODE_3            0x0C
>> +/* Nested PAPR */
>> +#define SPAPR_CAP_NESTED_PAPR           0x0D
>>   /* Num Caps */
>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
>>   
>>   /*
>>    * Capability Values
>> @@ -994,6 +996,7 @@ extern const VMStateDescription vmstate_spapr_cap_sbbc;
>>   extern const VMStateDescription vmstate_spapr_cap_ibs;
>>   extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>>   extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
>> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
>>   extern const VMStateDescription vmstate_spapr_cap_large_decr;
>>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>> @@ -1041,5 +1044,6 @@ void spapr_watchdog_init(SpaprMachineState *spapr);
>>   void spapr_register_nested_hv(void);
>>   void spapr_unregister_nested_hv(void);
>>   void spapr_register_nested_papr(void);
>> +void spapr_unregister_nested_papr(void);
>>   
>>   #endif /* HW_SPAPR_H */
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 3453b30a57..cb556ae6a8 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2120,6 +2120,7 @@ static const VMStateDescription vmstate_spapr = {
>>           &vmstate_spapr_cap_fwnmi,
>>           &vmstate_spapr_fwnmi,
>>           &vmstate_spapr_cap_rpt_invalidate,
>> +        &vmstate_spapr_cap_nested_papr,
>>           NULL
>>       }
>>   };
>> @@ -4688,6 +4689,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>       smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
>>       smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
>>       smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
>>       smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>       smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>>       smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>> index 721ddad23b..9a29ce1872 100644
>> --- a/hw/ppc/spapr_caps.c
>> +++ b/hw/ppc/spapr_caps.c
>> @@ -487,12 +487,58 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
>>               error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
>>                                       "or use threads=1 with -smp\n");
>>           }
>> +        if (spapr->nested.api) {
>> +            warn_report("nested.api already set as %d, re-init to kvm-hv",
>> +                        spapr->nested.api);
>> +        }
> 
> Does this warning trigger when you reset the machine?
> 
> It's trying to catch both caps enabled? I would make that an error and
> fail and tell user to enable only one or the other.
> 
> (In a future patch I think we should try permit both to be enabled at
> the same time, but for now restricting it is fine)

Yeh, we had kept it mutually exclusive initially in v1, and looks like 
we want it to be exclusive for now. Future possibilities can be explored 
later as suggested.


> 
>>           spapr->nested.api = NESTED_API_KVM_HV;
>>           spapr_unregister_nested_hv(); /* reset across reboots */
>>           spapr_register_nested_hv();
>>       }
>>   }
>>   
>> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
>> +                                    uint8_t val, Error **errp)
>> +{
>> +    ERRP_GUARD();
>> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>> +    CPUPPCState *env = &cpu->env;
>> +
>> +    if (!val) {
>> +        /* capability disabled by default */
>> +        return;
>> +    }
>> +
>> +    if (tcg_enabled()) {
>> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
>> +            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
>> +            error_append_hint(errp,
>> +                              "Try appending -machine cap-nested-papr=off\n");
>> +            return;
>> +        }
>> +    } else if (kvm_enabled()) {
>> +        /*
>> +         * this gets executed in L1 qemu when L2 is launched,
>> +         * needs kvm-hv support in L1 kernel.
>> +         */
>> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
>> +            error_setg(errp,
>> +                       "KVM implementation does not support Nested-HV");
>> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
>> +            error_setg(errp, "Error enabling Nested-HV with KVM");
>> +        }
>> +    }
>> +    if (spapr->nested.api) {
>> +        warn_report("nested.api already set as %d, re-init to nested-papr",
>> +                    spapr->nested.api);
>> +    }
>> +    spapr->nested.api = NESTED_API_PAPR;
>> +    spapr->nested.capabilities_set = false;
>> +    spapr_unregister_nested_papr(); /* reset across reboots */
>> +    spapr_register_nested_papr();
>> +    spapr_nested_gsb_init();
>> +}
>> +
>>   static void cap_large_decr_apply(SpaprMachineState *spapr,
>>                                    uint8_t val, Error **errp)
>>   {
>> @@ -738,6 +784,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>           .type = "bool",
>>           .apply = cap_nested_kvm_hv_apply,
>>       },
>> +    [SPAPR_CAP_NESTED_PAPR] = {
>> +        .name = "nested-papr",
>> +        .description = "Allow Nested HV (PAPR API)",
>> +        .index = SPAPR_CAP_NESTED_PAPR,
>> +        .get = spapr_cap_get_bool,
>> +        .set = spapr_cap_set_bool,
>> +        .type = "bool",
>> +        .apply = cap_nested_papr_apply,
>> +    },
>>       [SPAPR_CAP_LARGE_DECREMENTER] = {
>>           .name = "large-decr",
>>           .description = "Allow Large Decrementer",
>> @@ -922,6 +977,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>>   SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>>   SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>   SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>>   SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>   SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>>   SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
>> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
>> index db1c59a8f5..6e6a90616e 100644
>> --- a/hw/ppc/spapr_nested.c
>> +++ b/hw/ppc/spapr_nested.c
>> @@ -13,8 +13,6 @@
>>   void spapr_nested_init(SpaprMachineState *spapr)
>>   {
>>       spapr->nested.api = 0;
>> -    spapr->nested.capabilities_set = false;
>> -    spapr_nested_gsb_init();
>>   }
>>   
>>   uint8_t spapr_nested_api(SpaprMachineState *spapr)
>> @@ -1821,6 +1819,18 @@ void spapr_register_nested_papr(void)
>>       spapr_register_hypercall(H_GUEST_RUN_VCPU        , h_guest_run_vcpu);
>>   }
>>   
>> +void spapr_unregister_nested_papr(void)
>> +{
>> +    spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES);
>> +    spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES);
>> +    spapr_unregister_hypercall(H_GUEST_CREATE);
>> +    spapr_unregister_hypercall(H_GUEST_DELETE);
>> +    spapr_unregister_hypercall(H_GUEST_CREATE_VCPU);
>> +    spapr_unregister_hypercall(H_GUEST_SET_STATE);
>> +    spapr_unregister_hypercall(H_GUEST_GET_STATE);
>> +    spapr_unregister_hypercall(H_GUEST_RUN_VCPU);
>> +}
> 
> Oh they all came at once here.
> 
> And... you're not doing the same thing with the register_hypercall I
> guess because then you have function defined but not used warnings?
> I would just add the unregister in the same patches that add the
> register.

Yeh, v5 shall have unregistrations and registrations for each hcall 
together.

regards,
Harsh
> 
> Thanks,
> Nick
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 036a7db2bc..1b1d37123a 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -81,8 +81,10 @@  typedef enum {
 #define SPAPR_CAP_RPT_INVALIDATE        0x0B
 /* Support for AIL modes */
 #define SPAPR_CAP_AIL_MODE_3            0x0C
+/* Nested PAPR */
+#define SPAPR_CAP_NESTED_PAPR           0x0D
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
 
 /*
  * Capability Values
@@ -994,6 +996,7 @@  extern const VMStateDescription vmstate_spapr_cap_sbbc;
 extern const VMStateDescription vmstate_spapr_cap_ibs;
 extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
+extern const VMStateDescription vmstate_spapr_cap_nested_papr;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
 extern const VMStateDescription vmstate_spapr_cap_fwnmi;
@@ -1041,5 +1044,6 @@  void spapr_watchdog_init(SpaprMachineState *spapr);
 void spapr_register_nested_hv(void);
 void spapr_unregister_nested_hv(void);
 void spapr_register_nested_papr(void);
+void spapr_unregister_nested_papr(void);
 
 #endif /* HW_SPAPR_H */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 3453b30a57..cb556ae6a8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2120,6 +2120,7 @@  static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_fwnmi,
         &vmstate_spapr_fwnmi,
         &vmstate_spapr_cap_rpt_invalidate,
+        &vmstate_spapr_cap_nested_papr,
         NULL
     }
 };
@@ -4688,6 +4689,7 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
     smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 721ddad23b..9a29ce1872 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -487,12 +487,58 @@  static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
             error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
                                     "or use threads=1 with -smp\n");
         }
+        if (spapr->nested.api) {
+            warn_report("nested.api already set as %d, re-init to kvm-hv",
+                        spapr->nested.api);
+        }
         spapr->nested.api = NESTED_API_KVM_HV;
         spapr_unregister_nested_hv(); /* reset across reboots */
         spapr_register_nested_hv();
     }
 }
 
+static void cap_nested_papr_apply(SpaprMachineState *spapr,
+                                    uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (!val) {
+        /* capability disabled by default */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        if (!(env->insns_flags2 & PPC2_ISA300)) {
+            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
+            error_append_hint(errp,
+                              "Try appending -machine cap-nested-papr=off\n");
+            return;
+        }
+    } else if (kvm_enabled()) {
+        /*
+         * this gets executed in L1 qemu when L2 is launched,
+         * needs kvm-hv support in L1 kernel.
+         */
+        if (!kvmppc_has_cap_nested_kvm_hv()) {
+            error_setg(errp,
+                       "KVM implementation does not support Nested-HV");
+        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
+            error_setg(errp, "Error enabling Nested-HV with KVM");
+        }
+    }
+    if (spapr->nested.api) {
+        warn_report("nested.api already set as %d, re-init to nested-papr",
+                    spapr->nested.api);
+    }
+    spapr->nested.api = NESTED_API_PAPR;
+    spapr->nested.capabilities_set = false;
+    spapr_unregister_nested_papr(); /* reset across reboots */
+    spapr_register_nested_papr();
+    spapr_nested_gsb_init();
+}
+
 static void cap_large_decr_apply(SpaprMachineState *spapr,
                                  uint8_t val, Error **errp)
 {
@@ -738,6 +784,15 @@  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_nested_kvm_hv_apply,
     },
+    [SPAPR_CAP_NESTED_PAPR] = {
+        .name = "nested-papr",
+        .description = "Allow Nested HV (PAPR API)",
+        .index = SPAPR_CAP_NESTED_PAPR,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_nested_papr_apply,
+    },
     [SPAPR_CAP_LARGE_DECREMENTER] = {
         .name = "large-decr",
         .description = "Allow Large Decrementer",
@@ -922,6 +977,7 @@  SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
 SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
 SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
+SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
 SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
index db1c59a8f5..6e6a90616e 100644
--- a/hw/ppc/spapr_nested.c
+++ b/hw/ppc/spapr_nested.c
@@ -13,8 +13,6 @@ 
 void spapr_nested_init(SpaprMachineState *spapr)
 {
     spapr->nested.api = 0;
-    spapr->nested.capabilities_set = false;
-    spapr_nested_gsb_init();
 }
 
 uint8_t spapr_nested_api(SpaprMachineState *spapr)
@@ -1821,6 +1819,18 @@  void spapr_register_nested_papr(void)
     spapr_register_hypercall(H_GUEST_RUN_VCPU        , h_guest_run_vcpu);
 }
 
+void spapr_unregister_nested_papr(void)
+{
+    spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES);
+    spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES);
+    spapr_unregister_hypercall(H_GUEST_CREATE);
+    spapr_unregister_hypercall(H_GUEST_DELETE);
+    spapr_unregister_hypercall(H_GUEST_CREATE_VCPU);
+    spapr_unregister_hypercall(H_GUEST_SET_STATE);
+    spapr_unregister_hypercall(H_GUEST_GET_STATE);
+    spapr_unregister_hypercall(H_GUEST_RUN_VCPU);
+}
+
 #else
 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
 {
@@ -1854,6 +1864,11 @@  void spapr_register_nested_papr(void)
     /* DO NOTHING */
 }
 
+void spapr_unregister_nested_papr(void)
+{
+    /* DO NOTHING */
+}
+
 void spapr_nested_gsb_init(void)
 {
     /* DO NOTHING */