diff mbox

[for-2.9,2/5] pseries: Stubs for HPT resizing

Message ID 20161209022314.14399-3-david@gibson.dropbear.id.au
State New
Headers show

Commit Message

David Gibson Dec. 9, 2016, 2:23 a.m. UTC
This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
extension to allow run time resizing of a guest's hash page table.  It
also adds a new machine property for controlling whether this new
facility is available, and logic to check that against availability
with KVM (only supported with KVM PR for now).

Finally, it adds a new string to the hypertas property in the device
tree, advertising to the guest the availability of the HPT resizing
hypercalls.  This is a tentative suggested value, and would need to be
standardized by PAPR before being merged.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
 hw/ppc/trace-events    |  2 ++
 include/hw/ppc/spapr.h | 11 +++++++++
 target-ppc/kvm.c       | 26 ++++++++++++++++++++
 target-ppc/kvm_ppc.h   |  5 ++++
 6 files changed, 146 insertions(+)

Comments

Thomas Huth Dec. 9, 2016, 8:18 a.m. UTC | #1
On 09.12.2016 03:23, David Gibson wrote:
> This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
> H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
> extension to allow run time resizing of a guest's hash page table.  It
> also adds a new machine property for controlling whether this new
> facility is available, and logic to check that against availability
> with KVM (only supported with KVM PR for now).
> 
> Finally, it adds a new string to the hypertas property in the device
> tree, advertising to the guest the availability of the HPT resizing
> hypercalls.  This is a tentative suggested value, and would need to be
> standardized by PAPR before being merged.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
>  hw/ppc/trace-events    |  2 ++
>  include/hw/ppc/spapr.h | 11 +++++++++
>  target-ppc/kvm.c       | 26 ++++++++++++++++++++
>  target-ppc/kvm_ppc.h   |  5 ++++
>  6 files changed, 146 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 0f25e83..ecb0822 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -760,6 +760,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
>      if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
>          add_str(hypertas, "hcall-multi-tce");
>      }
> +
> +    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
> +        add_str(hypertas, "hcall-hpt-resize");
> +    }
> +
>      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>                       hypertas->str, hypertas->len));
>      g_string_free(hypertas, TRUE);
> @@ -1839,11 +1844,31 @@ static void ppc_spapr_init(MachineState *machine)
>      long load_limit, fw_size;
>      char *filename;
>      int smt = kvmppc_smt_threads();
> +    Error *resize_hpt_err = NULL;
>  
>      msi_nonbroken = true;
>  
>      QLIST_INIT(&spapr->phbs);
>  
> +    /* Check HPT resizing availability */
> +    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
> +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
> +        if (resize_hpt_err) {
> +            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> +            error_free(resize_hpt_err);
> +            resize_hpt_err = NULL;
> +        } else {
> +            spapr->resize_hpt = smc->resize_hpt_default;
> +        }
> +    }
> +
> +    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
> +
> +    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
> +        error_report_err(resize_hpt_err);
> +        exit(1);
> +    }

The logic here is IMHO a little bit hard to understand. Why do you need
the SPAPR_RESIZE_HPT_DEFAULT state here at all? Wouldn't it be
sufficient to start with SPAPR_RESIZE_HPT_DISABLED by default and to
only enable it when the extension is available? ... at least some
explaining comments in the code about SPAPR_RESIZE_HPT_DEFAULT could
help here.

> @@ -2236,6 +2261,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value,
>      spapr->use_hotplug_event_source = value;
>  }
>  
> +static char *spapr_get_resize_hpt(Object *obj, Error **errp)
> +{
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> +
> +    switch (spapr->resize_hpt) {
> +    case SPAPR_RESIZE_HPT_DEFAULT:
> +        return g_strdup("default");
> +    case SPAPR_RESIZE_HPT_DISABLED:
> +        return g_strdup("disabled");
> +    case SPAPR_RESIZE_HPT_ENABLED:
> +        return g_strdup("enabled");
> +    case SPAPR_RESIZE_HPT_REQUIRED:
> +        return g_strdup("required");
> +    }
> +    assert(0);
> +}
> +
> +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
> +{
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> +
> +    if (strcmp(value, "default") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
> +    } else if (strcmp(value, "disabled") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> +    } else if (strcmp(value, "enabled") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
> +    } else if (strcmp(value, "required") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
> +    } else {
> +        error_setg(errp, "Bad value for \"resize-hpt\" property");
> +    }
> +}
> +
>  static void spapr_machine_initfn(Object *obj)
>  {
>      sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> @@ -2256,6 +2315,12 @@ static void spapr_machine_initfn(Object *obj)
>                                      " place of standard EPOW events when possible"
>                                      " (required for memory hot-unplug support)",
>                                      NULL);
> +
> +    object_property_add_str(obj, "resize-hpt",
> +                            spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
> +    object_property_set_description(obj, "resize-hpt",
> +                                    "Resizing of the Hash Page Table (enabled, disabled, required)",
> +                                    NULL);

... and "default" ?

>  }
>  
>  static void spapr_machine_finalizefn(Object *obj)
> @@ -2707,6 +2772,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>  
>      smc->dr_lmb_enabled = true;
>      smc->tcg_default_cpu = "POWER8";
> +    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
>      mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
>      fwc->get_dev_path = spapr_get_fw_dev_path;
>      nc->nmi_monitor_handler = spapr_nmi;
...
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 15e12f3..dcd8cef 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -22,6 +22,7 @@
>  #include <linux/kvm.h>
>  
>  #include "qemu-common.h"
> +#include "qapi/error.h"
>  #include "qemu/error-report.h"
>  #include "cpu.h"
>  #include "qemu/timer.h"
> @@ -2269,6 +2270,7 @@ int kvmppc_reset_htab(int shift_hint)
>          /* Full emulation, tell caller to allocate htab itself */
>          return 0;
>      }
> +

Unnecessary white space change.

>      if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
>          int ret;
>          ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
> @@ -2672,3 +2674,27 @@ int kvmppc_enable_hwrng(void)
>  
>      return kvmppc_enable_hcall(kvm_state, H_RANDOM);
>  }
> +
> +void kvmppc_check_papr_resize_hpt(Error **errp)
> +{
> +    if (!kvm_enabled()) {
> +        return;
> +    }
> +
> +    /* TODO: Check specific capabilities for HPT resize aware host kernels */
> +
> +    /*
> +     * It's tempting to try to check directly if the HPT is under our
> +     * control or KVM's, which is what's really relevant here.
> +     * Unfortunately, in order to correctly size the HPT, we need to
> +     * know if we can do resizing, _before_ we attempt to allocate it
> +     * with KVM.  Before that point, we don't officially know whether
> +     * we'll control the HPT or not.  So we have to use a fallback
> +     * test for PR vs HV KVM to predict that.
> +     */
> +    if (kvmppc_is_pr(kvm_state)) {
> +        return;
> +    }

Couldn't we get HPT resizing on KVM-PR one day, too?

> +    error_setg(errp, "Hash page table resizing not available with this KVM version");
> +}
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index 841a29b..3e852ba 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -59,6 +59,7 @@ bool kvmppc_has_cap_htm(void);
>  int kvmppc_enable_hwrng(void);
>  int kvmppc_put_books_sregs(PowerPCCPU *cpu);
>  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
> +void kvmppc_check_papr_resize_hpt(Error **errp);
>  
>  #else
>  
> @@ -270,6 +271,10 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
>      return NULL;
>  }
>  
> +static inline void kvmppc_check_papr_resize_hpt(Error **errp)
> +{
> +    return;
> +}
>  #endif

 Thomas
David Gibson Dec. 9, 2016, 9:09 a.m. UTC | #2
On Fri, Dec 09, 2016 at 09:18:51AM +0100, Thomas Huth wrote:
> On 09.12.2016 03:23, David Gibson wrote:
> > This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
> > H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
> > extension to allow run time resizing of a guest's hash page table.  It
> > also adds a new machine property for controlling whether this new
> > facility is available, and logic to check that against availability
> > with KVM (only supported with KVM PR for now).
> > 
> > Finally, it adds a new string to the hypertas property in the device
> > tree, advertising to the guest the availability of the HPT resizing
> > hypercalls.  This is a tentative suggested value, and would need to be
> > standardized by PAPR before being merged.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
> >  hw/ppc/trace-events    |  2 ++
> >  include/hw/ppc/spapr.h | 11 +++++++++
> >  target-ppc/kvm.c       | 26 ++++++++++++++++++++
> >  target-ppc/kvm_ppc.h   |  5 ++++
> >  6 files changed, 146 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 0f25e83..ecb0822 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -760,6 +760,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
> >      if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
> >          add_str(hypertas, "hcall-multi-tce");
> >      }
> > +
> > +    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
> > +        add_str(hypertas, "hcall-hpt-resize");
> > +    }
> > +
> >      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
> >                       hypertas->str, hypertas->len));
> >      g_string_free(hypertas, TRUE);
> > @@ -1839,11 +1844,31 @@ static void ppc_spapr_init(MachineState *machine)
> >      long load_limit, fw_size;
> >      char *filename;
> >      int smt = kvmppc_smt_threads();
> > +    Error *resize_hpt_err = NULL;
> >  
> >      msi_nonbroken = true;
> >  
> >      QLIST_INIT(&spapr->phbs);
> >  
> > +    /* Check HPT resizing availability */
> > +    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
> > +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
> > +        if (resize_hpt_err) {
> > +            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> > +            error_free(resize_hpt_err);
> > +            resize_hpt_err = NULL;
> > +        } else {
> > +            spapr->resize_hpt = smc->resize_hpt_default;
> > +        }
> > +    }
> > +
> > +    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
> > +
> > +    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
> > +        error_report_err(resize_hpt_err);
> > +        exit(1);
> > +    }
> 
> The logic here is IMHO a little bit hard to understand. Why do you need
> the SPAPR_RESIZE_HPT_DEFAULT state here at all? Wouldn't it be
> sufficient to start with SPAPR_RESIZE_HPT_DISABLED by default and to
> only enable it when the extension is available?

Because if the user explicitly sets something, we should never
override itin the logic here.  In order to determine whether the user
has set something explicitly, we need another value that means "not
set explicitly".

> ... at least some
> explaining comments in the code about SPAPR_RESIZE_HPT_DEFAULT could
> help here.

Hm, maybe.

> > @@ -2236,6 +2261,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value,
> >      spapr->use_hotplug_event_source = value;
> >  }
> >  
> > +static char *spapr_get_resize_hpt(Object *obj, Error **errp)
> > +{
> > +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > +
> > +    switch (spapr->resize_hpt) {
> > +    case SPAPR_RESIZE_HPT_DEFAULT:
> > +        return g_strdup("default");
> > +    case SPAPR_RESIZE_HPT_DISABLED:
> > +        return g_strdup("disabled");
> > +    case SPAPR_RESIZE_HPT_ENABLED:
> > +        return g_strdup("enabled");
> > +    case SPAPR_RESIZE_HPT_REQUIRED:
> > +        return g_strdup("required");
> > +    }
> > +    assert(0);
> > +}
> > +
> > +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
> > +{
> > +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > +
> > +    if (strcmp(value, "default") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
> > +    } else if (strcmp(value, "disabled") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> > +    } else if (strcmp(value, "enabled") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
> > +    } else if (strcmp(value, "required") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
> > +    } else {
> > +        error_setg(errp, "Bad value for \"resize-hpt\" property");
> > +    }
> > +}
> > +
> >  static void spapr_machine_initfn(Object *obj)
> >  {
> >      sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > @@ -2256,6 +2315,12 @@ static void spapr_machine_initfn(Object *obj)
> >                                      " place of standard EPOW events when possible"
> >                                      " (required for memory hot-unplug support)",
> >                                      NULL);
> > +
> > +    object_property_add_str(obj, "resize-hpt",
> > +                            spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
> > +    object_property_set_description(obj, "resize-hpt",
> > +                                    "Resizing of the Hash Page Table (enabled, disabled, required)",
> > +                                    NULL);
> 
> ... and "default" ?
> 
> >  }
> >  
> >  static void spapr_machine_finalizefn(Object *obj)
> > @@ -2707,6 +2772,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >  
> >      smc->dr_lmb_enabled = true;
> >      smc->tcg_default_cpu = "POWER8";
> > +    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
> >      mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
> >      fwc->get_dev_path = spapr_get_fw_dev_path;
> >      nc->nmi_monitor_handler = spapr_nmi;
> ...
> > diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> > index 15e12f3..dcd8cef 100644
> > --- a/target-ppc/kvm.c
> > +++ b/target-ppc/kvm.c
> > @@ -22,6 +22,7 @@
> >  #include <linux/kvm.h>
> >  
> >  #include "qemu-common.h"
> > +#include "qapi/error.h"
> >  #include "qemu/error-report.h"
> >  #include "cpu.h"
> >  #include "qemu/timer.h"
> > @@ -2269,6 +2270,7 @@ int kvmppc_reset_htab(int shift_hint)
> >          /* Full emulation, tell caller to allocate htab itself */
> >          return 0;
> >      }
> > +
> 
> Unnecessary white space change.
> 
> >      if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
> >          int ret;
> >          ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
> > @@ -2672,3 +2674,27 @@ int kvmppc_enable_hwrng(void)
> >  
> >      return kvmppc_enable_hcall(kvm_state, H_RANDOM);
> >  }
> > +
> > +void kvmppc_check_papr_resize_hpt(Error **errp)
> > +{
> > +    if (!kvm_enabled()) {
> > +        return;
> > +    }
> > +
> > +    /* TODO: Check specific capabilities for HPT resize aware host kernels */
> > +
> > +    /*
> > +     * It's tempting to try to check directly if the HPT is under our
> > +     * control or KVM's, which is what's really relevant here.
> > +     * Unfortunately, in order to correctly size the HPT, we need to
> > +     * know if we can do resizing, _before_ we attempt to allocate it
> > +     * with KVM.  Before that point, we don't officially know whether
> > +     * we'll control the HPT or not.  So we have to use a fallback
> > +     * test for PR vs HV KVM to predict that.
> > +     */
> > +    if (kvmppc_is_pr(kvm_state)) {
> > +        return;
> > +    }
> 
> Couldn't we get HPT resizing on KVM-PR one day, too?

I guess I need to reword that.  HPT resizing should work on PR right
now (but I haven't been able to test it yet) - because qemu still
allocates and manages the HPT on PR, the qemu side logic is
sufficient.

> > +    error_setg(errp, "Hash page table resizing not available with this KVM version");
> > +}
> > diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> > index 841a29b..3e852ba 100644
> > --- a/target-ppc/kvm_ppc.h
> > +++ b/target-ppc/kvm_ppc.h
> > @@ -59,6 +59,7 @@ bool kvmppc_has_cap_htm(void);
> >  int kvmppc_enable_hwrng(void);
> >  int kvmppc_put_books_sregs(PowerPCCPU *cpu);
> >  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
> > +void kvmppc_check_papr_resize_hpt(Error **errp);
> >  
> >  #else
> >  
> > @@ -270,6 +271,10 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
> >      return NULL;
> >  }
> >  
> > +static inline void kvmppc_check_papr_resize_hpt(Error **errp)
> > +{
> > +    return;
> > +}
> >  #endif
> 
>  Thomas
>
Thomas Huth Dec. 9, 2016, 9:18 a.m. UTC | #3
On 09.12.2016 10:09, David Gibson wrote:
> On Fri, Dec 09, 2016 at 09:18:51AM +0100, Thomas Huth wrote:
>> On 09.12.2016 03:23, David Gibson wrote:
>>> This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
>>> H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
>>> extension to allow run time resizing of a guest's hash page table.  It
>>> also adds a new machine property for controlling whether this new
>>> facility is available, and logic to check that against availability
>>> with KVM (only supported with KVM PR for now).
>>>
>>> Finally, it adds a new string to the hypertas property in the device
>>> tree, advertising to the guest the availability of the HPT resizing
>>> hypercalls.  This is a tentative suggested value, and would need to be
>>> standardized by PAPR before being merged.
>>>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>> ---
>>>  hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
>>>  hw/ppc/trace-events    |  2 ++
>>>  include/hw/ppc/spapr.h | 11 +++++++++
>>>  target-ppc/kvm.c       | 26 ++++++++++++++++++++
>>>  target-ppc/kvm_ppc.h   |  5 ++++
>>>  6 files changed, 146 insertions(+)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 0f25e83..ecb0822 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -760,6 +760,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
>>>      if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
>>>          add_str(hypertas, "hcall-multi-tce");
>>>      }
>>> +
>>> +    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
>>> +        add_str(hypertas, "hcall-hpt-resize");
>>> +    }
>>> +
>>>      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>>>                       hypertas->str, hypertas->len));
>>>      g_string_free(hypertas, TRUE);
>>> @@ -1839,11 +1844,31 @@ static void ppc_spapr_init(MachineState *machine)
>>>      long load_limit, fw_size;
>>>      char *filename;
>>>      int smt = kvmppc_smt_threads();
>>> +    Error *resize_hpt_err = NULL;
>>>  
>>>      msi_nonbroken = true;
>>>  
>>>      QLIST_INIT(&spapr->phbs);
>>>  
>>> +    /* Check HPT resizing availability */
>>> +    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
>>> +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
>>> +        if (resize_hpt_err) {
>>> +            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
>>> +            error_free(resize_hpt_err);
>>> +            resize_hpt_err = NULL;
>>> +        } else {
>>> +            spapr->resize_hpt = smc->resize_hpt_default;
>>> +        }
>>> +    }
>>> +
>>> +    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
>>> +
>>> +    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
>>> +        error_report_err(resize_hpt_err);
>>> +        exit(1);
>>> +    }
>>
>> The logic here is IMHO a little bit hard to understand. Why do you need
>> the SPAPR_RESIZE_HPT_DEFAULT state here at all? Wouldn't it be
>> sufficient to start with SPAPR_RESIZE_HPT_DISABLED by default and to
>> only enable it when the extension is available?
> 
> Because if the user explicitly sets something, we should never
> override itin the logic here.  In order to determine whether the user
> has set something explicitly, we need another value that means "not
> set explicitly".

Hmm, ok, I think I slowly get it. Another question: Does this also take
care of the handling if the user migrates from a machine that supports
HPT resizing to a machine that does not support this feature?

 Thomas
Michael Roth Dec. 9, 2016, 5:08 p.m. UTC | #4
Quoting David Gibson (2016-12-08 20:23:11)
> This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
> H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
> extension to allow run time resizing of a guest's hash page table.  It
> also adds a new machine property for controlling whether this new
> facility is available, and logic to check that against availability
> with KVM (only supported with KVM PR for now).
> 
> Finally, it adds a new string to the hypertas property in the device
> tree, advertising to the guest the availability of the HPT resizing
> hypercalls.  This is a tentative suggested value, and would need to be
> standardized by PAPR before being merged.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
>  hw/ppc/trace-events    |  2 ++
>  include/hw/ppc/spapr.h | 11 +++++++++
>  target-ppc/kvm.c       | 26 ++++++++++++++++++++
>  target-ppc/kvm_ppc.h   |  5 ++++
>  6 files changed, 146 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 0f25e83..ecb0822 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -760,6 +760,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
>      if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
>          add_str(hypertas, "hcall-multi-tce");
>      }
> +
> +    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
> +        add_str(hypertas, "hcall-hpt-resize");
> +    }
> +
>      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
>                       hypertas->str, hypertas->len));
>      g_string_free(hypertas, TRUE);
> @@ -1839,11 +1844,31 @@ static void ppc_spapr_init(MachineState *machine)
>      long load_limit, fw_size;
>      char *filename;
>      int smt = kvmppc_smt_threads();
> +    Error *resize_hpt_err = NULL;
> 
>      msi_nonbroken = true;
> 
>      QLIST_INIT(&spapr->phbs);
> 
> +    /* Check HPT resizing availability */
> +    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
> +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
> +        if (resize_hpt_err) {
> +            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> +            error_free(resize_hpt_err);
> +            resize_hpt_err = NULL;
> +        } else {
> +            spapr->resize_hpt = smc->resize_hpt_default;
> +        }
> +    }
> +
> +    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
> +
> +    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
> +        error_report_err(resize_hpt_err);
> +        exit(1);
> +    }

I'm also a bit confused by this. Aren't semantics that HPT_ENABLED will allow
a fallback to non-resizable HPTs if the feature isn't available (whereas
HPT_REQUIRED won't)? I don't understand how such a fallback can ever be reached
with this check in place.

> +
>      /* Allocate RMA if necessary */
>      rma_alloc_size = kvmppc_alloc_rma(&rma);
> 
> @@ -2236,6 +2261,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value,
>      spapr->use_hotplug_event_source = value;
>  }
> 
> +static char *spapr_get_resize_hpt(Object *obj, Error **errp)
> +{
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> +
> +    switch (spapr->resize_hpt) {
> +    case SPAPR_RESIZE_HPT_DEFAULT:
> +        return g_strdup("default");
> +    case SPAPR_RESIZE_HPT_DISABLED:
> +        return g_strdup("disabled");
> +    case SPAPR_RESIZE_HPT_ENABLED:
> +        return g_strdup("enabled");
> +    case SPAPR_RESIZE_HPT_REQUIRED:
> +        return g_strdup("required");
> +    }
> +    assert(0);
> +}
> +
> +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
> +{
> +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> +
> +    if (strcmp(value, "default") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
> +    } else if (strcmp(value, "disabled") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> +    } else if (strcmp(value, "enabled") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
> +    } else if (strcmp(value, "required") == 0) {
> +        spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
> +    } else {
> +        error_setg(errp, "Bad value for \"resize-hpt\" property");
> +    }
> +}
> +
>  static void spapr_machine_initfn(Object *obj)
>  {
>      sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> @@ -2256,6 +2315,12 @@ static void spapr_machine_initfn(Object *obj)
>                                      " place of standard EPOW events when possible"
>                                      " (required for memory hot-unplug support)",
>                                      NULL);
> +
> +    object_property_add_str(obj, "resize-hpt",
> +                            spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
> +    object_property_set_description(obj, "resize-hpt",
> +                                    "Resizing of the Hash Page Table (enabled, disabled, required)",
> +                                    NULL);
>  }
> 
>  static void spapr_machine_finalizefn(Object *obj)
> @@ -2707,6 +2772,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> 
>      smc->dr_lmb_enabled = true;
>      smc->tcg_default_cpu = "POWER8";
> +    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
>      mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
>      fwc->get_dev_path = spapr_get_fw_dev_path;
>      nc->nmi_monitor_handler = spapr_nmi;
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index fd9f1d4..72a9c4d 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -355,6 +355,38 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>      return H_SUCCESS;
>  }
> 
> +static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
> +                                         sPAPRMachineState *spapr,
> +                                         target_ulong opcode,
> +                                         target_ulong *args)
> +{
> +    target_ulong flags = args[0];
> +    target_ulong shift = args[1];
> +
> +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
> +        return H_AUTHORITY;
> +    }
> +
> +    trace_spapr_h_resize_hpt_prepare(flags, shift);
> +    return H_HARDWARE;
> +}
> +
> +static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu,
> +                                        sPAPRMachineState *spapr,
> +                                        target_ulong opcode,
> +                                        target_ulong *args)
> +{
> +    target_ulong flags = args[0];
> +    target_ulong shift = args[1];
> +
> +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
> +        return H_AUTHORITY;
> +    }
> +
> +    trace_spapr_h_resize_hpt_commit(flags, shift);
> +    return H_HARDWARE;
> +}
> +
>  static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>                                  target_ulong opcode, target_ulong *args)
>  {
> @@ -1134,6 +1166,10 @@ static void hypercall_register_types(void)
>      /* hcall-bulk */
>      spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
> 
> +    /* hcall-hpt-resize */
> +    spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
> +    spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
> +
>      /* hcall-splpar */
>      spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
>      spapr_register_hypercall(H_CEDE, h_cede);
> diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
> index 2297ead..bf59a8f 100644
> --- a/hw/ppc/trace-events
> +++ b/hw/ppc/trace-events
> @@ -16,6 +16,8 @@ spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes"
>  # hw/ppc/spapr_hcall.c
>  spapr_cas_pvr_try(uint32_t pvr) "%x"
>  spapr_cas_pvr(uint32_t cur_pvr, bool cpu_match, uint32_t new_pvr, uint64_t pcr) "current=%x, cpu_match=%u, new=%x, compat flags=%"PRIx64
> +spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
> +spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
> 
>  # hw/ppc/spapr_iommu.c
>  spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index a2d8964..d2c758b 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -31,6 +31,13 @@ typedef struct sPAPRMachineState sPAPRMachineState;
>  #define SPAPR_MACHINE_CLASS(klass) \
>      OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)
> 
> +typedef enum {
> +    SPAPR_RESIZE_HPT_DEFAULT = 0,
> +    SPAPR_RESIZE_HPT_DISABLED,
> +    SPAPR_RESIZE_HPT_ENABLED,
> +    SPAPR_RESIZE_HPT_REQUIRED,
> +} sPAPRResizeHPT;
> +
>  /**
>   * sPAPRMachineClass:
>   */
> @@ -46,6 +53,7 @@ struct sPAPRMachineClass {
>                            uint64_t *buid, hwaddr *pio, 
>                            hwaddr *mmio32, hwaddr *mmio64,
>                            unsigned n_dma, uint32_t *liobns, Error **errp);
> +    sPAPRResizeHPT resize_hpt_default;
>  };
> 
>  /**
> @@ -61,6 +69,7 @@ struct sPAPRMachineState {
>      XICSState *xics;
>      DeviceState *rtc;
> 
> +    sPAPRResizeHPT resize_hpt;
>      void *htab;
>      uint32_t htab_shift;
>      hwaddr rma_size;
> @@ -347,6 +356,8 @@ struct sPAPRMachineState {
>  #define H_XIRR_X                0x2FC
>  #define H_RANDOM                0x300
>  #define H_SET_MODE              0x31C
> +#define H_RESIZE_HPT_PREPARE    0x36C
> +#define H_RESIZE_HPT_COMMIT     0x370
>  #define H_SIGNAL_SYS_RESET      0x380
>  #define MAX_HCALL_OPCODE        H_SIGNAL_SYS_RESET
> 
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 15e12f3..dcd8cef 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -22,6 +22,7 @@
>  #include <linux/kvm.h>
> 
>  #include "qemu-common.h"
> +#include "qapi/error.h"
>  #include "qemu/error-report.h"
>  #include "cpu.h"
>  #include "qemu/timer.h"
> @@ -2269,6 +2270,7 @@ int kvmppc_reset_htab(int shift_hint)
>          /* Full emulation, tell caller to allocate htab itself */
>          return 0;
>      }
> +
>      if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
>          int ret;
>          ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
> @@ -2672,3 +2674,27 @@ int kvmppc_enable_hwrng(void)
> 
>      return kvmppc_enable_hcall(kvm_state, H_RANDOM);
>  }
> +
> +void kvmppc_check_papr_resize_hpt(Error **errp)
> +{
> +    if (!kvm_enabled()) {
> +        return;
> +    }
> +
> +    /* TODO: Check specific capabilities for HPT resize aware host kernels */
> +
> +    /*
> +     * It's tempting to try to check directly if the HPT is under our
> +     * control or KVM's, which is what's really relevant here.
> +     * Unfortunately, in order to correctly size the HPT, we need to
> +     * know if we can do resizing, _before_ we attempt to allocate it
> +     * with KVM.  Before that point, we don't officially know whether
> +     * we'll control the HPT or not.  So we have to use a fallback
> +     * test for PR vs HV KVM to predict that.
> +     */
> +    if (kvmppc_is_pr(kvm_state)) {
> +        return;
> +    }
> +
> +    error_setg(errp, "Hash page table resizing not available with this KVM version");
> +}
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index 841a29b..3e852ba 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -59,6 +59,7 @@ bool kvmppc_has_cap_htm(void);
>  int kvmppc_enable_hwrng(void);
>  int kvmppc_put_books_sregs(PowerPCCPU *cpu);
>  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
> +void kvmppc_check_papr_resize_hpt(Error **errp);
> 
>  #else
> 
> @@ -270,6 +271,10 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
>      return NULL;
>  }
> 
> +static inline void kvmppc_check_papr_resize_hpt(Error **errp)
> +{
> +    return;
> +}
>  #endif
> 
>  #ifndef CONFIG_KVM
> -- 
> 2.9.3
>
David Gibson Dec. 10, 2016, 9:10 a.m. UTC | #5
On Fri, Dec 09, 2016 at 11:08:02AM -0600, Michael Roth wrote:
> Quoting David Gibson (2016-12-08 20:23:11)
> > This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
> > H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
> > extension to allow run time resizing of a guest's hash page table.  It
> > also adds a new machine property for controlling whether this new
> > facility is available, and logic to check that against availability
> > with KVM (only supported with KVM PR for now).
> > 
> > Finally, it adds a new string to the hypertas property in the device
> > tree, advertising to the guest the availability of the HPT resizing
> > hypercalls.  This is a tentative suggested value, and would need to be
> > standardized by PAPR before being merged.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
> >  hw/ppc/trace-events    |  2 ++
> >  include/hw/ppc/spapr.h | 11 +++++++++
> >  target-ppc/kvm.c       | 26 ++++++++++++++++++++
> >  target-ppc/kvm_ppc.h   |  5 ++++
> >  6 files changed, 146 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 0f25e83..ecb0822 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -760,6 +760,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
> >      if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
> >          add_str(hypertas, "hcall-multi-tce");
> >      }
> > +
> > +    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
> > +        add_str(hypertas, "hcall-hpt-resize");
> > +    }
> > +
> >      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
> >                       hypertas->str, hypertas->len));
> >      g_string_free(hypertas, TRUE);
> > @@ -1839,11 +1844,31 @@ static void ppc_spapr_init(MachineState *machine)
> >      long load_limit, fw_size;
> >      char *filename;
> >      int smt = kvmppc_smt_threads();
> > +    Error *resize_hpt_err = NULL;
> > 
> >      msi_nonbroken = true;
> > 
> >      QLIST_INIT(&spapr->phbs);
> > 
> > +    /* Check HPT resizing availability */
> > +    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
> > +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
> > +        if (resize_hpt_err) {
> > +            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> > +            error_free(resize_hpt_err);
> > +            resize_hpt_err = NULL;
> > +        } else {
> > +            spapr->resize_hpt = smc->resize_hpt_default;
> > +        }
> > +    }
> > +
> > +    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
> > +
> > +    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
> > +        error_report_err(resize_hpt_err);
> > +        exit(1);
> > +    }
> 
> I'm also a bit confused by this. Aren't semantics that HPT_ENABLED will allow
> a fallback to non-resizable HPTs if the feature isn't available (whereas
> HPT_REQUIRED won't)? I don't understand how such a fallback can ever be reached
> with this check in place.

resize-hpt=enabled means we'll fall back if the guest doesn't support
it.  An error here means the _host_ can't support it.  I think not
falling back is the better option in this case - if you've explicitly
tried to enable it from it should be available or fail.  In particular
this will make it safer for migrations where the source is already
using hpt resizing.

> > +
> >      /* Allocate RMA if necessary */
> >      rma_alloc_size = kvmppc_alloc_rma(&rma);
> > 
> > @@ -2236,6 +2261,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value,
> >      spapr->use_hotplug_event_source = value;
> >  }
> > 
> > +static char *spapr_get_resize_hpt(Object *obj, Error **errp)
> > +{
> > +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > +
> > +    switch (spapr->resize_hpt) {
> > +    case SPAPR_RESIZE_HPT_DEFAULT:
> > +        return g_strdup("default");
> > +    case SPAPR_RESIZE_HPT_DISABLED:
> > +        return g_strdup("disabled");
> > +    case SPAPR_RESIZE_HPT_ENABLED:
> > +        return g_strdup("enabled");
> > +    case SPAPR_RESIZE_HPT_REQUIRED:
> > +        return g_strdup("required");
> > +    }
> > +    assert(0);
> > +}
> > +
> > +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
> > +{
> > +    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > +
> > +    if (strcmp(value, "default") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
> > +    } else if (strcmp(value, "disabled") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> > +    } else if (strcmp(value, "enabled") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
> > +    } else if (strcmp(value, "required") == 0) {
> > +        spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
> > +    } else {
> > +        error_setg(errp, "Bad value for \"resize-hpt\" property");
> > +    }
> > +}
> > +
> >  static void spapr_machine_initfn(Object *obj)
> >  {
> >      sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > @@ -2256,6 +2315,12 @@ static void spapr_machine_initfn(Object *obj)
> >                                      " place of standard EPOW events when possible"
> >                                      " (required for memory hot-unplug support)",
> >                                      NULL);
> > +
> > +    object_property_add_str(obj, "resize-hpt",
> > +                            spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
> > +    object_property_set_description(obj, "resize-hpt",
> > +                                    "Resizing of the Hash Page Table (enabled, disabled, required)",
> > +                                    NULL);
> >  }
> > 
> >  static void spapr_machine_finalizefn(Object *obj)
> > @@ -2707,6 +2772,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> > 
> >      smc->dr_lmb_enabled = true;
> >      smc->tcg_default_cpu = "POWER8";
> > +    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
> >      mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
> >      fwc->get_dev_path = spapr_get_fw_dev_path;
> >      nc->nmi_monitor_handler = spapr_nmi;
> > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > index fd9f1d4..72a9c4d 100644
> > --- a/hw/ppc/spapr_hcall.c
> > +++ b/hw/ppc/spapr_hcall.c
> > @@ -355,6 +355,38 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >      return H_SUCCESS;
> >  }
> > 
> > +static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
> > +                                         sPAPRMachineState *spapr,
> > +                                         target_ulong opcode,
> > +                                         target_ulong *args)
> > +{
> > +    target_ulong flags = args[0];
> > +    target_ulong shift = args[1];
> > +
> > +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
> > +        return H_AUTHORITY;
> > +    }
> > +
> > +    trace_spapr_h_resize_hpt_prepare(flags, shift);
> > +    return H_HARDWARE;
> > +}
> > +
> > +static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu,
> > +                                        sPAPRMachineState *spapr,
> > +                                        target_ulong opcode,
> > +                                        target_ulong *args)
> > +{
> > +    target_ulong flags = args[0];
> > +    target_ulong shift = args[1];
> > +
> > +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
> > +        return H_AUTHORITY;
> > +    }
> > +
> > +    trace_spapr_h_resize_hpt_commit(flags, shift);
> > +    return H_HARDWARE;
> > +}
> > +
> >  static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> >                                  target_ulong opcode, target_ulong *args)
> >  {
> > @@ -1134,6 +1166,10 @@ static void hypercall_register_types(void)
> >      /* hcall-bulk */
> >      spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
> > 
> > +    /* hcall-hpt-resize */
> > +    spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
> > +    spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
> > +
> >      /* hcall-splpar */
> >      spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
> >      spapr_register_hypercall(H_CEDE, h_cede);
> > diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
> > index 2297ead..bf59a8f 100644
> > --- a/hw/ppc/trace-events
> > +++ b/hw/ppc/trace-events
> > @@ -16,6 +16,8 @@ spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes"
> >  # hw/ppc/spapr_hcall.c
> >  spapr_cas_pvr_try(uint32_t pvr) "%x"
> >  spapr_cas_pvr(uint32_t cur_pvr, bool cpu_match, uint32_t new_pvr, uint64_t pcr) "current=%x, cpu_match=%u, new=%x, compat flags=%"PRIx64
> > +spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
> > +spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
> > 
> >  # hw/ppc/spapr_iommu.c
> >  spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index a2d8964..d2c758b 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -31,6 +31,13 @@ typedef struct sPAPRMachineState sPAPRMachineState;
> >  #define SPAPR_MACHINE_CLASS(klass) \
> >      OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)
> > 
> > +typedef enum {
> > +    SPAPR_RESIZE_HPT_DEFAULT = 0,
> > +    SPAPR_RESIZE_HPT_DISABLED,
> > +    SPAPR_RESIZE_HPT_ENABLED,
> > +    SPAPR_RESIZE_HPT_REQUIRED,
> > +} sPAPRResizeHPT;
> > +
> >  /**
> >   * sPAPRMachineClass:
> >   */
> > @@ -46,6 +53,7 @@ struct sPAPRMachineClass {
> >                            uint64_t *buid, hwaddr *pio, 
> >                            hwaddr *mmio32, hwaddr *mmio64,
> >                            unsigned n_dma, uint32_t *liobns, Error **errp);
> > +    sPAPRResizeHPT resize_hpt_default;
> >  };
> > 
> >  /**
> > @@ -61,6 +69,7 @@ struct sPAPRMachineState {
> >      XICSState *xics;
> >      DeviceState *rtc;
> > 
> > +    sPAPRResizeHPT resize_hpt;
> >      void *htab;
> >      uint32_t htab_shift;
> >      hwaddr rma_size;
> > @@ -347,6 +356,8 @@ struct sPAPRMachineState {
> >  #define H_XIRR_X                0x2FC
> >  #define H_RANDOM                0x300
> >  #define H_SET_MODE              0x31C
> > +#define H_RESIZE_HPT_PREPARE    0x36C
> > +#define H_RESIZE_HPT_COMMIT     0x370
> >  #define H_SIGNAL_SYS_RESET      0x380
> >  #define MAX_HCALL_OPCODE        H_SIGNAL_SYS_RESET
> > 
> > diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> > index 15e12f3..dcd8cef 100644
> > --- a/target-ppc/kvm.c
> > +++ b/target-ppc/kvm.c
> > @@ -22,6 +22,7 @@
> >  #include <linux/kvm.h>
> > 
> >  #include "qemu-common.h"
> > +#include "qapi/error.h"
> >  #include "qemu/error-report.h"
> >  #include "cpu.h"
> >  #include "qemu/timer.h"
> > @@ -2269,6 +2270,7 @@ int kvmppc_reset_htab(int shift_hint)
> >          /* Full emulation, tell caller to allocate htab itself */
> >          return 0;
> >      }
> > +
> >      if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
> >          int ret;
> >          ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
> > @@ -2672,3 +2674,27 @@ int kvmppc_enable_hwrng(void)
> > 
> >      return kvmppc_enable_hcall(kvm_state, H_RANDOM);
> >  }
> > +
> > +void kvmppc_check_papr_resize_hpt(Error **errp)
> > +{
> > +    if (!kvm_enabled()) {
> > +        return;
> > +    }
> > +
> > +    /* TODO: Check specific capabilities for HPT resize aware host kernels */
> > +
> > +    /*
> > +     * It's tempting to try to check directly if the HPT is under our
> > +     * control or KVM's, which is what's really relevant here.
> > +     * Unfortunately, in order to correctly size the HPT, we need to
> > +     * know if we can do resizing, _before_ we attempt to allocate it
> > +     * with KVM.  Before that point, we don't officially know whether
> > +     * we'll control the HPT or not.  So we have to use a fallback
> > +     * test for PR vs HV KVM to predict that.
> > +     */
> > +    if (kvmppc_is_pr(kvm_state)) {
> > +        return;
> > +    }
> > +
> > +    error_setg(errp, "Hash page table resizing not available with this KVM version");
> > +}
> > diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> > index 841a29b..3e852ba 100644
> > --- a/target-ppc/kvm_ppc.h
> > +++ b/target-ppc/kvm_ppc.h
> > @@ -59,6 +59,7 @@ bool kvmppc_has_cap_htm(void);
> >  int kvmppc_enable_hwrng(void);
> >  int kvmppc_put_books_sregs(PowerPCCPU *cpu);
> >  PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
> > +void kvmppc_check_papr_resize_hpt(Error **errp);
> > 
> >  #else
> > 
> > @@ -270,6 +271,10 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
> >      return NULL;
> >  }
> > 
> > +static inline void kvmppc_check_papr_resize_hpt(Error **errp)
> > +{
> > +    return;
> > +}
> >  #endif
> > 
> >  #ifndef CONFIG_KVM
>
David Gibson Dec. 12, 2016, 12:40 a.m. UTC | #6
On Fri, Dec 09, 2016 at 11:08:02AM -0600, Michael Roth wrote:
> Quoting David Gibson (2016-12-08 20:23:11)
> > This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
> > H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
> > extension to allow run time resizing of a guest's hash page table.  It
> > also adds a new machine property for controlling whether this new
> > facility is available, and logic to check that against availability
> > with KVM (only supported with KVM PR for now).
> > 
> > Finally, it adds a new string to the hypertas property in the device
> > tree, advertising to the guest the availability of the HPT resizing
> > hypercalls.  This is a tentative suggested value, and would need to be
> > standardized by PAPR before being merged.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
> >  hw/ppc/trace-events    |  2 ++
> >  include/hw/ppc/spapr.h | 11 +++++++++
> >  target-ppc/kvm.c       | 26 ++++++++++++++++++++
> >  target-ppc/kvm_ppc.h   |  5 ++++
> >  6 files changed, 146 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 0f25e83..ecb0822 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -760,6 +760,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
> >      if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
> >          add_str(hypertas, "hcall-multi-tce");
> >      }
> > +
> > +    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
> > +        add_str(hypertas, "hcall-hpt-resize");
> > +    }
> > +
> >      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
> >                       hypertas->str, hypertas->len));
> >      g_string_free(hypertas, TRUE);
> > @@ -1839,11 +1844,31 @@ static void ppc_spapr_init(MachineState *machine)
> >      long load_limit, fw_size;
> >      char *filename;
> >      int smt = kvmppc_smt_threads();
> > +    Error *resize_hpt_err = NULL;
> > 
> >      msi_nonbroken = true;
> > 
> >      QLIST_INIT(&spapr->phbs);
> > 
> > +    /* Check HPT resizing availability */
> > +    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
> > +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
> > +        if (resize_hpt_err) {
> > +            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> > +            error_free(resize_hpt_err);
> > +            resize_hpt_err = NULL;
> > +        } else {
> > +            spapr->resize_hpt = smc->resize_hpt_default;
> > +        }
> > +    }
> > +
> > +    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
> > +
> > +    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
> > +        error_report_err(resize_hpt_err);
> > +        exit(1);
> > +    }
> 
> I'm also a bit confused by this. Aren't semantics that HPT_ENABLED will allow
> a fallback to non-resizable HPTs if the feature isn't available (whereas
> HPT_REQUIRED won't)? I don't understand how such a fallback can ever be reached
> with this check in place.

We fall back if the _guest_ doesn't support HPT resizing.  This error
path is if the _host_ can't support HPT resizing.  Some extra
information about this in the reply I'll shortly send to Thomas.
David Gibson Dec. 12, 2016, 1:06 a.m. UTC | #7
On Fri, Dec 09, 2016 at 10:18:59AM +0100, Thomas Huth wrote:
> On 09.12.2016 10:09, David Gibson wrote:
> > On Fri, Dec 09, 2016 at 09:18:51AM +0100, Thomas Huth wrote:
> >> On 09.12.2016 03:23, David Gibson wrote:
> >>> This introduces stub implementations of the H_RESIZE_HPT_PREPARE and
> >>> H_RESIZE_HPT_COMMIT hypercalls which we hope to add in a PAPR
> >>> extension to allow run time resizing of a guest's hash page table.  It
> >>> also adds a new machine property for controlling whether this new
> >>> facility is available, and logic to check that against availability
> >>> with KVM (only supported with KVM PR for now).
> >>>
> >>> Finally, it adds a new string to the hypertas property in the device
> >>> tree, advertising to the guest the availability of the HPT resizing
> >>> hypercalls.  This is a tentative suggested value, and would need to be
> >>> standardized by PAPR before being merged.
> >>>
> >>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>> ---
> >>>  hw/ppc/spapr.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>  hw/ppc/spapr_hcall.c   | 36 +++++++++++++++++++++++++++
> >>>  hw/ppc/trace-events    |  2 ++
> >>>  include/hw/ppc/spapr.h | 11 +++++++++
> >>>  target-ppc/kvm.c       | 26 ++++++++++++++++++++
> >>>  target-ppc/kvm_ppc.h   |  5 ++++
> >>>  6 files changed, 146 insertions(+)
> >>>
> >>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>> index 0f25e83..ecb0822 100644
> >>> --- a/hw/ppc/spapr.c
> >>> +++ b/hw/ppc/spapr.c
> >>> @@ -760,6 +760,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
> >>>      if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
> >>>          add_str(hypertas, "hcall-multi-tce");
> >>>      }
> >>> +
> >>> +    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
> >>> +        add_str(hypertas, "hcall-hpt-resize");
> >>> +    }
> >>> +
> >>>      _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
> >>>                       hypertas->str, hypertas->len));
> >>>      g_string_free(hypertas, TRUE);
> >>> @@ -1839,11 +1844,31 @@ static void ppc_spapr_init(MachineState *machine)
> >>>      long load_limit, fw_size;
> >>>      char *filename;
> >>>      int smt = kvmppc_smt_threads();
> >>> +    Error *resize_hpt_err = NULL;
> >>>  
> >>>      msi_nonbroken = true;
> >>>  
> >>>      QLIST_INIT(&spapr->phbs);
> >>>  
> >>> +    /* Check HPT resizing availability */
> >>> +    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
> >>> +    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
> >>> +        if (resize_hpt_err) {
> >>> +            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
> >>> +            error_free(resize_hpt_err);
> >>> +            resize_hpt_err = NULL;
> >>> +        } else {
> >>> +            spapr->resize_hpt = smc->resize_hpt_default;
> >>> +        }
> >>> +    }
> >>> +
> >>> +    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
> >>> +
> >>> +    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
> >>> +        error_report_err(resize_hpt_err);
> >>> +        exit(1);
> >>> +    }
> >>
> >> The logic here is IMHO a little bit hard to understand. Why do you need
> >> the SPAPR_RESIZE_HPT_DEFAULT state here at all? Wouldn't it be
> >> sufficient to start with SPAPR_RESIZE_HPT_DISABLED by default and to
> >> only enable it when the extension is available?
> > 
> > Because if the user explicitly sets something, we should never
> > override itin the logic here.  In order to determine whether the user
> > has set something explicitly, we need another value that means "not
> > set explicitly".
> 
> Hmm, ok, I think I slowly get it. Another question: Does this also take
> care of the handling if the user migrates from a machine that supports
> HPT resizing to a machine that does not support this feature?

Ok, so there are a fair few cases here.  The key point, though is that
even before now, we helpefully transferred the size of the HPT in the
migration stream before sending its contents (that was done so we
could update the sizing heuristics without breaking migration).

For TCG and KVM PR:

Here the ability to HPT resize depends purely on the qemu version.
Assuming this gets merged for qemu-2.9, pseries-2.9 TCG or PR guests
should always support resizing so that should be fine.

If you explicitly enabled HPT resizing for a pseries-2.8 guest running
under qemu-2.9 to an older qemu, then things could get odd.  The
destination guest would get an HPT of the size it had at the source at
the point of migration.  Any further attempts to resize would fail.
Given you have to explicitly set up this poor situation and the
failure mode isn't _that_ bad, I think that's ok.

For KVM HV:

Assuming we're migrating from a host kernel which can support resize
to one which can't:

If the resize-hpt is (explicitly) set differently on the the two ends
then, well, don't do that.  So assume it's the same on each end:

If resize-hpt=disabled, things are fine, obviously.

If resize=hpt=enabled|required, then qemu will fail to start on the
destination because the host kernel doesn't support it.  So you should
know something's wrong before even attempting migration, which is the
best we can do.

If resize-hpt=default is the messiest case.  In theory we could get a
failure case to the one mentioned above for TCG guests.  In practice,
however, the migration will fail before completing: kernels without
the HPT resizing support in fact don't even allow the guest HPT to be
resized at reset time.  The initial setup of the destination will
initialize the in-kernel HPT based on maxmem size (because resize is
unavailable on this end).  When the migration stream comes in, we'll
try to reallocate the in-kernel HPT to the incoming size and fail
because the host kernel won't allow us to.


I think this behaviour is as good as we can reasonably get.
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0f25e83..ecb0822 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -760,6 +760,11 @@  static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
     if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
         add_str(hypertas, "hcall-multi-tce");
     }
+
+    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
+        add_str(hypertas, "hcall-hpt-resize");
+    }
+
     _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
                      hypertas->str, hypertas->len));
     g_string_free(hypertas, TRUE);
@@ -1839,11 +1844,31 @@  static void ppc_spapr_init(MachineState *machine)
     long load_limit, fw_size;
     char *filename;
     int smt = kvmppc_smt_threads();
+    Error *resize_hpt_err = NULL;
 
     msi_nonbroken = true;
 
     QLIST_INIT(&spapr->phbs);
 
+    /* Check HPT resizing availability */
+    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
+        if (resize_hpt_err) {
+            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
+            error_free(resize_hpt_err);
+            resize_hpt_err = NULL;
+        } else {
+            spapr->resize_hpt = smc->resize_hpt_default;
+        }
+    }
+
+    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
+
+    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
+        error_report_err(resize_hpt_err);
+        exit(1);
+    }
+
     /* Allocate RMA if necessary */
     rma_alloc_size = kvmppc_alloc_rma(&rma);
 
@@ -2236,6 +2261,40 @@  static void spapr_set_modern_hotplug_events(Object *obj, bool value,
     spapr->use_hotplug_event_source = value;
 }
 
+static char *spapr_get_resize_hpt(Object *obj, Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    switch (spapr->resize_hpt) {
+    case SPAPR_RESIZE_HPT_DEFAULT:
+        return g_strdup("default");
+    case SPAPR_RESIZE_HPT_DISABLED:
+        return g_strdup("disabled");
+    case SPAPR_RESIZE_HPT_ENABLED:
+        return g_strdup("enabled");
+    case SPAPR_RESIZE_HPT_REQUIRED:
+        return g_strdup("required");
+    }
+    assert(0);
+}
+
+static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    if (strcmp(value, "default") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
+    } else if (strcmp(value, "disabled") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
+    } else if (strcmp(value, "enabled") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
+    } else if (strcmp(value, "required") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
+    } else {
+        error_setg(errp, "Bad value for \"resize-hpt\" property");
+    }
+}
+
 static void spapr_machine_initfn(Object *obj)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
@@ -2256,6 +2315,12 @@  static void spapr_machine_initfn(Object *obj)
                                     " place of standard EPOW events when possible"
                                     " (required for memory hot-unplug support)",
                                     NULL);
+
+    object_property_add_str(obj, "resize-hpt",
+                            spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
+    object_property_set_description(obj, "resize-hpt",
+                                    "Resizing of the Hash Page Table (enabled, disabled, required)",
+                                    NULL);
 }
 
 static void spapr_machine_finalizefn(Object *obj)
@@ -2707,6 +2772,7 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
 
     smc->dr_lmb_enabled = true;
     smc->tcg_default_cpu = "POWER8";
+    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
     mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
     fwc->get_dev_path = spapr_get_fw_dev_path;
     nc->nmi_monitor_handler = spapr_nmi;
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index fd9f1d4..72a9c4d 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -355,6 +355,38 @@  static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     return H_SUCCESS;
 }
 
+static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
+                                         sPAPRMachineState *spapr,
+                                         target_ulong opcode,
+                                         target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong shift = args[1];
+
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+        return H_AUTHORITY;
+    }
+
+    trace_spapr_h_resize_hpt_prepare(flags, shift);
+    return H_HARDWARE;
+}
+
+static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu,
+                                        sPAPRMachineState *spapr,
+                                        target_ulong opcode,
+                                        target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong shift = args[1];
+
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+        return H_AUTHORITY;
+    }
+
+    trace_spapr_h_resize_hpt_commit(flags, shift);
+    return H_HARDWARE;
+}
+
 static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                 target_ulong opcode, target_ulong *args)
 {
@@ -1134,6 +1166,10 @@  static void hypercall_register_types(void)
     /* hcall-bulk */
     spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
 
+    /* hcall-hpt-resize */
+    spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
+    spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
+
     /* hcall-splpar */
     spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
     spapr_register_hypercall(H_CEDE, h_cede);
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index 2297ead..bf59a8f 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -16,6 +16,8 @@  spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes"
 # hw/ppc/spapr_hcall.c
 spapr_cas_pvr_try(uint32_t pvr) "%x"
 spapr_cas_pvr(uint32_t cur_pvr, bool cpu_match, uint32_t new_pvr, uint64_t pcr) "current=%x, cpu_match=%u, new=%x, compat flags=%"PRIx64
+spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
+spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
 
 # hw/ppc/spapr_iommu.c
 spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index a2d8964..d2c758b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -31,6 +31,13 @@  typedef struct sPAPRMachineState sPAPRMachineState;
 #define SPAPR_MACHINE_CLASS(klass) \
     OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)
 
+typedef enum {
+    SPAPR_RESIZE_HPT_DEFAULT = 0,
+    SPAPR_RESIZE_HPT_DISABLED,
+    SPAPR_RESIZE_HPT_ENABLED,
+    SPAPR_RESIZE_HPT_REQUIRED,
+} sPAPRResizeHPT;
+
 /**
  * sPAPRMachineClass:
  */
@@ -46,6 +53,7 @@  struct sPAPRMachineClass {
                           uint64_t *buid, hwaddr *pio, 
                           hwaddr *mmio32, hwaddr *mmio64,
                           unsigned n_dma, uint32_t *liobns, Error **errp);
+    sPAPRResizeHPT resize_hpt_default;
 };
 
 /**
@@ -61,6 +69,7 @@  struct sPAPRMachineState {
     XICSState *xics;
     DeviceState *rtc;
 
+    sPAPRResizeHPT resize_hpt;
     void *htab;
     uint32_t htab_shift;
     hwaddr rma_size;
@@ -347,6 +356,8 @@  struct sPAPRMachineState {
 #define H_XIRR_X                0x2FC
 #define H_RANDOM                0x300
 #define H_SET_MODE              0x31C
+#define H_RESIZE_HPT_PREPARE    0x36C
+#define H_RESIZE_HPT_COMMIT     0x370
 #define H_SIGNAL_SYS_RESET      0x380
 #define MAX_HCALL_OPCODE        H_SIGNAL_SYS_RESET
 
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 15e12f3..dcd8cef 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -22,6 +22,7 @@ 
 #include <linux/kvm.h>
 
 #include "qemu-common.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "cpu.h"
 #include "qemu/timer.h"
@@ -2269,6 +2270,7 @@  int kvmppc_reset_htab(int shift_hint)
         /* Full emulation, tell caller to allocate htab itself */
         return 0;
     }
+
     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
         int ret;
         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
@@ -2672,3 +2674,27 @@  int kvmppc_enable_hwrng(void)
 
     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
 }
+
+void kvmppc_check_papr_resize_hpt(Error **errp)
+{
+    if (!kvm_enabled()) {
+        return;
+    }
+
+    /* TODO: Check specific capabilities for HPT resize aware host kernels */
+
+    /*
+     * It's tempting to try to check directly if the HPT is under our
+     * control or KVM's, which is what's really relevant here.
+     * Unfortunately, in order to correctly size the HPT, we need to
+     * know if we can do resizing, _before_ we attempt to allocate it
+     * with KVM.  Before that point, we don't officially know whether
+     * we'll control the HPT or not.  So we have to use a fallback
+     * test for PR vs HV KVM to predict that.
+     */
+    if (kvmppc_is_pr(kvm_state)) {
+        return;
+    }
+
+    error_setg(errp, "Hash page table resizing not available with this KVM version");
+}
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 841a29b..3e852ba 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -59,6 +59,7 @@  bool kvmppc_has_cap_htm(void);
 int kvmppc_enable_hwrng(void);
 int kvmppc_put_books_sregs(PowerPCCPU *cpu);
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
+void kvmppc_check_papr_resize_hpt(Error **errp);
 
 #else
 
@@ -270,6 +271,10 @@  static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
     return NULL;
 }
 
+static inline void kvmppc_check_papr_resize_hpt(Error **errp)
+{
+    return;
+}
 #endif
 
 #ifndef CONFIG_KVM