diff mbox series

spapr: Improve handling of memory unplug with old guests

Message ID 161012708715.801107.11418801796987916516.stgit@bahia.lan
State New
Headers show
Series spapr: Improve handling of memory unplug with old guests | expand

Commit Message

Greg Kurz Jan. 8, 2021, 5:31 p.m. UTC
Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
trying to unplug memory from a guest that doesn't support it (eg. rhel6)
no longer generates an error like it used to. Instead, it leaves the
memory around : only a subsequent reboot or manual use of drmgr within
the guest can complete the hot-unplug sequence. A flag was added to
SpaprMachineClass so that this new behavior only applies to the default
machine type.

We can do better. CAS processes all pending hot-unplug requests. This
means that we don't really care about what the guest supports if
the hot-unplug request happens before CAS.

All guests that we care for, even old ones, set enough bits in OV5
that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a
heuristic to decide if CAS has already occurred or not.

Always accept unplug requests that happen before CAS since CAS will
process them. Restore the previous behavior of rejecting them after
CAS when we know that the guest doesn't support memory hot-unplug.

This behavior is suitable for all machine types : this allows to
drop the pre_6_0_memory_unplug flag.

Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/ppc/spapr.c              |   24 +++++++++++++-----------
 hw/ppc/spapr_events.c       |    3 +--
 hw/ppc/spapr_ovec.c         |    7 +++++++
 include/hw/ppc/spapr.h      |    2 +-
 include/hw/ppc/spapr_ovec.h |    1 +
 5 files changed, 23 insertions(+), 14 deletions(-)

Comments

Daniel Henrique Barboza Jan. 9, 2021, 12:11 a.m. UTC | #1
On 1/8/21 2:31 PM, Greg Kurz wrote:
> Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> trying to unplug memory from a guest that doesn't support it (eg. rhel6)
> no longer generates an error like it used to. Instead, it leaves the
> memory around : only a subsequent reboot or manual use of drmgr within
> the guest can complete the hot-unplug sequence. A flag was added to
> SpaprMachineClass so that this new behavior only applies to the default
> machine type.
> 
> We can do better. CAS processes all pending hot-unplug requests. This
> means that we don't really care about what the guest supports if
> the hot-unplug request happens before CAS.
> 
> All guests that we care for, even old ones, set enough bits in OV5
> that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a
> heuristic to decide if CAS has already occurred or not.
> 
> Always accept unplug requests that happen before CAS since CAS will
> process them. Restore the previous behavior of rejecting them after
> CAS when we know that the guest doesn't support memory hot-unplug.
> 
> This behavior is suitable for all machine types : this allows to
> drop the pre_6_0_memory_unplug flag.
> 
> Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>

>   hw/ppc/spapr.c              |   24 +++++++++++++-----------
>   hw/ppc/spapr_events.c       |    3 +--
>   hw/ppc/spapr_ovec.c         |    7 +++++++
>   include/hw/ppc/spapr.h      |    2 +-
>   include/hw/ppc/spapr_ovec.h |    1 +
>   5 files changed, 23 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 2c403b574e37..6c47466fc2f1 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4048,6 +4048,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
>       }
>   }
>   
> +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
> +{
> +    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
> +        /*
> +         * CAS will process all pending unplug requests.
> +         *
> +         * HACK: a guest could theoretically have cleared all bits in OV5,
> +         * but none of the guests we care for do.
> +         */
> +        spapr_ovec_empty(spapr->ov5_cas);
> +}
> +
>   static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
>                                                   DeviceState *dev, Error **errp)
>   {
> @@ -4056,16 +4068,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
>       SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
>   
>       if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> -        if (!smc->pre_6_0_memory_unplug ||
> -            spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
> +        if (spapr_memory_hot_unplug_supported(sms)) {
>               spapr_memory_unplug_request(hotplug_dev, dev, errp);
>           } else {
> -            /* NOTE: this means there is a window after guest reset, prior to
> -             * CAS negotiation, where unplug requests will fail due to the
> -             * capability not being detected yet. This is a bit different than
> -             * the case with PCI unplug, where the events will be queued and
> -             * eventually handled by the guest after boot
> -             */
>               error_setg(errp, "Memory hot unplug not supported for this guest");
>           }
>       } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> @@ -4543,11 +4548,8 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
>    */
>   static void spapr_machine_5_2_class_options(MachineClass *mc)
>   {
> -    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> -
>       spapr_machine_6_0_class_options(mc);
>       compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
> -    smc->pre_6_0_memory_unplug = true;
>   }
>   
>   DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 6aedd988b3d0..d51daedfa6e0 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -658,8 +658,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>           /* we should not be using count_indexed value unless the guest
>            * supports dedicated hotplug event source
>            */
> -        g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug ||
> -                 spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT));
> +        g_assert(spapr_memory_hot_unplug_supported(spapr));
>           hp->drc_id.count_indexed.count =
>               cpu_to_be32(drc_id->count_indexed.count);
>           hp->drc_id.count_indexed.index =
> diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
> index dd003f1763fd..b2567caa5cf4 100644
> --- a/hw/ppc/spapr_ovec.c
> +++ b/hw/ppc/spapr_ovec.c
> @@ -125,6 +125,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr)
>       return test_bit(bitnr, ov->bitmap) ? true : false;
>   }
>   
> +bool spapr_ovec_empty(SpaprOptionVector *ov)
> +{
> +    g_assert(ov);
> +
> +    return bitmap_empty(ov->bitmap, OV_MAXBITS);
> +}
> +
>   static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap,
>                                    long bitmap_offset)
>   {
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 1cc19575f548..3ad2ff713279 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -142,7 +142,6 @@ struct SpaprMachineClass {
>       hwaddr rma_limit;          /* clamp the RMA to this size */
>       bool pre_5_1_assoc_refpoints;
>       bool pre_5_2_numa_associativity;
> -    bool pre_6_0_memory_unplug;
>   
>       bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
>                             uint64_t *buid, hwaddr *pio,
> @@ -950,4 +949,5 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>   
>   void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>   hwaddr spapr_get_rtas_addr(void);
> +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
>   #endif /* HW_SPAPR_H */
> diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
> index d4dee9e06a01..48b716a060c2 100644
> --- a/include/hw/ppc/spapr_ovec.h
> +++ b/include/hw/ppc/spapr_ovec.h
> @@ -71,6 +71,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov);
>   void spapr_ovec_set(SpaprOptionVector *ov, long bitnr);
>   void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr);
>   bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr);
> +bool spapr_ovec_empty(SpaprOptionVector *ov);
>   SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector);
>   int spapr_dt_ovec(void *fdt, int fdt_offset,
>                     SpaprOptionVector *ov, const char *name);
> 
> 
>
David Gibson Jan. 13, 2021, 1:20 a.m. UTC | #2
On Fri, Jan 08, 2021 at 06:31:27PM +0100, Greg Kurz wrote:
> Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> trying to unplug memory from a guest that doesn't support it (eg. rhel6)
> no longer generates an error like it used to. Instead, it leaves the
> memory around : only a subsequent reboot or manual use of drmgr within
> the guest can complete the hot-unplug sequence. A flag was added to
> SpaprMachineClass so that this new behavior only applies to the default
> machine type.
> 
> We can do better. CAS processes all pending hot-unplug requests. This
> means that we don't really care about what the guest supports if
> the hot-unplug request happens before CAS.
> 
> All guests that we care for, even old ones, set enough bits in OV5
> that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a
> heuristic to decide if CAS has already occurred or not.
> 
> Always accept unplug requests that happen before CAS since CAS will
> process them. Restore the previous behavior of rejecting them after
> CAS when we know that the guest doesn't support memory hot-unplug.
> 
> This behavior is suitable for all machine types : this allows to
> drop the pre_6_0_memory_unplug flag.
> 
> Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> Signed-off-by: Greg Kurz <groug@kaod.org>

Applied, sorry it took me so long.

> ---
>  hw/ppc/spapr.c              |   24 +++++++++++++-----------
>  hw/ppc/spapr_events.c       |    3 +--
>  hw/ppc/spapr_ovec.c         |    7 +++++++
>  include/hw/ppc/spapr.h      |    2 +-
>  include/hw/ppc/spapr_ovec.h |    1 +
>  5 files changed, 23 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 2c403b574e37..6c47466fc2f1 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4048,6 +4048,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
>      }
>  }
>  
> +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
> +{
> +    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
> +        /*
> +         * CAS will process all pending unplug requests.
> +         *
> +         * HACK: a guest could theoretically have cleared all bits in OV5,
> +         * but none of the guests we care for do.
> +         */

Hrm.  This is pretty ugly - I thought we had a better canonical way of
determining if CAS had already happened this boot, but it appears
not.  I don't want to delay this patch, since it is an important fix,
but it would be nice if you could do a later cleanup to have a nicer
way of detecting CAS-hasn't-happened.

> +        spapr_ovec_empty(spapr->ov5_cas);
> +}
> +
>  static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
>                                                  DeviceState *dev, Error **errp)
>  {
> @@ -4056,16 +4068,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
>      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
>  
>      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> -        if (!smc->pre_6_0_memory_unplug ||
> -            spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
> +        if (spapr_memory_hot_unplug_supported(sms)) {
>              spapr_memory_unplug_request(hotplug_dev, dev, errp);
>          } else {
> -            /* NOTE: this means there is a window after guest reset, prior to
> -             * CAS negotiation, where unplug requests will fail due to the
> -             * capability not being detected yet. This is a bit different than
> -             * the case with PCI unplug, where the events will be queued and
> -             * eventually handled by the guest after boot
> -             */
>              error_setg(errp, "Memory hot unplug not supported for this guest");
>          }
>      } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> @@ -4543,11 +4548,8 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
>   */
>  static void spapr_machine_5_2_class_options(MachineClass *mc)
>  {
> -    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> -
>      spapr_machine_6_0_class_options(mc);
>      compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
> -    smc->pre_6_0_memory_unplug = true;
>  }
>  
>  DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 6aedd988b3d0..d51daedfa6e0 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -658,8 +658,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>          /* we should not be using count_indexed value unless the guest
>           * supports dedicated hotplug event source
>           */
> -        g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug ||
> -                 spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT));
> +        g_assert(spapr_memory_hot_unplug_supported(spapr));
>          hp->drc_id.count_indexed.count =
>              cpu_to_be32(drc_id->count_indexed.count);
>          hp->drc_id.count_indexed.index =
> diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
> index dd003f1763fd..b2567caa5cf4 100644
> --- a/hw/ppc/spapr_ovec.c
> +++ b/hw/ppc/spapr_ovec.c
> @@ -125,6 +125,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr)
>      return test_bit(bitnr, ov->bitmap) ? true : false;
>  }
>  
> +bool spapr_ovec_empty(SpaprOptionVector *ov)
> +{
> +    g_assert(ov);
> +
> +    return bitmap_empty(ov->bitmap, OV_MAXBITS);
> +}
> +
>  static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap,
>                                   long bitmap_offset)
>  {
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 1cc19575f548..3ad2ff713279 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -142,7 +142,6 @@ struct SpaprMachineClass {
>      hwaddr rma_limit;          /* clamp the RMA to this size */
>      bool pre_5_1_assoc_refpoints;
>      bool pre_5_2_numa_associativity;
> -    bool pre_6_0_memory_unplug;
>  
>      bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
>                            uint64_t *buid, hwaddr *pio, 
> @@ -950,4 +949,5 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>  
>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>  hwaddr spapr_get_rtas_addr(void);
> +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
>  #endif /* HW_SPAPR_H */
> diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
> index d4dee9e06a01..48b716a060c2 100644
> --- a/include/hw/ppc/spapr_ovec.h
> +++ b/include/hw/ppc/spapr_ovec.h
> @@ -71,6 +71,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov);
>  void spapr_ovec_set(SpaprOptionVector *ov, long bitnr);
>  void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr);
>  bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr);
> +bool spapr_ovec_empty(SpaprOptionVector *ov);
>  SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector);
>  int spapr_dt_ovec(void *fdt, int fdt_offset,
>                    SpaprOptionVector *ov, const char *name);
> 
>
Greg Kurz Jan. 13, 2021, 5:01 p.m. UTC | #3
On Wed, 13 Jan 2021 12:20:58 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Jan 08, 2021 at 06:31:27PM +0100, Greg Kurz wrote:
> > Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> > trying to unplug memory from a guest that doesn't support it (eg. rhel6)
> > no longer generates an error like it used to. Instead, it leaves the
> > memory around : only a subsequent reboot or manual use of drmgr within
> > the guest can complete the hot-unplug sequence. A flag was added to
> > SpaprMachineClass so that this new behavior only applies to the default
> > machine type.
> > 
> > We can do better. CAS processes all pending hot-unplug requests. This
> > means that we don't really care about what the guest supports if
> > the hot-unplug request happens before CAS.
> > 
> > All guests that we care for, even old ones, set enough bits in OV5
> > that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a
> > heuristic to decide if CAS has already occurred or not.
> > 
> > Always accept unplug requests that happen before CAS since CAS will
> > process them. Restore the previous behavior of rejecting them after
> > CAS when we know that the guest doesn't support memory hot-unplug.
> > 
> > This behavior is suitable for all machine types : this allows to
> > drop the pre_6_0_memory_unplug flag.
> > 
> > Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> 
> Applied, sorry it took me so long.
> 

No problem. Any estimate for your next PR ?

> > ---
> >  hw/ppc/spapr.c              |   24 +++++++++++++-----------
> >  hw/ppc/spapr_events.c       |    3 +--
> >  hw/ppc/spapr_ovec.c         |    7 +++++++
> >  include/hw/ppc/spapr.h      |    2 +-
> >  include/hw/ppc/spapr_ovec.h |    1 +
> >  5 files changed, 23 insertions(+), 14 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 2c403b574e37..6c47466fc2f1 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -4048,6 +4048,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
> >      }
> >  }
> >  
> > +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
> > +{
> > +    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
> > +        /*
> > +         * CAS will process all pending unplug requests.
> > +         *
> > +         * HACK: a guest could theoretically have cleared all bits in OV5,
> > +         * but none of the guests we care for do.
> > +         */
> 
> Hrm.  This is pretty ugly - I thought we had a better canonical way of
> determining if CAS had already happened this boot, but it appears
> not.  I don't want to delay this patch, since it is an important fix,
> but it would be nice if you could do a later cleanup to have a nicer
> way of detecting CAS-hasn't-happened.
> 

Yeah, I fully agree this is ugly. I'll try to find something nicer later.

Thanks for taking it anyway !

Cheers,

--
Greg

> > +        spapr_ovec_empty(spapr->ov5_cas);
> > +}
> > +
> >  static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
> >                                                  DeviceState *dev, Error **errp)
> >  {
> > @@ -4056,16 +4068,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
> >      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> >  
> >      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> > -        if (!smc->pre_6_0_memory_unplug ||
> > -            spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
> > +        if (spapr_memory_hot_unplug_supported(sms)) {
> >              spapr_memory_unplug_request(hotplug_dev, dev, errp);
> >          } else {
> > -            /* NOTE: this means there is a window after guest reset, prior to
> > -             * CAS negotiation, where unplug requests will fail due to the
> > -             * capability not being detected yet. This is a bit different than
> > -             * the case with PCI unplug, where the events will be queued and
> > -             * eventually handled by the guest after boot
> > -             */
> >              error_setg(errp, "Memory hot unplug not supported for this guest");
> >          }
> >      } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> > @@ -4543,11 +4548,8 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
> >   */
> >  static void spapr_machine_5_2_class_options(MachineClass *mc)
> >  {
> > -    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > -
> >      spapr_machine_6_0_class_options(mc);
> >      compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
> > -    smc->pre_6_0_memory_unplug = true;
> >  }
> >  
> >  DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index 6aedd988b3d0..d51daedfa6e0 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -658,8 +658,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
> >          /* we should not be using count_indexed value unless the guest
> >           * supports dedicated hotplug event source
> >           */
> > -        g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug ||
> > -                 spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT));
> > +        g_assert(spapr_memory_hot_unplug_supported(spapr));
> >          hp->drc_id.count_indexed.count =
> >              cpu_to_be32(drc_id->count_indexed.count);
> >          hp->drc_id.count_indexed.index =
> > diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
> > index dd003f1763fd..b2567caa5cf4 100644
> > --- a/hw/ppc/spapr_ovec.c
> > +++ b/hw/ppc/spapr_ovec.c
> > @@ -125,6 +125,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr)
> >      return test_bit(bitnr, ov->bitmap) ? true : false;
> >  }
> >  
> > +bool spapr_ovec_empty(SpaprOptionVector *ov)
> > +{
> > +    g_assert(ov);
> > +
> > +    return bitmap_empty(ov->bitmap, OV_MAXBITS);
> > +}
> > +
> >  static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap,
> >                                   long bitmap_offset)
> >  {
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 1cc19575f548..3ad2ff713279 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -142,7 +142,6 @@ struct SpaprMachineClass {
> >      hwaddr rma_limit;          /* clamp the RMA to this size */
> >      bool pre_5_1_assoc_refpoints;
> >      bool pre_5_2_numa_associativity;
> > -    bool pre_6_0_memory_unplug;
> >  
> >      bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
> >                            uint64_t *buid, hwaddr *pio, 
> > @@ -950,4 +949,5 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
> >  
> >  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
> >  hwaddr spapr_get_rtas_addr(void);
> > +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
> >  #endif /* HW_SPAPR_H */
> > diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
> > index d4dee9e06a01..48b716a060c2 100644
> > --- a/include/hw/ppc/spapr_ovec.h
> > +++ b/include/hw/ppc/spapr_ovec.h
> > @@ -71,6 +71,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov);
> >  void spapr_ovec_set(SpaprOptionVector *ov, long bitnr);
> >  void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr);
> >  bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr);
> > +bool spapr_ovec_empty(SpaprOptionVector *ov);
> >  SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector);
> >  int spapr_dt_ovec(void *fdt, int fdt_offset,
> >                    SpaprOptionVector *ov, const char *name);
> > 
> > 
>
David Gibson Jan. 18, 2021, 6:31 a.m. UTC | #4
On Wed, Jan 13, 2021 at 06:01:27PM +0100, Greg Kurz wrote:
> On Wed, 13 Jan 2021 12:20:58 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Fri, Jan 08, 2021 at 06:31:27PM +0100, Greg Kurz wrote:
> > > Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> > > trying to unplug memory from a guest that doesn't support it (eg. rhel6)
> > > no longer generates an error like it used to. Instead, it leaves the
> > > memory around : only a subsequent reboot or manual use of drmgr within
> > > the guest can complete the hot-unplug sequence. A flag was added to
> > > SpaprMachineClass so that this new behavior only applies to the default
> > > machine type.
> > > 
> > > We can do better. CAS processes all pending hot-unplug requests. This
> > > means that we don't really care about what the guest supports if
> > > the hot-unplug request happens before CAS.
> > > 
> > > All guests that we care for, even old ones, set enough bits in OV5
> > > that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a
> > > heuristic to decide if CAS has already occurred or not.
> > > 
> > > Always accept unplug requests that happen before CAS since CAS will
> > > process them. Restore the previous behavior of rejecting them after
> > > CAS when we know that the guest doesn't support memory hot-unplug.
> > > 
> > > This behavior is suitable for all machine types : this allows to
> > > drop the pre_6_0_memory_unplug flag.
> > > 
> > > Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > 
> > Applied, sorry it took me so long.
> > 
> 
> No problem. Any estimate for your next PR ?

Intending to do it tomorrow (Tuesday 19th).

> 
> > > ---
> > >  hw/ppc/spapr.c              |   24 +++++++++++++-----------
> > >  hw/ppc/spapr_events.c       |    3 +--
> > >  hw/ppc/spapr_ovec.c         |    7 +++++++
> > >  include/hw/ppc/spapr.h      |    2 +-
> > >  include/hw/ppc/spapr_ovec.h |    1 +
> > >  5 files changed, 23 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 2c403b574e37..6c47466fc2f1 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -4048,6 +4048,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
> > >      }
> > >  }
> > >  
> > > +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
> > > +{
> > > +    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
> > > +        /*
> > > +         * CAS will process all pending unplug requests.
> > > +         *
> > > +         * HACK: a guest could theoretically have cleared all bits in OV5,
> > > +         * but none of the guests we care for do.
> > > +         */
> > 
> > Hrm.  This is pretty ugly - I thought we had a better canonical way of
> > determining if CAS had already happened this boot, but it appears
> > not.  I don't want to delay this patch, since it is an important fix,
> > but it would be nice if you could do a later cleanup to have a nicer
> > way of detecting CAS-hasn't-happened.
> > 
> 
> Yeah, I fully agree this is ugly. I'll try to find something nicer later.
> 
> Thanks for taking it anyway !
> 
> Cheers,
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2c403b574e37..6c47466fc2f1 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4048,6 +4048,18 @@  static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
     }
 }
 
+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
+{
+    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
+        /*
+         * CAS will process all pending unplug requests.
+         *
+         * HACK: a guest could theoretically have cleared all bits in OV5,
+         * but none of the guests we care for do.
+         */
+        spapr_ovec_empty(spapr->ov5_cas);
+}
+
 static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
                                                 DeviceState *dev, Error **errp)
 {
@@ -4056,16 +4068,9 @@  static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
     SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
 
     if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
-        if (!smc->pre_6_0_memory_unplug ||
-            spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
+        if (spapr_memory_hot_unplug_supported(sms)) {
             spapr_memory_unplug_request(hotplug_dev, dev, errp);
         } else {
-            /* NOTE: this means there is a window after guest reset, prior to
-             * CAS negotiation, where unplug requests will fail due to the
-             * capability not being detected yet. This is a bit different than
-             * the case with PCI unplug, where the events will be queued and
-             * eventually handled by the guest after boot
-             */
             error_setg(errp, "Memory hot unplug not supported for this guest");
         }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
@@ -4543,11 +4548,8 @@  DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
  */
 static void spapr_machine_5_2_class_options(MachineClass *mc)
 {
-    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
-
     spapr_machine_6_0_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
-    smc->pre_6_0_memory_unplug = true;
 }
 
 DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 6aedd988b3d0..d51daedfa6e0 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -658,8 +658,7 @@  static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
         /* we should not be using count_indexed value unless the guest
          * supports dedicated hotplug event source
          */
-        g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug ||
-                 spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT));
+        g_assert(spapr_memory_hot_unplug_supported(spapr));
         hp->drc_id.count_indexed.count =
             cpu_to_be32(drc_id->count_indexed.count);
         hp->drc_id.count_indexed.index =
diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
index dd003f1763fd..b2567caa5cf4 100644
--- a/hw/ppc/spapr_ovec.c
+++ b/hw/ppc/spapr_ovec.c
@@ -125,6 +125,13 @@  bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr)
     return test_bit(bitnr, ov->bitmap) ? true : false;
 }
 
+bool spapr_ovec_empty(SpaprOptionVector *ov)
+{
+    g_assert(ov);
+
+    return bitmap_empty(ov->bitmap, OV_MAXBITS);
+}
+
 static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap,
                                  long bitmap_offset)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 1cc19575f548..3ad2ff713279 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -142,7 +142,6 @@  struct SpaprMachineClass {
     hwaddr rma_limit;          /* clamp the RMA to this size */
     bool pre_5_1_assoc_refpoints;
     bool pre_5_2_numa_associativity;
-    bool pre_6_0_memory_unplug;
 
     bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
                           uint64_t *buid, hwaddr *pio, 
@@ -950,4 +949,5 @@  bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
 
 void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
 hwaddr spapr_get_rtas_addr(void);
+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
 #endif /* HW_SPAPR_H */
diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
index d4dee9e06a01..48b716a060c2 100644
--- a/include/hw/ppc/spapr_ovec.h
+++ b/include/hw/ppc/spapr_ovec.h
@@ -71,6 +71,7 @@  void spapr_ovec_cleanup(SpaprOptionVector *ov);
 void spapr_ovec_set(SpaprOptionVector *ov, long bitnr);
 void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr);
 bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr);
+bool spapr_ovec_empty(SpaprOptionVector *ov);
 SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector);
 int spapr_dt_ovec(void *fdt, int fdt_offset,
                   SpaprOptionVector *ov, const char *name);