diff mbox series

[2/6] spapr: Fix reset of transient DR connectors

Message ID 20201218103400.689660-3-groug@kaod.org
State New
Headers show
Series spapr: Fix visibility and traversal of DR connectors | expand

Commit Message

Greg Kurz Dec. 18, 2020, 10:33 a.m. UTC
Documentation of object_property_iter_init() clearly stipulates that
"it is forbidden to modify the property list while iterating". But this
is exactly what we do when resetting transient DR connectors during CAS.
The call to spapr_drc_reset() can finalize the hot-unplug sequence of a
PHB or a PCI bridge, both of which will then in turn destroy their PCI
DRCs. This could potentially invalidate the iterator. It is pure luck
that this haven't caused any issues so far.

Change spapr_drc_reset() to return true if it caused a device to be
removed. Restart from scratch in this case. This can potentially
increase the overall DRC reset time, especially with a high maxmem
which generates a lot of LMB DRCs. But this kind of setup is rare,
and so is the use case of rebooting a guest while doing hot-unplug.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 include/hw/ppc/spapr_drc.h | 3 ++-
 hw/ppc/spapr_drc.c         | 6 +++++-
 hw/ppc/spapr_hcall.c       | 8 +++++++-
 3 files changed, 14 insertions(+), 3 deletions(-)

Comments

Daniel Henrique Barboza Dec. 21, 2020, 8:34 p.m. UTC | #1
On 12/18/20 7:33 AM, Greg Kurz wrote:
> Documentation of object_property_iter_init() clearly stipulates that
> "it is forbidden to modify the property list while iterating". But this
> is exactly what we do when resetting transient DR connectors during CAS.
> The call to spapr_drc_reset() can finalize the hot-unplug sequence of a
> PHB or a PCI bridge, both of which will then in turn destroy their PCI
> DRCs. This could potentially invalidate the iterator. It is pure luck
> that this haven't caused any issues so far.
> 
> Change spapr_drc_reset() to return true if it caused a device to be
> removed. Restart from scratch in this case. This can potentially
> increase the overall DRC reset time, especially with a high maxmem
> which generates a lot of LMB DRCs. But this kind of setup is rare,
> and so is the use case of rebooting a guest while doing hot-unplug.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>

>   include/hw/ppc/spapr_drc.h | 3 ++-
>   hw/ppc/spapr_drc.c         | 6 +++++-
>   hw/ppc/spapr_hcall.c       | 8 +++++++-
>   3 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> index cff5e707d0d9..5d80019f82e2 100644
> --- a/include/hw/ppc/spapr_drc.h
> +++ b/include/hw/ppc/spapr_drc.h
> @@ -224,7 +224,8 @@ static inline bool spapr_drc_hotplugged(DeviceState *dev)
>       return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE);
>   }
>   
> -void spapr_drc_reset(SpaprDrc *drc);
> +/* Returns true if an unplug request completed */
> +bool spapr_drc_reset(SpaprDrc *drc);
>   
>   uint32_t spapr_drc_index(SpaprDrc *drc);
>   SpaprDrcType spapr_drc_type(SpaprDrc *drc);
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 8d62f55066b6..5b5e2ac58a7e 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -417,9 +417,10 @@ void spapr_drc_detach(SpaprDrc *drc)
>       spapr_drc_release(drc);
>   }
>   
> -void spapr_drc_reset(SpaprDrc *drc)
> +bool spapr_drc_reset(SpaprDrc *drc)
>   {
>       SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    bool unplug_completed = false;
>   
>       trace_spapr_drc_reset(spapr_drc_index(drc));
>   
> @@ -428,6 +429,7 @@ void spapr_drc_reset(SpaprDrc *drc)
>        */
>       if (drc->unplug_requested) {
>           spapr_drc_release(drc);
> +        unplug_completed = true;
>       }
>   
>       if (drc->dev) {
> @@ -444,6 +446,8 @@ void spapr_drc_reset(SpaprDrc *drc)
>           drc->ccs_offset = -1;
>           drc->ccs_depth = -1;
>       }
> +
> +    return unplug_completed;
>   }
>   
>   static bool spapr_drc_unplug_requested_needed(void *opaque)
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 4e9d50c254f0..aa22830ac4bd 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1639,6 +1639,7 @@ static void spapr_handle_transient_dev_before_cas(SpaprMachineState *spapr)
>       ObjectPropertyIterator iter;
>   
>       drc_container = container_get(object_get_root(), "/dr-connector");
> +restart:
>       object_property_iter_init(&iter, drc_container);
>       while ((prop = object_property_iter_next(&iter))) {
>           SpaprDrc *drc;
> @@ -1652,8 +1653,13 @@ static void spapr_handle_transient_dev_before_cas(SpaprMachineState *spapr)
>   
>           /*
>            * This will complete any pending plug/unplug requests.
> +         * In case of a unplugged PHB or PCI bridge, this will
> +         * cause some DRCs to be destroyed and thus potentially
> +         * invalidate the iterator.
>            */
> -        spapr_drc_reset(drc);
> +        if (spapr_drc_reset(drc)) {
> +            goto restart;
> +        }
>       }
>   
>       spapr_clear_pending_hotplug_events(spapr);
>
David Gibson Dec. 28, 2020, 7:24 a.m. UTC | #2
On Fri, Dec 18, 2020 at 11:33:56AM +0100, Greg Kurz wrote:
> Documentation of object_property_iter_init() clearly stipulates that
> "it is forbidden to modify the property list while iterating". But this
> is exactly what we do when resetting transient DR connectors during CAS.
> The call to spapr_drc_reset() can finalize the hot-unplug sequence of a
> PHB or a PCI bridge, both of which will then in turn destroy their PCI
> DRCs. This could potentially invalidate the iterator. It is pure luck
> that this haven't caused any issues so far.
> 
> Change spapr_drc_reset() to return true if it caused a device to be
> removed. Restart from scratch in this case. This can potentially
> increase the overall DRC reset time, especially with a high maxmem
> which generates a lot of LMB DRCs. But this kind of setup is rare,
> and so is the use case of rebooting a guest while doing hot-unplug.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>

Applied, thanks.

> ---
>  include/hw/ppc/spapr_drc.h | 3 ++-
>  hw/ppc/spapr_drc.c         | 6 +++++-
>  hw/ppc/spapr_hcall.c       | 8 +++++++-
>  3 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> index cff5e707d0d9..5d80019f82e2 100644
> --- a/include/hw/ppc/spapr_drc.h
> +++ b/include/hw/ppc/spapr_drc.h
> @@ -224,7 +224,8 @@ static inline bool spapr_drc_hotplugged(DeviceState *dev)
>      return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE);
>  }
>  
> -void spapr_drc_reset(SpaprDrc *drc);
> +/* Returns true if an unplug request completed */
> +bool spapr_drc_reset(SpaprDrc *drc);
>  
>  uint32_t spapr_drc_index(SpaprDrc *drc);
>  SpaprDrcType spapr_drc_type(SpaprDrc *drc);
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 8d62f55066b6..5b5e2ac58a7e 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -417,9 +417,10 @@ void spapr_drc_detach(SpaprDrc *drc)
>      spapr_drc_release(drc);
>  }
>  
> -void spapr_drc_reset(SpaprDrc *drc)
> +bool spapr_drc_reset(SpaprDrc *drc)
>  {
>      SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    bool unplug_completed = false;
>  
>      trace_spapr_drc_reset(spapr_drc_index(drc));
>  
> @@ -428,6 +429,7 @@ void spapr_drc_reset(SpaprDrc *drc)
>       */
>      if (drc->unplug_requested) {
>          spapr_drc_release(drc);
> +        unplug_completed = true;
>      }
>  
>      if (drc->dev) {
> @@ -444,6 +446,8 @@ void spapr_drc_reset(SpaprDrc *drc)
>          drc->ccs_offset = -1;
>          drc->ccs_depth = -1;
>      }
> +
> +    return unplug_completed;
>  }
>  
>  static bool spapr_drc_unplug_requested_needed(void *opaque)
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 4e9d50c254f0..aa22830ac4bd 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1639,6 +1639,7 @@ static void spapr_handle_transient_dev_before_cas(SpaprMachineState *spapr)
>      ObjectPropertyIterator iter;
>  
>      drc_container = container_get(object_get_root(), "/dr-connector");
> +restart:
>      object_property_iter_init(&iter, drc_container);
>      while ((prop = object_property_iter_next(&iter))) {
>          SpaprDrc *drc;
> @@ -1652,8 +1653,13 @@ static void spapr_handle_transient_dev_before_cas(SpaprMachineState *spapr)
>  
>          /*
>           * This will complete any pending plug/unplug requests.
> +         * In case of a unplugged PHB or PCI bridge, this will
> +         * cause some DRCs to be destroyed and thus potentially
> +         * invalidate the iterator.
>           */
> -        spapr_drc_reset(drc);
> +        if (spapr_drc_reset(drc)) {
> +            goto restart;
> +        }
>      }
>  
>      spapr_clear_pending_hotplug_events(spapr);
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
index cff5e707d0d9..5d80019f82e2 100644
--- a/include/hw/ppc/spapr_drc.h
+++ b/include/hw/ppc/spapr_drc.h
@@ -224,7 +224,8 @@  static inline bool spapr_drc_hotplugged(DeviceState *dev)
     return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE);
 }
 
-void spapr_drc_reset(SpaprDrc *drc);
+/* Returns true if an unplug request completed */
+bool spapr_drc_reset(SpaprDrc *drc);
 
 uint32_t spapr_drc_index(SpaprDrc *drc);
 SpaprDrcType spapr_drc_type(SpaprDrc *drc);
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 8d62f55066b6..5b5e2ac58a7e 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -417,9 +417,10 @@  void spapr_drc_detach(SpaprDrc *drc)
     spapr_drc_release(drc);
 }
 
-void spapr_drc_reset(SpaprDrc *drc)
+bool spapr_drc_reset(SpaprDrc *drc)
 {
     SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    bool unplug_completed = false;
 
     trace_spapr_drc_reset(spapr_drc_index(drc));
 
@@ -428,6 +429,7 @@  void spapr_drc_reset(SpaprDrc *drc)
      */
     if (drc->unplug_requested) {
         spapr_drc_release(drc);
+        unplug_completed = true;
     }
 
     if (drc->dev) {
@@ -444,6 +446,8 @@  void spapr_drc_reset(SpaprDrc *drc)
         drc->ccs_offset = -1;
         drc->ccs_depth = -1;
     }
+
+    return unplug_completed;
 }
 
 static bool spapr_drc_unplug_requested_needed(void *opaque)
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 4e9d50c254f0..aa22830ac4bd 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1639,6 +1639,7 @@  static void spapr_handle_transient_dev_before_cas(SpaprMachineState *spapr)
     ObjectPropertyIterator iter;
 
     drc_container = container_get(object_get_root(), "/dr-connector");
+restart:
     object_property_iter_init(&iter, drc_container);
     while ((prop = object_property_iter_next(&iter))) {
         SpaprDrc *drc;
@@ -1652,8 +1653,13 @@  static void spapr_handle_transient_dev_before_cas(SpaprMachineState *spapr)
 
         /*
          * This will complete any pending plug/unplug requests.
+         * In case of a unplugged PHB or PCI bridge, this will
+         * cause some DRCs to be destroyed and thus potentially
+         * invalidate the iterator.
          */
-        spapr_drc_reset(drc);
+        if (spapr_drc_reset(drc)) {
+            goto restart;
+        }
     }
 
     spapr_clear_pending_hotplug_events(spapr);