diff mbox

[PATCHv2,2/8] spapr: Remove 'awaiting_allocation' DRC flag

Message ID 20170712055317.26225-3-david@gibson.dropbear.id.au
State New
Headers show

Commit Message

David Gibson July 12, 2017, 5:53 a.m. UTC
The awaiting_allocation flag in the DRC was introduced by aab9913
"spapr_drc: Prevent detach racing against attach for CPU DR", allegedly to
prevent a guest crash on racing attach and detach.  Except.. information
from the BZ actually suggests a qemu crash, not a guest crash.  And there
shouldn't be a problem here anyway: if the guest has already moved the DRC
away from UNUSABLE state, the detach would already be deferred, and if it
hadn't it should be safe to detach it (the guest should fail gracefully
when it attempts to change the allocation state).

I think this was probably just a bandaid for some other problem in the
state management.  So, remove awaiting_allocation and associated code.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_drc.c         | 25 +++----------------------
 include/hw/ppc/spapr_drc.h |  1 -
 2 files changed, 3 insertions(+), 23 deletions(-)

Comments

Laurent Vivier July 12, 2017, 9:38 a.m. UTC | #1
On 12/07/2017 07:53, David Gibson wrote:
> The awaiting_allocation flag in the DRC was introduced by aab9913
> "spapr_drc: Prevent detach racing against attach for CPU DR", allegedly to
> prevent a guest crash on racing attach and detach.  Except.. information
> from the BZ actually suggests a qemu crash, not a guest crash.  And there
> shouldn't be a problem here anyway: if the guest has already moved the DRC
> away from UNUSABLE state, the detach would already be deferred, and if it
> hadn't it should be safe to detach it (the guest should fail gracefully
> when it attempts to change the allocation state).
> 
> I think this was probably just a bandaid for some other problem in the
> state management.  So, remove awaiting_allocation and associated code.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Reviewed-by: Laurent Vivier <lvivier@redhat.com>

> ---
>  hw/ppc/spapr_drc.c         | 25 +++----------------------
>  include/hw/ppc/spapr_drc.h |  1 -
>  2 files changed, 3 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 9b07f80..89ba3d6 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -170,19 +170,13 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc)
>      if (!drc->dev) {
>          return RTAS_OUT_NO_SUCH_INDICATOR;
>      }
> -    if (drc->awaiting_release && drc->awaiting_allocation) {
> -        /* kernel is acknowledging a previous hotplug event
> -         * while we are already removing it.
> -         * it's safe to ignore awaiting_allocation here since we know the
> -         * situation is predicated on the guest either already having done
> -         * so (boot-time hotplug), or never being able to acquire in the
> -         * first place (hotplug followed by immediate unplug).
> -         */
> +    if (drc->awaiting_release) {
> +        /* Don't allow the guest to move a device away from UNUSABLE
> +         * state when we want to unplug it */
>          return RTAS_OUT_NO_SUCH_INDICATOR;
>      }
>  
>      drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
> -    drc->awaiting_allocation = false;
>  
>      return RTAS_OUT_SUCCESS;
>  }
> @@ -357,10 +351,6 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
>      drc->fdt = fdt;
>      drc->fdt_start_offset = fdt_start_offset;
>  
> -    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
> -        drc->awaiting_allocation = true;
> -    }
> -
>      object_property_add_link(OBJECT(drc), "device",
>                               object_get_typename(OBJECT(drc->dev)),
>                               (Object **)(&drc->dev),
> @@ -398,12 +388,6 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
>          return;
>      }
>  
> -    if (drc->awaiting_allocation) {
> -        drc->awaiting_release = true;
> -        trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
> -        return;
> -    }
> -
>      spapr_drc_release(drc);
>  }
>  
> @@ -426,8 +410,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
>          spapr_drc_release(drc);
>      }
>  
> -    drc->awaiting_allocation = false;
> -
>      if (drc->dev) {
>          /* A device present at reset is coldplugged */
>          drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
> @@ -493,7 +475,6 @@ static const VMStateDescription vmstate_spapr_drc = {
>          VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
>          VMSTATE_BOOL(configured, sPAPRDRConnector),
>          VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
> -        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
>          VMSTATE_END_OF_LIST()
>      }
>  };
> diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> index 715016b..18a196e 100644
> --- a/include/hw/ppc/spapr_drc.h
> +++ b/include/hw/ppc/spapr_drc.h
> @@ -200,7 +200,6 @@ typedef struct sPAPRDRConnector {
>      sPAPRConfigureConnectorState *ccs;
>  
>      bool awaiting_release;
> -    bool awaiting_allocation;
>  
>      /* device pointer, via link property */
>      DeviceState *dev;
>
Greg Kurz July 12, 2017, 10 a.m. UTC | #2
On Wed, 12 Jul 2017 15:53:11 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:

> The awaiting_allocation flag in the DRC was introduced by aab9913
> "spapr_drc: Prevent detach racing against attach for CPU DR", allegedly to
> prevent a guest crash on racing attach and detach.  Except.. information
> from the BZ actually suggests a qemu crash, not a guest crash.  And there

Can you share an url to the BZ ?

> shouldn't be a problem here anyway: if the guest has already moved the DRC
> away from UNUSABLE state, the detach would already be deferred, and if it
> hadn't it should be safe to detach it (the guest should fail gracefully
> when it attempts to change the allocation state).
> 
> I think this was probably just a bandaid for some other problem in the
> state management.  So, remove awaiting_allocation and associated code.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr_drc.c         | 25 +++----------------------
>  include/hw/ppc/spapr_drc.h |  1 -
>  2 files changed, 3 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 9b07f80..89ba3d6 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -170,19 +170,13 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc)
>      if (!drc->dev) {
>          return RTAS_OUT_NO_SUCH_INDICATOR;
>      }
> -    if (drc->awaiting_release && drc->awaiting_allocation) {
> -        /* kernel is acknowledging a previous hotplug event
> -         * while we are already removing it.
> -         * it's safe to ignore awaiting_allocation here since we know the
> -         * situation is predicated on the guest either already having done
> -         * so (boot-time hotplug), or never being able to acquire in the
> -         * first place (hotplug followed by immediate unplug).
> -         */
> +    if (drc->awaiting_release) {
> +        /* Don't allow the guest to move a device away from UNUSABLE
> +         * state when we want to unplug it */
>          return RTAS_OUT_NO_SUCH_INDICATOR;
>      }
>  
>      drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
> -    drc->awaiting_allocation = false;
>  
>      return RTAS_OUT_SUCCESS;
>  }
> @@ -357,10 +351,6 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
>      drc->fdt = fdt;
>      drc->fdt_start_offset = fdt_start_offset;
>  
> -    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
> -        drc->awaiting_allocation = true;
> -    }
> -
>      object_property_add_link(OBJECT(drc), "device",
>                               object_get_typename(OBJECT(drc->dev)),
>                               (Object **)(&drc->dev),
> @@ -398,12 +388,6 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
>          return;
>      }
>  
> -    if (drc->awaiting_allocation) {
> -        drc->awaiting_release = true;
> -        trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
> -        return;
> -    }
> -
>      spapr_drc_release(drc);
>  }
>  
> @@ -426,8 +410,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
>          spapr_drc_release(drc);
>      }
>  
> -    drc->awaiting_allocation = false;
> -
>      if (drc->dev) {
>          /* A device present at reset is coldplugged */
>          drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
> @@ -493,7 +475,6 @@ static const VMStateDescription vmstate_spapr_drc = {
>          VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
>          VMSTATE_BOOL(configured, sPAPRDRConnector),
>          VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
> -        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
>          VMSTATE_END_OF_LIST()
>      }
>  };
> diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> index 715016b..18a196e 100644
> --- a/include/hw/ppc/spapr_drc.h
> +++ b/include/hw/ppc/spapr_drc.h
> @@ -200,7 +200,6 @@ typedef struct sPAPRDRConnector {
>      sPAPRConfigureConnectorState *ccs;
>  
>      bool awaiting_release;
> -    bool awaiting_allocation;
>  
>      /* device pointer, via link property */
>      DeviceState *dev;
David Gibson July 12, 2017, 11:05 a.m. UTC | #3
On Wed, Jul 12, 2017 at 12:00:01PM +0200, Greg Kurz wrote:
> On Wed, 12 Jul 2017 15:53:11 +1000
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > The awaiting_allocation flag in the DRC was introduced by aab9913
> > "spapr_drc: Prevent detach racing against attach for CPU DR", allegedly to
> > prevent a guest crash on racing attach and detach.  Except.. information
> > from the BZ actually suggests a qemu crash, not a guest crash.  And there
> 
> Can you share an url to the BZ ?

Alas, no.  I have that information second hand from.. Bharata,
think.. and I believe the BZ is an IBM internal one.

> 
> > shouldn't be a problem here anyway: if the guest has already moved the DRC
> > away from UNUSABLE state, the detach would already be deferred, and if it
> > hadn't it should be safe to detach it (the guest should fail gracefully
> > when it attempts to change the allocation state).
> > 
> > I think this was probably just a bandaid for some other problem in the
> > state management.  So, remove awaiting_allocation and associated code.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr_drc.c         | 25 +++----------------------
> >  include/hw/ppc/spapr_drc.h |  1 -
> >  2 files changed, 3 insertions(+), 23 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > index 9b07f80..89ba3d6 100644
> > --- a/hw/ppc/spapr_drc.c
> > +++ b/hw/ppc/spapr_drc.c
> > @@ -170,19 +170,13 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc)
> >      if (!drc->dev) {
> >          return RTAS_OUT_NO_SUCH_INDICATOR;
> >      }
> > -    if (drc->awaiting_release && drc->awaiting_allocation) {
> > -        /* kernel is acknowledging a previous hotplug event
> > -         * while we are already removing it.
> > -         * it's safe to ignore awaiting_allocation here since we know the
> > -         * situation is predicated on the guest either already having done
> > -         * so (boot-time hotplug), or never being able to acquire in the
> > -         * first place (hotplug followed by immediate unplug).
> > -         */
> > +    if (drc->awaiting_release) {
> > +        /* Don't allow the guest to move a device away from UNUSABLE
> > +         * state when we want to unplug it */
> >          return RTAS_OUT_NO_SUCH_INDICATOR;
> >      }
> >  
> >      drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
> > -    drc->awaiting_allocation = false;
> >  
> >      return RTAS_OUT_SUCCESS;
> >  }
> > @@ -357,10 +351,6 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
> >      drc->fdt = fdt;
> >      drc->fdt_start_offset = fdt_start_offset;
> >  
> > -    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
> > -        drc->awaiting_allocation = true;
> > -    }
> > -
> >      object_property_add_link(OBJECT(drc), "device",
> >                               object_get_typename(OBJECT(drc->dev)),
> >                               (Object **)(&drc->dev),
> > @@ -398,12 +388,6 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
> >          return;
> >      }
> >  
> > -    if (drc->awaiting_allocation) {
> > -        drc->awaiting_release = true;
> > -        trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
> > -        return;
> > -    }
> > -
> >      spapr_drc_release(drc);
> >  }
> >  
> > @@ -426,8 +410,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
> >          spapr_drc_release(drc);
> >      }
> >  
> > -    drc->awaiting_allocation = false;
> > -
> >      if (drc->dev) {
> >          /* A device present at reset is coldplugged */
> >          drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
> > @@ -493,7 +475,6 @@ static const VMStateDescription vmstate_spapr_drc = {
> >          VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
> >          VMSTATE_BOOL(configured, sPAPRDRConnector),
> >          VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
> > -        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
> >          VMSTATE_END_OF_LIST()
> >      }
> >  };
> > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> > index 715016b..18a196e 100644
> > --- a/include/hw/ppc/spapr_drc.h
> > +++ b/include/hw/ppc/spapr_drc.h
> > @@ -200,7 +200,6 @@ typedef struct sPAPRDRConnector {
> >      sPAPRConfigureConnectorState *ccs;
> >  
> >      bool awaiting_release;
> > -    bool awaiting_allocation;
> >  
> >      /* device pointer, via link property */
> >      DeviceState *dev;
>
Greg Kurz July 12, 2017, 11:27 a.m. UTC | #4
On Wed, 12 Jul 2017 21:05:45 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Wed, Jul 12, 2017 at 12:00:01PM +0200, Greg Kurz wrote:
> > On Wed, 12 Jul 2017 15:53:11 +1000
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >   
> > > The awaiting_allocation flag in the DRC was introduced by aab9913
> > > "spapr_drc: Prevent detach racing against attach for CPU DR", allegedly to
> > > prevent a guest crash on racing attach and detach.  Except.. information
> > > from the BZ actually suggests a qemu crash, not a guest crash.  And there  
> > 
> > Can you share an url to the BZ ?  
> 
> Alas, no.  I have that information second hand from.. Bharata,
> think.. and I believe the BZ is an IBM internal one.
> 

I did some digging and I could find it in our internal bugzilla. And indeed,
it mentions QEMU crashing because of a SEGV...

> >   
> > > shouldn't be a problem here anyway: if the guest has already moved the DRC
> > > away from UNUSABLE state, the detach would already be deferred, and if it
> > > hadn't it should be safe to detach it (the guest should fail gracefully
> > > when it attempts to change the allocation state).
> > > 
> > > I think this was probably just a bandaid for some other problem in the
> > > state management.  So, remove awaiting_allocation and associated code.
> > > 

... so I tend to agree this was a bandaid.

Reviewed-by: Greg Kurz <groug@kaod.org>

I'll re-try the scenario from the BZ with this patch applied to see if we
hit the crash again.

> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  hw/ppc/spapr_drc.c         | 25 +++----------------------
> > >  include/hw/ppc/spapr_drc.h |  1 -
> > >  2 files changed, 3 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > > index 9b07f80..89ba3d6 100644
> > > --- a/hw/ppc/spapr_drc.c
> > > +++ b/hw/ppc/spapr_drc.c
> > > @@ -170,19 +170,13 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc)
> > >      if (!drc->dev) {
> > >          return RTAS_OUT_NO_SUCH_INDICATOR;
> > >      }
> > > -    if (drc->awaiting_release && drc->awaiting_allocation) {
> > > -        /* kernel is acknowledging a previous hotplug event
> > > -         * while we are already removing it.
> > > -         * it's safe to ignore awaiting_allocation here since we know the
> > > -         * situation is predicated on the guest either already having done
> > > -         * so (boot-time hotplug), or never being able to acquire in the
> > > -         * first place (hotplug followed by immediate unplug).
> > > -         */
> > > +    if (drc->awaiting_release) {
> > > +        /* Don't allow the guest to move a device away from UNUSABLE
> > > +         * state when we want to unplug it */
> > >          return RTAS_OUT_NO_SUCH_INDICATOR;
> > >      }
> > >  
> > >      drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
> > > -    drc->awaiting_allocation = false;
> > >  
> > >      return RTAS_OUT_SUCCESS;
> > >  }
> > > @@ -357,10 +351,6 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
> > >      drc->fdt = fdt;
> > >      drc->fdt_start_offset = fdt_start_offset;
> > >  
> > > -    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
> > > -        drc->awaiting_allocation = true;
> > > -    }
> > > -
> > >      object_property_add_link(OBJECT(drc), "device",
> > >                               object_get_typename(OBJECT(drc->dev)),
> > >                               (Object **)(&drc->dev),
> > > @@ -398,12 +388,6 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
> > >          return;
> > >      }
> > >  
> > > -    if (drc->awaiting_allocation) {
> > > -        drc->awaiting_release = true;
> > > -        trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
> > > -        return;
> > > -    }
> > > -
> > >      spapr_drc_release(drc);
> > >  }
> > >  
> > > @@ -426,8 +410,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
> > >          spapr_drc_release(drc);
> > >      }
> > >  
> > > -    drc->awaiting_allocation = false;
> > > -
> > >      if (drc->dev) {
> > >          /* A device present at reset is coldplugged */
> > >          drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
> > > @@ -493,7 +475,6 @@ static const VMStateDescription vmstate_spapr_drc = {
> > >          VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
> > >          VMSTATE_BOOL(configured, sPAPRDRConnector),
> > >          VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
> > > -        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
> > >          VMSTATE_END_OF_LIST()
> > >      }
> > >  };
> > > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> > > index 715016b..18a196e 100644
> > > --- a/include/hw/ppc/spapr_drc.h
> > > +++ b/include/hw/ppc/spapr_drc.h
> > > @@ -200,7 +200,6 @@ typedef struct sPAPRDRConnector {
> > >      sPAPRConfigureConnectorState *ccs;
> > >  
> > >      bool awaiting_release;
> > > -    bool awaiting_allocation;
> > >  
> > >      /* device pointer, via link property */
> > >      DeviceState *dev;  
> >   
> 
> 
>
Greg Kurz July 12, 2017, 5:04 p.m. UTC | #5
On Wed, 12 Jul 2017 13:27:42 +0200
Greg Kurz <groug@kaod.org> wrote:

> On Wed, 12 Jul 2017 21:05:45 +1000
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Wed, Jul 12, 2017 at 12:00:01PM +0200, Greg Kurz wrote:  
> > > On Wed, 12 Jul 2017 15:53:11 +1000
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > >     
> > > > The awaiting_allocation flag in the DRC was introduced by aab9913
> > > > "spapr_drc: Prevent detach racing against attach for CPU DR", allegedly to
> > > > prevent a guest crash on racing attach and detach.  Except.. information
> > > > from the BZ actually suggests a qemu crash, not a guest crash.  And there    
> > > 
> > > Can you share an url to the BZ ?    
> > 
> > Alas, no.  I have that information second hand from.. Bharata,
> > think.. and I believe the BZ is an IBM internal one.
> >   
> 
> I did some digging and I could find it in our internal bugzilla. And indeed,
> it mentions QEMU crashing because of a SEGV...
> 
> > >     
> > > > shouldn't be a problem here anyway: if the guest has already moved the DRC
> > > > away from UNUSABLE state, the detach would already be deferred, and if it
> > > > hadn't it should be safe to detach it (the guest should fail gracefully
> > > > when it attempts to change the allocation state).
> > > > 
> > > > I think this was probably just a bandaid for some other problem in the
> > > > state management.  So, remove awaiting_allocation and associated code.
> > > >   
> 
> ... so I tend to agree this was a bandaid.
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> I'll re-try the scenario from the BZ with this patch applied to see if we
> hit the crash again.
> 

The crashing scenario was basically start a guest using libvirt, to add
a bunch of vCPUs and then remove them.

I've been running the addition/removal in a loop (using 'virsh setvcpus') and
no QEMU crash is observed, with or without this patch.

But, without this patch I would quickly (ie, 8 invocations of setvcpus) reach
a situation where:
- 'virsh setvcpus' would time out and fail forever from now on
- 'lscpu' in the guest and 'virsh vcpuinfo' show different number of vCPUs

With this patch applied, I could run the loop much much longer, and only
observe a 'virsh setvcpus' timeout from time to time. But at some point,
'virsh setvcpus' would fail consecutively 3 or 4 times with:

error: internal error: qemu didn't report thread id for vcpu '8'

and then fail with the following error forever:

error: unsupported configuration: failed to find appropriate hotpluggable
 vcpus to reach the desired target vcpu count

'lscpu' and 'virsh vcpuinfo' also produce inconsistent output.

I could continue to hotplug/unplug vCPUs with the QEMU monitor though,
so I guess there's something wrong in libvirt. I've asked Shiva (on Cc)
to help on the investigation.

Anyway, this patch seems to improve things and doesn't cause QEMU
to crash. I thus maintain my R-b and you can add:

Tested-by: Greg Kurz <groug@kaod.org>

> > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > >  hw/ppc/spapr_drc.c         | 25 +++----------------------
> > > >  include/hw/ppc/spapr_drc.h |  1 -
> > > >  2 files changed, 3 insertions(+), 23 deletions(-)
> > > > 
> > > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > > > index 9b07f80..89ba3d6 100644
> > > > --- a/hw/ppc/spapr_drc.c
> > > > +++ b/hw/ppc/spapr_drc.c
> > > > @@ -170,19 +170,13 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc)
> > > >      if (!drc->dev) {
> > > >          return RTAS_OUT_NO_SUCH_INDICATOR;
> > > >      }
> > > > -    if (drc->awaiting_release && drc->awaiting_allocation) {
> > > > -        /* kernel is acknowledging a previous hotplug event
> > > > -         * while we are already removing it.
> > > > -         * it's safe to ignore awaiting_allocation here since we know the
> > > > -         * situation is predicated on the guest either already having done
> > > > -         * so (boot-time hotplug), or never being able to acquire in the
> > > > -         * first place (hotplug followed by immediate unplug).
> > > > -         */
> > > > +    if (drc->awaiting_release) {
> > > > +        /* Don't allow the guest to move a device away from UNUSABLE
> > > > +         * state when we want to unplug it */
> > > >          return RTAS_OUT_NO_SUCH_INDICATOR;
> > > >      }
> > > >  
> > > >      drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
> > > > -    drc->awaiting_allocation = false;
> > > >  
> > > >      return RTAS_OUT_SUCCESS;
> > > >  }
> > > > @@ -357,10 +351,6 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
> > > >      drc->fdt = fdt;
> > > >      drc->fdt_start_offset = fdt_start_offset;
> > > >  
> > > > -    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
> > > > -        drc->awaiting_allocation = true;
> > > > -    }
> > > > -
> > > >      object_property_add_link(OBJECT(drc), "device",
> > > >                               object_get_typename(OBJECT(drc->dev)),
> > > >                               (Object **)(&drc->dev),
> > > > @@ -398,12 +388,6 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
> > > >          return;
> > > >      }
> > > >  
> > > > -    if (drc->awaiting_allocation) {
> > > > -        drc->awaiting_release = true;
> > > > -        trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
> > > > -        return;
> > > > -    }
> > > > -
> > > >      spapr_drc_release(drc);
> > > >  }
> > > >  
> > > > @@ -426,8 +410,6 @@ void spapr_drc_reset(sPAPRDRConnector *drc)
> > > >          spapr_drc_release(drc);
> > > >      }
> > > >  
> > > > -    drc->awaiting_allocation = false;
> > > > -
> > > >      if (drc->dev) {
> > > >          /* A device present at reset is coldplugged */
> > > >          drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
> > > > @@ -493,7 +475,6 @@ static const VMStateDescription vmstate_spapr_drc = {
> > > >          VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
> > > >          VMSTATE_BOOL(configured, sPAPRDRConnector),
> > > >          VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
> > > > -        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
> > > >          VMSTATE_END_OF_LIST()
> > > >      }
> > > >  };
> > > > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> > > > index 715016b..18a196e 100644
> > > > --- a/include/hw/ppc/spapr_drc.h
> > > > +++ b/include/hw/ppc/spapr_drc.h
> > > > @@ -200,7 +200,6 @@ typedef struct sPAPRDRConnector {
> > > >      sPAPRConfigureConnectorState *ccs;
> > > >  
> > > >      bool awaiting_release;
> > > > -    bool awaiting_allocation;
> > > >  
> > > >      /* device pointer, via link property */
> > > >      DeviceState *dev;    
> > >     
> > 
> > 
> >   
>
diff mbox

Patch

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 9b07f80..89ba3d6 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -170,19 +170,13 @@  static uint32_t drc_set_usable(sPAPRDRConnector *drc)
     if (!drc->dev) {
         return RTAS_OUT_NO_SUCH_INDICATOR;
     }
-    if (drc->awaiting_release && drc->awaiting_allocation) {
-        /* kernel is acknowledging a previous hotplug event
-         * while we are already removing it.
-         * it's safe to ignore awaiting_allocation here since we know the
-         * situation is predicated on the guest either already having done
-         * so (boot-time hotplug), or never being able to acquire in the
-         * first place (hotplug followed by immediate unplug).
-         */
+    if (drc->awaiting_release) {
+        /* Don't allow the guest to move a device away from UNUSABLE
+         * state when we want to unplug it */
         return RTAS_OUT_NO_SUCH_INDICATOR;
     }
 
     drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
-    drc->awaiting_allocation = false;
 
     return RTAS_OUT_SUCCESS;
 }
@@ -357,10 +351,6 @@  void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
     drc->fdt = fdt;
     drc->fdt_start_offset = fdt_start_offset;
 
-    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
-        drc->awaiting_allocation = true;
-    }
-
     object_property_add_link(OBJECT(drc), "device",
                              object_get_typename(OBJECT(drc->dev)),
                              (Object **)(&drc->dev),
@@ -398,12 +388,6 @@  void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
         return;
     }
 
-    if (drc->awaiting_allocation) {
-        drc->awaiting_release = true;
-        trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
-        return;
-    }
-
     spapr_drc_release(drc);
 }
 
@@ -426,8 +410,6 @@  void spapr_drc_reset(sPAPRDRConnector *drc)
         spapr_drc_release(drc);
     }
 
-    drc->awaiting_allocation = false;
-
     if (drc->dev) {
         /* A device present at reset is coldplugged */
         drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
@@ -493,7 +475,6 @@  static const VMStateDescription vmstate_spapr_drc = {
         VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
         VMSTATE_BOOL(configured, sPAPRDRConnector),
         VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
-        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
index 715016b..18a196e 100644
--- a/include/hw/ppc/spapr_drc.h
+++ b/include/hw/ppc/spapr_drc.h
@@ -200,7 +200,6 @@  typedef struct sPAPRDRConnector {
     sPAPRConfigureConnectorState *ccs;
 
     bool awaiting_release;
-    bool awaiting_allocation;
 
     /* device pointer, via link property */
     DeviceState *dev;