diff mbox series

[for-6.2,v6,6/7] spapr: use DEVICE_UNPLUG_ERROR to report unplug errors

Message ID 20210719200827.1507276-7-danielhb413@gmail.com
State New
Headers show
Series DEVICE_UNPLUG_ERROR QAPI event | expand

Commit Message

Daniel Henrique Barboza July 19, 2021, 8:08 p.m. UTC
Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
error path, signalling that the hotunplug process wasn't successful.
This allows us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
to signal this error to the management layer.

We also have another error path in spapr_memory_unplug_rollback() for
configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
in the hotunplug error path, but they will reconfigure them. Let's send
the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
case of older kernels.

Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 hw/ppc/spapr.c     |  9 ++++++++-
 hw/ppc/spapr_drc.c | 18 ++++++++++++------
 2 files changed, 20 insertions(+), 7 deletions(-)

Comments

Markus Armbruster Aug. 7, 2021, 2:06 p.m. UTC | #1
Daniel Henrique Barboza <danielhb413@gmail.com> writes:

> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
> error path, signalling that the hotunplug process wasn't successful.
> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
> to signal this error to the management layer.
>
> We also have another error path in spapr_memory_unplug_rollback() for
> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
> in the hotunplug error path, but it will reconfigure them. Let's send
> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
> case of older kernels.
>
> Reviewed-by: Greg Kurz <groug@kaod.org>
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  hw/ppc/spapr.c     |  9 ++++++++-
>  hw/ppc/spapr_drc.c | 18 ++++++++++++------
>  2 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 1611d7ab05..5459f9a7e9 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -29,6 +29,7 @@
>  #include "qemu/datadir.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-events-machine.h"
> +#include "qapi/qapi-events-qdev.h"
>  #include "qapi/visitor.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/hostmem.h"
> @@ -3686,13 +3687,19 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>  
>      /*
>       * Tell QAPI that something happened and the memory
> -     * hotunplug wasn't successful.
> +     * hotunplug wasn't successful. Keep sending
> +     * MEM_UNPLUG_ERROR even while sending DEVICE_UNPLUG_ERROR
> +     * until the deprecation MEM_UNPLUG_ERROR is due.
>       */
>      if (dev->id) {
>          qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>                                       "for device %s", dev->id);
>          qapi_event_send_mem_unplug_error(dev->id, qapi_error);
>      }
> +
> +    qapi_event_send_device_unplug_error(!!dev->id, dev->id,
> +                                        dev->canonical_path,
> +                                        qapi_error != NULL, qapi_error);
>  }
>  

When dev->id is null, we send something like

    {"event": "DEVICE_UNPLUG_ERROR",
     "data": {"path": "/machine/..."},
     "timestamp": ...}

Unless I'm missing something, this is all the information the management
application really needs.

When dev->id is non-null, we add to "data":

              "device": "dev123",
              "msg": "Memory hotunplug rejected by the guest for device dev123",

I'm fine with emitting the device ID when we have it.

What's the intended use of "msg"?

Could DEVICE_UNPLUG_ERROR ever be emitted for this device with a
different "msg"?

If "msg" is useful when dev->id is non-null, then it's likely useful
when dev->id is null.  Why not

              "msg": "Memory hotunplug rejected by the guest",

always?

If we do that here, we'll likely do it everywhere, and then member @msg
isn't actually optional.

>  /* Callback to be called during DRC release. */
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index a4d9496f76..8f0479631f 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -17,6 +17,8 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "qom/object.h"
>  #include "migration/vmstate.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-events-qdev.h"
>  #include "qapi/visitor.h"
>  #include "qemu/error-report.h"
>  #include "hw/ppc/spapr.h" /* for RTAS return codes */
> @@ -160,6 +162,11 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>           * means that the kernel is refusing the removal.
>           */
>          if (drc->unplug_requested && drc->dev) {
> +            const char qapi_error_fmt[] = \

Drop the superfluous \

> +"Device hotunplug rejected by the guest for device %s";

Unusual indentation.

> +
> +            g_autofree char *qapi_error = NULL;
> +
>              if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
>                  spapr = SPAPR_MACHINE(qdev_get_machine());
>  
> @@ -169,14 +176,13 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>              drc->unplug_requested = false;
>  
>              if (drc->dev->id) {
> -                error_report("Device hotunplug rejected by the guest "
> -                             "for device %s", drc->dev->id);
> +                qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
> +                error_report(qapi_error_fmt, drc->dev->id);

Simpler:

                   qapi_error = ...
                   error_report("%s", qapi_error);

Matter of taste.  Maintainer decides.

>              }
>  
> -            /*
> -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
> -             * it is implemented.
> -             */
> +            qapi_event_send_device_unplug_error(!!drc->dev->id, drc->dev->id,
> +                                                drc->dev->canonical_path,
> +                                                qapi_error != NULL, qapi_error);

My questions on "msg" apply.

>          }
>  
>          return RTAS_OUT_SUCCESS; /* Nothing to do */
Daniel Henrique Barboza Aug. 9, 2021, 6:47 p.m. UTC | #2
On 8/7/21 11:06 AM, Markus Armbruster wrote:
> Daniel Henrique Barboza <danielhb413@gmail.com> writes:
> 
>> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
>> error path, signalling that the hotunplug process wasn't successful.
>> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
>> to signal this error to the management layer.
>>
>> We also have another error path in spapr_memory_unplug_rollback() for
>> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
>> in the hotunplug error path, but it will reconfigure them. Let's send
>> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
>> case of older kernels.
>>
>> Reviewed-by: Greg Kurz <groug@kaod.org>
>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>> ---
>>   hw/ppc/spapr.c     |  9 ++++++++-
>>   hw/ppc/spapr_drc.c | 18 ++++++++++++------
>>   2 files changed, 20 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 1611d7ab05..5459f9a7e9 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -29,6 +29,7 @@
>>   #include "qemu/datadir.h"
>>   #include "qapi/error.h"
>>   #include "qapi/qapi-events-machine.h"
>> +#include "qapi/qapi-events-qdev.h"
>>   #include "qapi/visitor.h"
>>   #include "sysemu/sysemu.h"
>>   #include "sysemu/hostmem.h"
>> @@ -3686,13 +3687,19 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>>   
>>       /*
>>        * Tell QAPI that something happened and the memory
>> -     * hotunplug wasn't successful.
>> +     * hotunplug wasn't successful. Keep sending
>> +     * MEM_UNPLUG_ERROR even while sending DEVICE_UNPLUG_ERROR
>> +     * until the deprecation MEM_UNPLUG_ERROR is due.
>>        */
>>       if (dev->id) {
>>           qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>>                                        "for device %s", dev->id);
>>           qapi_event_send_mem_unplug_error(dev->id, qapi_error);
>>       }
>> +
>> +    qapi_event_send_device_unplug_error(!!dev->id, dev->id,
>> +                                        dev->canonical_path,
>> +                                        qapi_error != NULL, qapi_error);
>>   }
>>   
> 
> When dev->id is null, we send something like
> 
>      {"event": "DEVICE_UNPLUG_ERROR",
>       "data": {"path": "/machine/..."},
>       "timestamp": ...}
> 
> Unless I'm missing something, this is all the information the management
> application really needs.
> 
> When dev->id is non-null, we add to "data":
> 
>                "device": "dev123",
>                "msg": "Memory hotunplug rejected by the guest for device dev123",
> 
> I'm fine with emitting the device ID when we have it.
> 
> What's the intended use of "msg"?
> 
> Could DEVICE_UNPLUG_ERROR ever be emitted for this device with a
> different "msg"?


It won't have a different 'msg' for the current use of the event in both ppc64
and x86. It'll always be the same '<dev> hotunplug rejected by the guest'
message.

The idea is that a future caller might want to insert a more informative
message, such as "hotunplug failed: memory is being used by kernel space"
or any other more specific condition. But then I guess we can argue that,
if that time comes, one can just add this new optional 'msg' member to this
event, and for now we can live without it.

Would you be opposed to renaming this new event to "DEVICE_UNPLUG_GUEST_ERROR"
and then removing the 'msg' member? I guess this rename would make it clearer
for management that we're reporting a guest side error, making any further
clarifications via 'msg' unneeded.


Thanks,


Daniel




> 
> If "msg" is useful when dev->id is non-null, then it's likely useful
> when dev->id is null.  Why not
> 
>                "msg": "Memory hotunplug rejected by the guest",
> 
> always?
> 
> If we do that here, we'll likely do it everywhere, and then member @msg
> isn't actually optional.
> 
>>   /* Callback to be called during DRC release. */
>> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
>> index a4d9496f76..8f0479631f 100644
>> --- a/hw/ppc/spapr_drc.c
>> +++ b/hw/ppc/spapr_drc.c
>> @@ -17,6 +17,8 @@
>>   #include "hw/ppc/spapr_drc.h"
>>   #include "qom/object.h"
>>   #include "migration/vmstate.h"
>> +#include "qapi/error.h"
>> +#include "qapi/qapi-events-qdev.h"
>>   #include "qapi/visitor.h"
>>   #include "qemu/error-report.h"
>>   #include "hw/ppc/spapr.h" /* for RTAS return codes */
>> @@ -160,6 +162,11 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>>            * means that the kernel is refusing the removal.
>>            */
>>           if (drc->unplug_requested && drc->dev) {
>> +            const char qapi_error_fmt[] = \
> 
> Drop the superfluous \
> 
>> +"Device hotunplug rejected by the guest for device %s";
> 
> Unusual indentation.
> 
>> +
>> +            g_autofree char *qapi_error = NULL;
>> +
>>               if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
>>                   spapr = SPAPR_MACHINE(qdev_get_machine());
>>   
>> @@ -169,14 +176,13 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>>               drc->unplug_requested = false;
>>   
>>               if (drc->dev->id) {
>> -                error_report("Device hotunplug rejected by the guest "
>> -                             "for device %s", drc->dev->id);
>> +                qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
>> +                error_report(qapi_error_fmt, drc->dev->id);
> 
> Simpler:
> 
>                     qapi_error = ...
>                     error_report("%s", qapi_error);
> 
> Matter of taste.  Maintainer decides.
> 
>>               }
>>   
>> -            /*
>> -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
>> -             * it is implemented.
>> -             */
>> +            qapi_event_send_device_unplug_error(!!drc->dev->id, drc->dev->id,
>> +                                                drc->dev->canonical_path,
>> +                                                qapi_error != NULL, qapi_error);
> 
> My questions on "msg" apply.
> 
>>           }
>>   
>>           return RTAS_OUT_SUCCESS; /* Nothing to do */
>
David Gibson Aug. 10, 2021, 1:03 a.m. UTC | #3
On Mon, Aug 09, 2021 at 03:47:14PM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 8/7/21 11:06 AM, Markus Armbruster wrote:
> > Daniel Henrique Barboza <danielhb413@gmail.com> writes:
> > 
> > > Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
> > > error path, signalling that the hotunplug process wasn't successful.
> > > This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
> > > to signal this error to the management layer.
> > > 
> > > We also have another error path in spapr_memory_unplug_rollback() for
> > > configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
> > > in the hotunplug error path, but it will reconfigure them. Let's send
> > > the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
> > > case of older kernels.
> > > 
> > > Reviewed-by: Greg Kurz <groug@kaod.org>
> > > Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> > > ---
> > >   hw/ppc/spapr.c     |  9 ++++++++-
> > >   hw/ppc/spapr_drc.c | 18 ++++++++++++------
> > >   2 files changed, 20 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 1611d7ab05..5459f9a7e9 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -29,6 +29,7 @@
> > >   #include "qemu/datadir.h"
> > >   #include "qapi/error.h"
> > >   #include "qapi/qapi-events-machine.h"
> > > +#include "qapi/qapi-events-qdev.h"
> > >   #include "qapi/visitor.h"
> > >   #include "sysemu/sysemu.h"
> > >   #include "sysemu/hostmem.h"
> > > @@ -3686,13 +3687,19 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
> > >       /*
> > >        * Tell QAPI that something happened and the memory
> > > -     * hotunplug wasn't successful.
> > > +     * hotunplug wasn't successful. Keep sending
> > > +     * MEM_UNPLUG_ERROR even while sending DEVICE_UNPLUG_ERROR
> > > +     * until the deprecation MEM_UNPLUG_ERROR is due.
> > >        */
> > >       if (dev->id) {
> > >           qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
> > >                                        "for device %s", dev->id);
> > >           qapi_event_send_mem_unplug_error(dev->id, qapi_error);
> > >       }
> > > +
> > > +    qapi_event_send_device_unplug_error(!!dev->id, dev->id,
> > > +                                        dev->canonical_path,
> > > +                                        qapi_error != NULL, qapi_error);
> > >   }
> > 
> > When dev->id is null, we send something like
> > 
> >      {"event": "DEVICE_UNPLUG_ERROR",
> >       "data": {"path": "/machine/..."},
> >       "timestamp": ...}
> > 
> > Unless I'm missing something, this is all the information the management
> > application really needs.
> > 
> > When dev->id is non-null, we add to "data":
> > 
> >                "device": "dev123",
> >                "msg": "Memory hotunplug rejected by the guest for device dev123",
> > 
> > I'm fine with emitting the device ID when we have it.
> > 
> > What's the intended use of "msg"?
> > 
> > Could DEVICE_UNPLUG_ERROR ever be emitted for this device with a
> > different "msg"?
> 
> 
> It won't have a different 'msg' for the current use of the event in both ppc64
> and x86. It'll always be the same '<dev> hotunplug rejected by the guest'
> message.
> 
> The idea is that a future caller might want to insert a more informative
> message, such as "hotunplug failed: memory is being used by kernel space"
> or any other more specific condition. But then I guess we can argue that,
> if that time comes, one can just add this new optional 'msg' member in this
> event, and for now we can live without it.

Right.  We could also consider making the current message more
specific about why we chose to cancel the unplug: e.g. "guest
unisolated DRC after unplug request" for PAPR, and something
appropriate to the ACPI specifics for x86.  Not sure if that's useful
enough to justify it.

> Would you oppose to renaming this new event to "DEVICE_UNPLUG_GUEST_ERROR"
> and then remove the 'msg' member? I guess this rename would make it clearer
> for management that we're reporting a guest side error, making any further
> clarifications via 'msg' unneeded.
> 
> 
> Thanks,
> 
> 
> Daniel
> 
> 
> 
> 
> > 
> > If "msg" is useful when dev->id is non-null, then it's likely useful
> > when dev->id is null.  Why not
> > 
> >                "msg": "Memory hotunplug rejected by the guest",
> > 
> > always?
> > 
> > If we do that here, we'll likely do it everywhere, and then member @msg
> > isn't actually optional.
> > 
> > >   /* Callback to be called during DRC release. */
> > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > > index a4d9496f76..8f0479631f 100644
> > > --- a/hw/ppc/spapr_drc.c
> > > +++ b/hw/ppc/spapr_drc.c
> > > @@ -17,6 +17,8 @@
> > >   #include "hw/ppc/spapr_drc.h"
> > >   #include "qom/object.h"
> > >   #include "migration/vmstate.h"
> > > +#include "qapi/error.h"
> > > +#include "qapi/qapi-events-qdev.h"
> > >   #include "qapi/visitor.h"
> > >   #include "qemu/error-report.h"
> > >   #include "hw/ppc/spapr.h" /* for RTAS return codes */
> > > @@ -160,6 +162,11 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
> > >            * means that the kernel is refusing the removal.
> > >            */
> > >           if (drc->unplug_requested && drc->dev) {
> > > +            const char qapi_error_fmt[] = \
> > 
> > Drop the superfluous \
> > 
> > > +"Device hotunplug rejected by the guest for device %s";
> > 
> > Unusual indentation.
> > 
> > > +
> > > +            g_autofree char *qapi_error = NULL;
> > > +
> > >               if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
> > >                   spapr = SPAPR_MACHINE(qdev_get_machine());
> > > @@ -169,14 +176,13 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
> > >               drc->unplug_requested = false;
> > >               if (drc->dev->id) {
> > > -                error_report("Device hotunplug rejected by the guest "
> > > -                             "for device %s", drc->dev->id);
> > > +                qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
> > > +                error_report(qapi_error_fmt, drc->dev->id);
> > 
> > Simpler:
> > 
> >                     qapi_error = ...
> >                     error_report("%s", qapi_error);
> > 
> > Matter of taste.  Maintainer decides.
> > 
> > >               }
> > > -            /*
> > > -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
> > > -             * it is implemented.
> > > -             */
> > > +            qapi_event_send_device_unplug_error(!!drc->dev->id, drc->dev->id,
> > > +                                                drc->dev->canonical_path,
> > > +                                                qapi_error != NULL, qapi_error);
> > 
> > My questions on "msg" apply.
> > 
> > >           }
> > >           return RTAS_OUT_SUCCESS; /* Nothing to do */
> > 
>
Markus Armbruster Aug. 23, 2021, 1:33 p.m. UTC | #4
Daniel Henrique Barboza <danielhb413@gmail.com> writes:

> On 8/7/21 11:06 AM, Markus Armbruster wrote:
>> Daniel Henrique Barboza <danielhb413@gmail.com> writes:
>> 
>>> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
>>> error path, signalling that the hotunplug process wasn't successful.
>>> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
>>> to signal this error to the management layer.
>>>
>>> We also have another error path in spapr_memory_unplug_rollback() for
>>> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
>>> in the hotunplug error path, but it will reconfigure them. Let's send
>>> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
>>> case of older kernels.
>>>
>>> Reviewed-by: Greg Kurz <groug@kaod.org>
>>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>>> ---
>>>   hw/ppc/spapr.c     |  9 ++++++++-
>>>   hw/ppc/spapr_drc.c | 18 ++++++++++++------
>>>   2 files changed, 20 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 1611d7ab05..5459f9a7e9 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -29,6 +29,7 @@
>>>   #include "qemu/datadir.h"
>>>   #include "qapi/error.h"
>>>   #include "qapi/qapi-events-machine.h"
>>> +#include "qapi/qapi-events-qdev.h"
>>>   #include "qapi/visitor.h"
>>>   #include "sysemu/sysemu.h"
>>>   #include "sysemu/hostmem.h"
>>> @@ -3686,13 +3687,19 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>>>         /*
>>>        * Tell QAPI that something happened and the memory
>>> -     * hotunplug wasn't successful.
>>> +     * hotunplug wasn't successful. Keep sending
>>> +     * MEM_UNPLUG_ERROR even while sending DEVICE_UNPLUG_ERROR
>>> +     * until the deprecation MEM_UNPLUG_ERROR is due.
>>>        */
>>>       if (dev->id) {
>>>           qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>>>                                        "for device %s", dev->id);
>>>           qapi_event_send_mem_unplug_error(dev->id, qapi_error);
>>>       }
>>> +
>>> +    qapi_event_send_device_unplug_error(!!dev->id, dev->id,
>>> +                                        dev->canonical_path,
>>> +                                        qapi_error != NULL, qapi_error);
>>>   }
>>>   
>> When dev->id is null, we send something like
>>
>>      {"event": "DEVICE_UNPLUG_ERROR",
>>       "data": {"path": "/machine/..."},
>>       "timestamp": ...}
>>
>> Unless I'm missing something, this is all the information the management
>> application really needs.
>>
>> When dev->id is non-null, we add to "data":
>>
>>                "device": "dev123",
>>                "msg": "Memory hotunplug rejected by the guest for device dev123",
>>
>> I'm fine with emitting the device ID when we have it.
>>
>> What's the intended use of "msg"?
>>
>> Could DEVICE_UNPLUG_ERROR ever be emitted for this device with a
>> different "msg"?
>
>
> It won't have a different 'msg' for the current use of the event in both ppc64
> and x86. It'll always be the same '<dev> hotunplug rejected by the guest'
> message.
>
> The idea is that a future caller might want to insert a more informative
> message, such as "hotunplug failed: memory is being used by kernel space"
> or any other more specific condition. But then I guess we can argue that,
> if that time comes, one can just add this new optional 'msg' member in this
> event, and for now we can live without it.
>
> Would you oppose to renaming this new event to "DEVICE_UNPLUG_GUEST_ERROR"
> and then remove the 'msg' member? I guess this rename would make it clearer
> for management that we're reporting a guest side error, making any further
> clarifications via 'msg' unneeded.

No objection.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1611d7ab05..5459f9a7e9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -29,6 +29,7 @@ 
 #include "qemu/datadir.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/hostmem.h"
@@ -3686,13 +3687,19 @@  void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
 
     /*
      * Tell QAPI that something happened and the memory
-     * hotunplug wasn't successful.
+     * hotunplug wasn't successful. Keep sending
+     * MEM_UNPLUG_ERROR even while sending DEVICE_UNPLUG_ERROR
+     * until the deprecation MEM_UNPLUG_ERROR is due.
      */
     if (dev->id) {
         qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
                                      "for device %s", dev->id);
         qapi_event_send_mem_unplug_error(dev->id, qapi_error);
     }
+
+    qapi_event_send_device_unplug_error(!!dev->id, dev->id,
+                                        dev->canonical_path,
+                                        qapi_error != NULL, qapi_error);
 }
 
 /* Callback to be called during DRC release. */
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a4d9496f76..8f0479631f 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -17,6 +17,8 @@ 
 #include "hw/ppc/spapr_drc.h"
 #include "qom/object.h"
 #include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
 #include "hw/ppc/spapr.h" /* for RTAS return codes */
@@ -160,6 +162,11 @@  static uint32_t drc_unisolate_logical(SpaprDrc *drc)
          * means that the kernel is refusing the removal.
          */
         if (drc->unplug_requested && drc->dev) {
+            const char qapi_error_fmt[] = \
+"Device hotunplug rejected by the guest for device %s";
+
+            g_autofree char *qapi_error = NULL;
+
             if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
                 spapr = SPAPR_MACHINE(qdev_get_machine());
 
@@ -169,14 +176,13 @@  static uint32_t drc_unisolate_logical(SpaprDrc *drc)
             drc->unplug_requested = false;
 
             if (drc->dev->id) {
-                error_report("Device hotunplug rejected by the guest "
-                             "for device %s", drc->dev->id);
+                qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
+                error_report(qapi_error_fmt, drc->dev->id);
             }
 
-            /*
-             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
-             * it is implemented.
-             */
+            qapi_event_send_device_unplug_error(!!drc->dev->id, drc->dev->id,
+                                                drc->dev->canonical_path,
+                                                qapi_error != NULL, qapi_error);
         }
 
         return RTAS_OUT_SUCCESS; /* Nothing to do */