diff mbox series

[v4,18/24] qdev: hotplug: provide do_unplug handler

Message ID 20180926094219.20322-19-david@redhat.com
State New
Headers show
Series memory-device: complete refactoring + virtio-pmem | expand

Commit Message

David Hildenbrand Sept. 26, 2018, 9:42 a.m. UTC
The unplug and unplug_request handlers are special: They are not
executed when unrealizing a device, but rather trigger the removal of a
device from device_del() via object_unparent() - to effectively
unrealize a device.

If such a device has a child bus and another device attached to
that bus (e.g. how virtio devices are created with their proxy device),
we will not get a call to the unplug handler. As we want to support
hotplug handlers (and especially also some unplug logic to undo resource
assignment) for such devices, we cannot simply call the unplug handler
when unrealizing - it has a different semantic ("trigger removal").

To handle this scenario, we need a do_unplug handler, that will be
executed for all devices with a hotplug handler.

While at it, introduce hotplug_fn_nofail and fix a spelling mistake in
a comment.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 hw/core/hotplug.c    | 10 ++++++++++
 hw/core/qdev.c       |  6 ++++++
 include/hw/hotplug.h | 26 ++++++++++++++++++++++++--
 3 files changed, 40 insertions(+), 2 deletions(-)

Comments

Igor Mammedov Sept. 27, 2018, 1:01 p.m. UTC | #1
On Wed, 26 Sep 2018 11:42:13 +0200
David Hildenbrand <david@redhat.com> wrote:

> The unplug and unplug_request handlers are special: They are not
> executed when unrealizing a device, but rather trigger the removal of a
> device from device_del() via object_unparent() - to effectively
> unrealize a device.
> 
> If such a device has a child bus and another device attached to
> that bus (e.g. how virtio devices are created with their proxy device),
> we will not get a call to the unplug handler. As we want to support
> hotplug handlers (and especially also some unplug logic to undo resource
> assignment) for such devices, we cannot simply call the unplug handler
> when unrealizing - it has a different semantic ("trigger removal").
> 
> To handle this scenario, we need a do_unplug handler, that will be
> executed for all devices with a hotplug handler.
could you clarify what would be call flow for unplug in this case
starting from 'device_del'?

> 
> While at it, introduce hotplug_fn_nofail and fix a spelling mistake in
> a comment.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  hw/core/hotplug.c    | 10 ++++++++++
>  hw/core/qdev.c       |  6 ++++++
>  include/hw/hotplug.h | 26 ++++++++++++++++++++++++--
>  3 files changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
> index 2253072d0e..e7a68d5160 100644
> --- a/hw/core/hotplug.c
> +++ b/hw/core/hotplug.c
> @@ -45,6 +45,16 @@ void hotplug_handler_post_plug(HotplugHandler *plug_handler,
>      }
>  }
>  
> +void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
> +                                 DeviceState *plugged_dev)
> +{
> +    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);
> +
> +    if (hdc->do_unplug) {
> +        hdc->do_unplug(plug_handler, plugged_dev);
> +    }
> +}
> +
>  void hotplug_handler_unplug_request(HotplugHandler *plug_handler,
>                                      DeviceState *plugged_dev,
>                                      Error **errp)
> diff --git a/hw/core/qdev.c b/hw/core/qdev.c
> index 36b788a66b..dde2726099 100644
> --- a/hw/core/qdev.c
> +++ b/hw/core/qdev.c
> @@ -873,6 +873,12 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
>          }
>      } else if (!value && dev->realized) {
>          Error **local_errp = NULL;
> +
> +        hotplug_ctrl = qdev_get_hotplug_handler(dev);
> +        if (hotplug_ctrl) {
> +            hotplug_handler_do_unplug(hotplug_ctrl, dev);
> +        }
> +
>          QLIST_FOREACH(bus, &dev->child_bus, sibling) {
>              local_errp = local_err ? NULL : &local_err;
>              object_property_set_bool(OBJECT(bus), false, "realized",
> diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h
> index 51541d63e1..2fa5833cf1 100644
> --- a/include/hw/hotplug.h
> +++ b/include/hw/hotplug.h
> @@ -31,13 +31,21 @@ typedef struct HotplugHandler {
>  
>  /**
>   * hotplug_fn:
> - * @plug_handler: a device performing plug/uplug action
> + * @plug_handler: a device performing (un)plug action
>   * @plugged_dev: a device that has been (un)plugged
>   * @errp: returns an error if this function fails
>   */
>  typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
>                             DeviceState *plugged_dev, Error **errp);
>  
> +/**
> + * hotplug_fn_nofail:
> + * @plug_handler: a device performing un(plug) action
> + * @plugged_dev: a device that has been (un)plugged
> + */
> +typedef void (*hotplug_fn_nofail)(HotplugHandler *plug_handler,
> +                                  DeviceState *plugged_dev);
> +
>  /**
>   * HotplugDeviceClass:
>   *
> @@ -49,12 +57,17 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
>   * @plug: plug callback called at end of device.realize(true).
>   * @post_plug: post plug callback called after device.realize(true) and device
>   *             reset
> + * @do_unplug: unplug callback called at start of device.realize(false)
>   * @unplug_request: unplug request callback.
>   *                  Used as a means to initiate device unplug for devices that
>   *                  require asynchronous unplug handling.
>   * @unplug: unplug callback.
>   *          Used for device removal with devices that implement
>   *          asynchronous and synchronous (surprise) removal.
> + * Note: unplug_request and unplug are only called for devices to initiate
> + *       unplug of a device hierarchy (e.g. triggered by device_del). For
> + *       devices that will be removed along with this device hierarchy only
> + *       do_unplug will be called (e.g. to unassign resources).
>   */
>  typedef struct HotplugHandlerClass {
>      /* <private> */
> @@ -63,7 +76,8 @@ typedef struct HotplugHandlerClass {
>      /* <public> */
>      hotplug_fn pre_plug;
>      hotplug_fn plug;
> -    void (*post_plug)(HotplugHandler *plug_handler, DeviceState *plugged_dev);
> +    hotplug_fn_nofail post_plug;
> +    hotplug_fn_nofail do_unplug;
>      hotplug_fn unplug_request;
>      hotplug_fn unplug;
>  } HotplugHandlerClass;
> @@ -94,6 +108,14 @@ void hotplug_handler_pre_plug(HotplugHandler *plug_handler,
>  void hotplug_handler_post_plug(HotplugHandler *plug_handler,
>                                 DeviceState *plugged_dev);
>  
> +/**
> + * hotplug_handler_do_unplug:
> + *
> + * Call #HotplugHandlerClass.do_unplug callback of @plug_handler.
> + */
> +void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
> +                               DeviceState *plugged_dev);
> +
>  /**
>   * hotplug_handler_unplug_request:
>   *
David Hildenbrand Sept. 28, 2018, 12:21 p.m. UTC | #2
On 27/09/2018 15:01, Igor Mammedov wrote:
> On Wed, 26 Sep 2018 11:42:13 +0200
> David Hildenbrand <david@redhat.com> wrote:
> 
>> The unplug and unplug_request handlers are special: They are not
>> executed when unrealizing a device, but rather trigger the removal of a
>> device from device_del() via object_unparent() - to effectively
>> unrealize a device.
>>
>> If such a device has a child bus and another device attached to
>> that bus (e.g. how virtio devices are created with their proxy device),
>> we will not get a call to the unplug handler. As we want to support
>> hotplug handlers (and especially also some unplug logic to undo resource
>> assignment) for such devices, we cannot simply call the unplug handler
>> when unrealizing - it has a different semantic ("trigger removal").
>>
>> To handle this scenario, we need a do_unplug handler, that will be
>> executed for all devices with a hotplug handler.
> could you clarify what would be call flow for unplug in this case
> starting from 'device_del'?

Let's work it through for virtio-pmem:

qemu-system-x86_64 -machine pc -m 8G,maxmem=20G \
  [...] \
  -object memory-backend-file,id=mem1,share,mem-path=/dev/zero,size=4G \
  -device virtio-pmem-pci,id=vp1,memdev=mem1 -monitor stdio

info qtree gives us:

   bus: pci.0
      type PCI
      dev: virtio-pmem-pci, id "vp1"
	[...]
        bus: virtio-bus
          type virtio-pci-bus
          dev: virtio-pmem, id ""
            memaddr = 9663676416 (0x240000000)
            memdev = "/objects/mem1"
	    [...]

"device_del vp1":

qmp_device_del(vp1)->qdev_unplug(vp1)->hotplug_handler_unplug_request(vp1)

piix4_device_unplug_request_cb(vp1)->acpi_pcihp_device_unplug_cb(vp1)

-> Guest has to process the request and respond

acpi_pcihp_eject_slot(vp1)->object_unparent(vp1)


Now, this triggers the unplug of the device hierarchy:

object_unparent(vp1)->device_unparent(vp1)>device_set_realized(vp1, 0)

->bus_set_realized(virtio-bus, 0)->device_set_realized(virtio-pmem, 0)

This is the place where this hooks is comes into play:

->hotplug_handler_do_unplug(virtio-pmem)->machine
handler->virtio_pmem_do_unplug(virtio-pmem)

Followed by object_unparent(virtio-bus)->bus_unparent(virtio-bus)
Followed by object_unparent(virtio-pmem)->device_unparent(virtio-pmem)


At this place, the hierarchy is gone. Hotplug succeeded and the
virtio-pmem device (memory device) has been properly unplugged.
Igor Mammedov Oct. 1, 2018, 1:24 p.m. UTC | #3
On Fri, 28 Sep 2018 14:21:33 +0200
David Hildenbrand <david@redhat.com> wrote:

> On 27/09/2018 15:01, Igor Mammedov wrote:
> > On Wed, 26 Sep 2018 11:42:13 +0200
> > David Hildenbrand <david@redhat.com> wrote:
> >   
> >> The unplug and unplug_request handlers are special: They are not
> >> executed when unrealizing a device, but rather trigger the removal of a
> >> device from device_del() via object_unparent() - to effectively
> >> unrealize a device.
> >>
> >> If such a device has a child bus and another device attached to
> >> that bus (e.g. how virtio devices are created with their proxy device),
> >> we will not get a call to the unplug handler. As we want to support
> >> hotplug handlers (and especially also some unplug logic to undo resource
> >> assignment) for such devices, we cannot simply call the unplug handler
> >> when unrealizing - it has a different semantic ("trigger removal").
> >>
> >> To handle this scenario, we need a do_unplug handler, that will be
> >> executed for all devices with a hotplug handler.  
> > could you clarify what would be call flow for unplug in this case
> > starting from 'device_del'?  
> 
> Let's work it through for virtio-pmem:
> 
> qemu-system-x86_64 -machine pc -m 8G,maxmem=20G \
>   [...] \
>   -object memory-backend-file,id=mem1,share,mem-path=/dev/zero,size=4G \
>   -device virtio-pmem-pci,id=vp1,memdev=mem1 -monitor stdio
> 
> info qtree gives us:
> 
>    bus: pci.0
>       type PCI
>       dev: virtio-pmem-pci, id "vp1"
> 	[...]
>         bus: virtio-bus
>           type virtio-pci-bus
>           dev: virtio-pmem, id ""
>             memaddr = 9663676416 (0x240000000)
>             memdev = "/objects/mem1"
> 	    [...]
> 
> "device_del vp1":
> 
> qmp_device_del(vp1)->qdev_unplug(vp1)->hotplug_handler_unplug_request(vp1)
> 
> piix4_device_unplug_request_cb(vp1)->acpi_pcihp_device_unplug_cb(vp1)
> 
> -> Guest has to process the request and respond  
> 
> acpi_pcihp_eject_slot(vp1)->object_unparent(vp1)
that's one of the possible call flows, unplug could also originate
from shpc or native pci-e hot-plug.
PCI unplug hasn't ever been factored out from old PCI device/bus code,
so PCIDevice::unrealize takes care of parent resource teardown.
(well, there wasn't any reason to factor it out till we started
talking about hybrid devices).
We probably should do the same refactoring like it was done for
pc-dimm/cpu unplug
(see qdev_get_hotplug_handler()+hotplug_handler_unplug() usage)

> Now, this triggers the unplug of the device hierarchy:
> 
> object_unparent(vp1)->device_unparent(vp1)>device_set_realized(vp1, 0)
> 
> ->bus_set_realized(virtio-bus, 0)->device_set_realized(virtio-pmem, 0)  
> 
> This is the place where this hooks is comes into play:
> 
> ->hotplug_handler_do_unplug(virtio-pmem)->machine  
> handler->virtio_pmem_do_unplug(virtio-pmem)
> 
> Followed by object_unparent(virtio-bus)->bus_unparent(virtio-bus)
> Followed by object_unparent(virtio-pmem)->device_unparent(virtio-pmem)
> 
> 
> At this place, the hierarchy is gone. Hotplug succeeded and the
> virtio-pmem device (memory device) has been properly unplugged.
I'm concerned that both plug and unplug flows are implicit
and handled as if it were separate devices without enforcing
a particular ordering of (un)plug handlers.
It would work right now but it looks rather fragile to me.

If I remember right, the suggested and partially implemented idea
in one of your previous series was to override default hotplug
handler with a machine one for plugged in device [1][2].
However impl. wasn't exactly what I've suggested since it matches
all memory-devices.

1) qdev: let machine hotplug handler to override bus hotplug handler
2) pc: route all memory devices through  the machine hotplug handler

So lets reiterate, we have TYPE_VIRTIO_PMEM and TYPE_VIRTIO_PMEM_PCI
the former implements TYPE_MEMORY_DEVICE interface and the later is
a wrapper PCI/whatnot device shim.
So when you plug that composite device you'd get 2 independent
plug hooks called, which makes it unrelable/broken design.

My next question would be why TYPE_VIRTIO_PMEM_PCI can't implement 
TYPE_MEMORY_DEVICE interface and TYPE_VIRTIO_PMEM be a simple VIRTIO
device without any hotplug hooks (so shim device would proxy all
memory-device logic to its child)?
/huh, then you don't need get_device_id() as well/

That way using [2] and [1 - modulo it should match only concrete type]
machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
and explicitly call machine + pci hotplug handlers in necessary order.

flow would look like:
  [acpi|shcp|native pci-e eject]->  
       hotplug_ctrl = qdev_get_hotplug_handler(dev);
       hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
            machine_unplug()
               machine_virtio_pci_pmem_cb(): 
                  // we now that's device has 2 stage hotplug handlers,
                  // so we can arrange hotplug sequence in necessary order
                  hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);

                  //then do unplug in whatever order that's correct,
                  // I'd assume tear down/stop PCI device first, flushing
                  // command virtio command queues and that unplug memory itself.
                  hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
                  memory_device_unplug()

Similar logic applies to device_add/device_del paths, with a difference that
origin point would be monitor/qmp.

Point is to have a single explicit callback chain that applies to a concrete
device type. That way if ever change an ordering of calling plug callbacks
in qdev core, the expected for a device callback sequence would still
remain in place ensuring that device (un)plugged as expected.

Also it should be easier to trace for a reader, than 2 disjoint callbacks of
composite device (which only know to author of that device and then only
till he/she remembers how that thing works).
David Hildenbrand Oct. 2, 2018, 9:49 a.m. UTC | #4
On 01/10/2018 15:24, Igor Mammedov wrote:
> On Fri, 28 Sep 2018 14:21:33 +0200
> David Hildenbrand <david@redhat.com> wrote:
> 
>> On 27/09/2018 15:01, Igor Mammedov wrote:
>>> On Wed, 26 Sep 2018 11:42:13 +0200
>>> David Hildenbrand <david@redhat.com> wrote:
>>>   
>>>> The unplug and unplug_request handlers are special: They are not
>>>> executed when unrealizing a device, but rather trigger the removal of a
>>>> device from device_del() via object_unparent() - to effectively
>>>> unrealize a device.
>>>>
>>>> If such a device has a child bus and another device attached to
>>>> that bus (e.g. how virtio devices are created with their proxy device),
>>>> we will not get a call to the unplug handler. As we want to support
>>>> hotplug handlers (and especially also some unplug logic to undo resource
>>>> assignment) for such devices, we cannot simply call the unplug handler
>>>> when unrealizing - it has a different semantic ("trigger removal").
>>>>
>>>> To handle this scenario, we need a do_unplug handler, that will be
>>>> executed for all devices with a hotplug handler.  
>>> could you clarify what would be call flow for unplug in this case
>>> starting from 'device_del'?  
>>
>> Let's work it through for virtio-pmem:
>>
>> qemu-system-x86_64 -machine pc -m 8G,maxmem=20G \
>>   [...] \
>>   -object memory-backend-file,id=mem1,share,mem-path=/dev/zero,size=4G \
>>   -device virtio-pmem-pci,id=vp1,memdev=mem1 -monitor stdio
>>
>> info qtree gives us:
>>
>>    bus: pci.0
>>       type PCI
>>       dev: virtio-pmem-pci, id "vp1"
>> 	[...]
>>         bus: virtio-bus
>>           type virtio-pci-bus
>>           dev: virtio-pmem, id ""
>>             memaddr = 9663676416 (0x240000000)
>>             memdev = "/objects/mem1"
>> 	    [...]
>>
>> "device_del vp1":
>>
>> qmp_device_del(vp1)->qdev_unplug(vp1)->hotplug_handler_unplug_request(vp1)
>>
>> piix4_device_unplug_request_cb(vp1)->acpi_pcihp_device_unplug_cb(vp1)
>>
>> -> Guest has to process the request and respond  
>>
>> acpi_pcihp_eject_slot(vp1)->object_unparent(vp1)
> that's one of the possible call flows, unplug could also originate
> from shpc or native pci-e hot-plug.
> PCI unplug hasn't ever been factored out from old PCI device/bus code,
> so PCIDevice::unrealize takes care of parent resource teardown.
> (well, there wasn't any reason to factor it out till we started
> talking about hybrid devices).
> We probably should do the same refactoring like it was done for
> pc-dimm/cpu unplug
> (see qdev_get_hotplug_handler()+hotplug_handler_unplug() usage)
> 
>> Now, this triggers the unplug of the device hierarchy:
>>
>> object_unparent(vp1)->device_unparent(vp1)>device_set_realized(vp1, 0)
>>
>> ->bus_set_realized(virtio-bus, 0)->device_set_realized(virtio-pmem, 0)  
>>
>> This is the place where this hooks is comes into play:
>>
>> ->hotplug_handler_do_unplug(virtio-pmem)->machine  
>> handler->virtio_pmem_do_unplug(virtio-pmem)
>>
>> Followed by object_unparent(virtio-bus)->bus_unparent(virtio-bus)
>> Followed by object_unparent(virtio-pmem)->device_unparent(virtio-pmem)
>>
>>
>> At this place, the hierarchy is gone. Hotplug succeeded and the
>> virtio-pmem device (memory device) has been properly unplugged.
> I'm concerned that both plug and unplug flows are implicit
> and handled as if it were separate devices without enforcing
> a particular ordering of (un)plug handlers.
> It would work right now but it looks rather fragile to me.

In my ideal world, the plug+unplug handlers would only perform checks
and essentially trigger an object_unparent(). (either directly or by
some guest action).

Inside object_unparent(), the call flow of unrealize steps is defined.
By moving the "real unplug" part into "do_unplug" and therefor
essentially calling it when unrealizing, we could generalize this for
all unplug handlers.

I think, order of realization and therefore the order of hotplug handler
calls is strictly defined already. Same applies to unrealization if we
would factor the essential parts out into e.g. "do_unplug". That order
is strictly encoded in device_set_realized() and bus_set_realized().

> 
> If I remember right, the suggested and partially implemented idea
> in one of your previous series was to override default hotplug
> handler with a machine one for plugged in device [1][2].
> However impl. wasn't exactly what I've suggested since it matches
> all memory-devices.
> 
> 1) qdev: let machine hotplug handler to override bus hotplug handler
> 2) pc: route all memory devices through  the machine hotplug handler
> 
> So lets reiterate, we have TYPE_VIRTIO_PMEM and TYPE_VIRTIO_PMEM_PCI
> the former implements TYPE_MEMORY_DEVICE interface and the later is
> a wrapper PCI/whatnot device shim.
> So when you plug that composite device you'd get 2 independent
> plug hooks called, which makes it unrelable/broken design.

Can you elaborate why this is broken? I don't consider the
realize/unrealize order broken, and that is where we plug into. But yes,
we logically plug a device hierarchy and therefore get a separate
hotplug handler calls.

> 
> My next question would be why TYPE_VIRTIO_PMEM_PCI can't implement 
> TYPE_MEMORY_DEVICE interface and TYPE_VIRTIO_PMEM be a simple VIRTIO
> device without any hotplug hooks (so shim device would proxy all
> memory-device logic to its child)?
> /huh, then you don't need get_device_id() as well/

I had the same idea while going through different options. Then we would
have to forward all calls directly to the child. We cannot reuse
TYPE_MEMORY_DEVICE, so we would either need a new interface or define
the functions we want manually for each such device.

> 
> That way using [2] and [1 - modulo it should match only concrete type]
> machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
> and explicitly call machine + pci hotplug handlers in necessary order.
> 
> flow would look like:
>   [acpi|shcp|native pci-e eject]->  
>        hotplug_ctrl = qdev_get_hotplug_handler(dev);
>        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
>             machine_unplug()
>                machine_virtio_pci_pmem_cb(): 
>                   // we now that's device has 2 stage hotplug handlers,
>                   // so we can arrange hotplug sequence in necessary order
>                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
> 
>                   //then do unplug in whatever order that's correct,
>                   // I'd assume tear down/stop PCI device first, flushing
>                   // command virtio command queues and that unplug memory itself.
>                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
>                   memory_device_unplug()
> 
> Similar logic applies to device_add/device_del paths, with a difference that
> origin point would be monitor/qmp.

Let's see. User calls device_del(). That triggers an unplug_request. For
virtio-pmem, there is nothing to do.

eject hook is called by the guest. For now we do an object_unparent.
This would now be wrong. We would have to call a proper hotplug handler
chain (I guess that's the refactoring you mentioned above).

> 
> Point is to have a single explicit callback chain that applies to a concrete
> device type. That way if ever change an ordering of calling plug callbacks
> in qdev core, the expected for a device callback sequence would still
> remain in place ensuring that device (un)plugged as expected.

I haven't tested yet if this will work, but I can give it a try. I
learned that in QEMU things often seem easier than they actually are :)

> 
> Also it should be easier to trace for a reader, than 2 disjoint callbacks of
> composite device (which only know to author of that device and then only
> till he/she remembers how that thing works).

In my view it makes things slightly more complicated, because you have
to follow a hotplug handler chain that plugs devices via proxy devices.
(e.g. passing through TYPE_MEMORY_DEVICE calls to a child, and therefore
essentially hotplugging the child), instead of only watching out for
which device get's hotplugged and finding exactly one hotplug handler.
Of course, for a device hierarchy, multiple devices get logically
hotplugged.
Igor Mammedov Oct. 2, 2018, 2:23 p.m. UTC | #5
On Tue, 2 Oct 2018 11:49:09 +0200
David Hildenbrand <david@redhat.com> wrote:

> On 01/10/2018 15:24, Igor Mammedov wrote:
> > On Fri, 28 Sep 2018 14:21:33 +0200
> > David Hildenbrand <david@redhat.com> wrote:
> >   
> >> On 27/09/2018 15:01, Igor Mammedov wrote:  
> >>> On Wed, 26 Sep 2018 11:42:13 +0200
> >>> David Hildenbrand <david@redhat.com> wrote:
> >>>     
> >>>> The unplug and unplug_request handlers are special: They are not
> >>>> executed when unrealizing a device, but rather trigger the removal of a
> >>>> device from device_del() via object_unparent() - to effectively
> >>>> unrealize a device.
> >>>>
> >>>> If such a device has a child bus and another device attached to
> >>>> that bus (e.g. how virtio devices are created with their proxy device),
> >>>> we will not get a call to the unplug handler. As we want to support
> >>>> hotplug handlers (and especially also some unplug logic to undo resource
> >>>> assignment) for such devices, we cannot simply call the unplug handler
> >>>> when unrealizing - it has a different semantic ("trigger removal").
> >>>>
> >>>> To handle this scenario, we need a do_unplug handler, that will be
> >>>> executed for all devices with a hotplug handler.    
> >>> could you clarify what would be call flow for unplug in this case
> >>> starting from 'device_del'?    
> >>
> >> Let's work it through for virtio-pmem:
> >>
> >> qemu-system-x86_64 -machine pc -m 8G,maxmem=20G \
> >>   [...] \
> >>   -object memory-backend-file,id=mem1,share,mem-path=/dev/zero,size=4G \
> >>   -device virtio-pmem-pci,id=vp1,memdev=mem1 -monitor stdio
> >>
> >> info qtree gives us:
> >>
> >>    bus: pci.0
> >>       type PCI
> >>       dev: virtio-pmem-pci, id "vp1"
> >> 	[...]
> >>         bus: virtio-bus
> >>           type virtio-pci-bus
> >>           dev: virtio-pmem, id ""
> >>             memaddr = 9663676416 (0x240000000)
> >>             memdev = "/objects/mem1"
> >> 	    [...]
> >>
> >> "device_del vp1":
> >>
> >> qmp_device_del(vp1)->qdev_unplug(vp1)->hotplug_handler_unplug_request(vp1)
> >>
> >> piix4_device_unplug_request_cb(vp1)->acpi_pcihp_device_unplug_cb(vp1)
> >>  
> >> -> Guest has to process the request and respond    
> >>
> >> acpi_pcihp_eject_slot(vp1)->object_unparent(vp1)  
> > that's one of the possible call flows, unplug could also originate
> > from shpc or native pci-e hot-plug.
> > PCI unplug hasn't ever been factored out from old PCI device/bus code,
> > so PCIDevice::unrealize takes care of parent resource teardown.
> > (well, there wasn't any reason to factor it out till we started
> > talking about hybrid devices).
> > We probably should do the same refactoring like it was done for
> > pc-dimm/cpu unplug
> > (see qdev_get_hotplug_handler()+hotplug_handler_unplug() usage)
> >   
> >> Now, this triggers the unplug of the device hierarchy:
> >>
> >> object_unparent(vp1)->device_unparent(vp1)>device_set_realized(vp1, 0)
> >>  
> >> ->bus_set_realized(virtio-bus, 0)->device_set_realized(virtio-pmem, 0)    
> >>
> >> This is the place where this hooks is comes into play:
> >>  
> >> ->hotplug_handler_do_unplug(virtio-pmem)->machine    
> >> handler->virtio_pmem_do_unplug(virtio-pmem)
> >>
> >> Followed by object_unparent(virtio-bus)->bus_unparent(virtio-bus)
> >> Followed by object_unparent(virtio-pmem)->device_unparent(virtio-pmem)
> >>
> >>
> >> At this place, the hierarchy is gone. Hotplug succeeded and the
> >> virtio-pmem device (memory device) has been properly unplugged.  
> > I'm concerned that both plug and unplug flows are implicit
> > and handled as if it were separate devices without enforcing
> > a particular ordering of (un)plug handlers.
> > It would work right now but it looks rather fragile to me.  
> 
> In my ideal world, the plug+unplug handlers would only perform checks
> and essentially trigger an object_unparent(). (either directly or by
> some guest action).
that's not the purpose of hotplug handlers, it's purpose to setup/teardown
wiring of a device to a controller and/or owner of resources
i.e. where it needs to be connected to.

In case of unplug, object_unparent() is a minimum action that might
take a place to remove a device from parent's scope and destroy
it if there is no references left, however there might be more
things to do before that could happen.

> Inside object_unparent(), the call flow of unrealize steps is defined.
> By moving the "real unplug" part into "do_unplug" and therefor
> essentially calling it when unrealizing, we could generalize this for
> all unplug handlers.
> I think, order of realization and therefore the order of hotplug handler
> calls is strictly defined already. Same applies to unrealization if we
> would factor the essential parts out into e.g. "do_unplug". That order
> is strictly encoded in device_set_realized() and bus_set_realized().

I don't see any benefits in adding do_unplug() in this case,
it would essentially replace hotplug_handler_unplug() in event origin
point with object_unparent() and renaming unplug() to do_unplug().

As result object_unparent() will start do more than unparenting and
destroying the device and a point where unplug originates becomes
poorly documented/lost for a reader among other object_unparent() calls.

hotplug handlers are controller businesses and not the device one so
hiding (generalizing) it inside of device.realize() doesn't look
the right this to do, I'd rather keep these things separate as much
as possible.
 
> > If I remember right, the suggested and partially implemented idea
> > in one of your previous series was to override default hotplug
> > handler with a machine one for plugged in device [1][2].
> > However impl. wasn't exactly what I've suggested since it matches
> > all memory-devices.
> > 
> > 1) qdev: let machine hotplug handler to override bus hotplug handler
> > 2) pc: route all memory devices through  the machine hotplug handler
> > 
> > So lets reiterate, we have TYPE_VIRTIO_PMEM and TYPE_VIRTIO_PMEM_PCI
> > the former implements TYPE_MEMORY_DEVICE interface and the later is
> > a wrapper PCI/whatnot device shim.
> > So when you plug that composite device you'd get 2 independent
> > plug hooks called, which makes it unrelable/broken design.  
> 
> Can you elaborate why this is broken? I don't consider the
> realize/unrealize order broken, and that is where we plug into. But yes,
> we logically plug a device hierarchy and therefore get a separate
> hotplug handler calls.

1st:

consider we have a composite device A that contains B explicitly
manged by A (un)realizefn(). Now if we use your model of independent
callbacks we have only following fixed plug flow possible:
   a>realize()
      ::device_set_realized()
         a:realizefn()
             b->realize()
               ::device_set_realized()
                   b:realizefn()
                   hotplug_handler_plug(b) => b_hotplug_callback
             ...
         hotplug_handler_plug(a) => a_hotplug_callback

that limits composite device wiring to external components to
the only possible order
  1st: b_hotplug_callback() + 2nd: a_hotplug_callback
and other way around for unplug()

That however might not be order we need to do wiring/teardown though,
depending on composite device and controllers it might be necessary to
call callbacks in opposite order or mix both into one to do the wiring
correctly. And to do that we would need drop (move) b_hotplug_callback()
into a_hotplug_callback(), i.e. make it the way I've originally suggested.

hotplug callback call flow might be different in child_bus case
(well, it's different in current code) and it might change in future
(ex: I'm looking into dropping hotplug_handler_post_plug() and
moving hotplug_handler_plug() to a later point to address the same
issue as commits 25e89788/8449bcf94 but without post_plug callback).

It's more robust for devices do not depend heavily on specific order
and define its plug sequence explicitly outside of device core.
This way it won't break apart when device core code is amended. 

2nd:
from user point of view (and API) when composite device is created
we are dealing with 1 device (even if it's composed of many others internally,
it's not an external user business). The same should apply to hotplug
handlers. i.e. wire that device using whatever controllers are necessary
but do not jump through layers inside of device from external code
which hotplug handlers are.

> > My next question would be why TYPE_VIRTIO_PMEM_PCI can't implement 
> > TYPE_MEMORY_DEVICE interface and TYPE_VIRTIO_PMEM be a simple VIRTIO
> > device without any hotplug hooks (so shim device would proxy all
> > memory-device logic to its child)?
> > /huh, then you don't need get_device_id() as well/  
> 
> I had the same idea while going through different options. Then we would
> have to forward all calls directly to the child. We cannot reuse
> TYPE_MEMORY_DEVICE, so we would either need a new interface or define
> the functions we want manually for each such device.
for all I care parent device could alias out its memory-device api to a child
or just outright access/call child internal APIs/members to do the job
(that's what virtio devices often do), but that's the price to pay for
ability to change type of the end device.

> > That way using [2] and [1 - modulo it should match only concrete type]
> > machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
> > and explicitly call machine + pci hotplug handlers in necessary order.
> > 
> > flow would look like:
> >   [acpi|shcp|native pci-e eject]->  
> >        hotplug_ctrl = qdev_get_hotplug_handler(dev);
> >        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
> >             machine_unplug()
> >                machine_virtio_pci_pmem_cb(): 
> >                   // we now that's device has 2 stage hotplug handlers,
> >                   // so we can arrange hotplug sequence in necessary order
> >                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
> > 
> >                   //then do unplug in whatever order that's correct,
> >                   // I'd assume tear down/stop PCI device first, flushing
> >                   // command virtio command queues and that unplug memory itself.
> >                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
> >                   memory_device_unplug()
> > 
> > Similar logic applies to device_add/device_del paths, with a difference that
> > origin point would be monitor/qmp.  
> 
> Let's see. User calls device_del(). That triggers an unplug_request. For
> virtio-pmem, there is nothing to do.
virtio-pmem is not a concrete device though, it's just internal thingy
inside of concrete device, the actual virtio-pmem-pci device is associated
with a pci controller that notifies guest about unplug request using
whatever hotplug method was configured for the controller (shpc/native pci-e/acpi).

> eject hook is called by the guest. For now we do an object_unparent.
> This would now be wrong. We would have to call a proper hotplug handler
> chain (I guess that's the refactoring you mentioned above).
it works for typical pci devices but it won't work for composite
ones that need access to controllers/resources not provided by
it's direct parent. At minimum it should be object_unparent() wrapped
into unplug() callback  callback set for the pci bus if we'd look for shallow
conversion. But I haven't looked in details...

> > Point is to have a single explicit callback chain that applies to a concrete
> > device type. That way if ever change an ordering of calling plug callbacks
> > in qdev core, the expected for a device callback sequence would still
> > remain in place ensuring that device (un)plugged as expected.  
> 
> I haven't tested yet if this will work, but I can give it a try. I
> learned that in QEMU things often seem easier than they actually are :)
> 
> > 
> > Also it should be easier to trace for a reader, than 2 disjoint callbacks of
> > composite device (which only know to author of that device and then only
> > till he/she remembers how that thing works).  
> 
> In my view it makes things slightly more complicated, because you have
> to follow a hotplug handler chain that plugs devices via proxy devices.
> (e.g. passing through TYPE_MEMORY_DEVICE calls to a child, and therefore
> essentially hotplugging the child), instead of only watching out for
> which device get's hotplugged and finding exactly one hotplug handler.
> Of course, for a device hierarchy, multiple devices get logically
> hotplugged.
it's child only from internal point of view (in virtio design it's
virtio interface to share logic between different transports),
users don't really care about it.
It's job of proxy to forward data between external/internal world,
unfortunately adds more boilerplate but that's how virtio has been
designed.
As for following explicitly defined hotplug handlers chain,
it's explict and relevant parts are close to each other so it's easier
to understand what's going on, than trying to figure out implicit
callbacks sequence and how they are related.
David Hildenbrand Oct. 2, 2018, 3:36 p.m. UTC | #6
>> Inside object_unparent(), the call flow of unrealize steps is defined.
>> By moving the "real unplug" part into "do_unplug" and therefor
>> essentially calling it when unrealizing, we could generalize this for
>> all unplug handlers.
>> I think, order of realization and therefore the order of hotplug handler
>> calls is strictly defined already. Same applies to unrealization if we
>> would factor the essential parts out into e.g. "do_unplug". That order
>> is strictly encoded in device_set_realized() and bus_set_realized().
> 
> I don't see any benefits in adding do_unplug() in this case,
> it would essentially replace hotplug_handler_unplug() in event origin
> point with object_unparent() and renaming unplug() to do_unplug().
> 
> As result object_unparent() will start do more than unparenting and
> destroying the device and a point where unplug originates becomes
> poorly documented/lost for a reader among other object_unparent() calls.
> 
> hotplug handlers are controller businesses and not the device one so
> hiding (generalizing) it inside of device.realize() doesn't look
> the right this to do, I'd rather keep these things separate as much
> as possible.

As long as we find another way to make this work I am happy. What I
propose here works (and in my view does not violate any concepts, it
just extends hotunplug handlers to device hierarchies getting
unplugged). But I also understand the potential problems you think we
could have in the future. Let's see if we can make your suggestion work.

>  
>>> If I remember right, the suggested and partially implemented idea
>>> in one of your previous series was to override default hotplug
>>> handler with a machine one for plugged in device [1][2].
>>> However impl. wasn't exactly what I've suggested since it matches
>>> all memory-devices.
>>>
>>> 1) qdev: let machine hotplug handler to override bus hotplug handler
>>> 2) pc: route all memory devices through  the machine hotplug handler
>>>
>>> So lets reiterate, we have TYPE_VIRTIO_PMEM and TYPE_VIRTIO_PMEM_PCI
>>> the former implements TYPE_MEMORY_DEVICE interface and the later is
>>> a wrapper PCI/whatnot device shim.
>>> So when you plug that composite device you'd get 2 independent
>>> plug hooks called, which makes it unrelable/broken design.  
>>
>> Can you elaborate why this is broken? I don't consider the
>> realize/unrealize order broken, and that is where we plug into. But yes,
>> we logically plug a device hierarchy and therefore get a separate
>> hotplug handler calls.
> 
> 1st:
> 
> consider we have a composite device A that contains B explicitly
> manged by A (un)realizefn(). Now if we use your model of independent
> callbacks we have only following fixed plug flow possible:
>    a>realize()
>       ::device_set_realized()
>          a:realizefn()
>              b->realize()
>                ::device_set_realized()
>                    b:realizefn()
>                    hotplug_handler_plug(b) => b_hotplug_callback
>              ...
>          hotplug_handler_plug(a) => a_hotplug_callback
> 
> that limits composite device wiring to external components to
> the only possible order

That why we have pre_plug, plug and post_plug handlers for I guess ...

>   1st: b_hotplug_callback() + 2nd: a_hotplug_callback
> and other way around for unplug()
> 
> That however might not be order we need to do wiring/teardown though,
> depending on composite device and controllers it might be necessary to
> call callbacks in opposite order or mix both into one to do the wiring
> correctly. And to do that we would need drop (move) b_hotplug_callback()
> into a_hotplug_callback(), i.e. make it the way I've originally suggested.
> 
> hotplug callback call flow might be different in child_bus case
> (well, it's different in current code) and it might change in future
> (ex: I'm looking into dropping hotplug_handler_post_plug() and
> moving hotplug_handler_plug() to a later point to address the same
> issue as commits 25e89788/8449bcf94 but without post_plug callback).

.. however that sounds like a good idea, I was wondering the same why we
actually need the post_plug handler.

> 
> It's more robust for devices do not depend heavily on specific order
> and define its plug sequence explicitly outside of device core.
> This way it won't break apart when device core code is amended. 
> 
> 2nd:
> from user point of view (and API) when composite device is created
> we are dealing with 1 device (even if it's composed of many others internally,
> it's not an external user business). The same should apply to hotplug
> handlers. i.e. wire that device using whatever controllers are necessary
> but do not jump through layers inside of device from external code
> which hotplug handlers are.

It somehow looks strange to have plug handler scattered all over
device_set_realize(), while unplug handlers are at a completely
different place and not involved in unrealize(). This makes one think
that hotplugging a device hierarchy works, while unplugging does
currently not work.

> 
>>> My next question would be why TYPE_VIRTIO_PMEM_PCI can't implement 
>>> TYPE_MEMORY_DEVICE interface and TYPE_VIRTIO_PMEM be a simple VIRTIO
>>> device without any hotplug hooks (so shim device would proxy all
>>> memory-device logic to its child)?
>>> /huh, then you don't need get_device_id() as well/  
>>
>> I had the same idea while going through different options. Then we would
>> have to forward all calls directly to the child. We cannot reuse
>> TYPE_MEMORY_DEVICE, so we would either need a new interface or define
>> the functions we want manually for each such device.
> for all I care parent device could alias out its memory-device api to a child
> or just outright access/call child internal APIs/members to do the job
> (that's what virtio devices often do), but that's the price to pay for
> ability to change type of the end device.
> 
>>> That way using [2] and [1 - modulo it should match only concrete type]
>>> machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
>>> and explicitly call machine + pci hotplug handlers in necessary order.
>>>
>>> flow would look like:
>>>   [acpi|shcp|native pci-e eject]->  
>>>        hotplug_ctrl = qdev_get_hotplug_handler(dev);
>>>        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
>>>             machine_unplug()
>>>                machine_virtio_pci_pmem_cb(): 
>>>                   // we now that's device has 2 stage hotplug handlers,
>>>                   // so we can arrange hotplug sequence in necessary order
>>>                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
>>>
>>>                   //then do unplug in whatever order that's correct,
>>>                   // I'd assume tear down/stop PCI device first, flushing
>>>                   // command virtio command queues and that unplug memory itself.
>>>                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
>>>                   memory_device_unplug()
>>>
>>> Similar logic applies to device_add/device_del paths, with a difference that
>>> origin point would be monitor/qmp.  
>>
>> Let's see. User calls device_del(). That triggers an unplug_request. For
>> virtio-pmem, there is nothing to do.
> virtio-pmem is not a concrete device though, it's just internal thingy
> inside of concrete device, the actual virtio-pmem-pci device is associated
> with a pci controller that notifies guest about unplug request using
> whatever hotplug method was configured for the controller (shpc/native pci-e/acpi).
> 
>> eject hook is called by the guest. For now we do an object_unparent.
>> This would now be wrong. We would have to call a proper hotplug handler
>> chain (I guess that's the refactoring you mentioned above).
> it works for typical pci devices but it won't work for composite
> ones that need access to controllers/resources not provided by
> it's direct parent. At minimum it should be object_unparent() wrapped
> into unplug() callback  callback set for the pci bus if we'd look for shallow
> conversion. But I haven't looked in details...

I will do that during the next weeks. I'll resend the unrelated memory
device changes for now.

> 
>>> Point is to have a single explicit callback chain that applies to a concrete
>>> device type. That way if ever change an ordering of calling plug callbacks
>>> in qdev core, the expected for a device callback sequence would still
>>> remain in place ensuring that device (un)plugged as expected.  
>>
>> I haven't tested yet if this will work, but I can give it a try. I
>> learned that in QEMU things often seem easier than they actually are :)
>>
>>>
>>> Also it should be easier to trace for a reader, than 2 disjoint callbacks of
>>> composite device (which only know to author of that device and then only
>>> till he/she remembers how that thing works).  
>>
>> In my view it makes things slightly more complicated, because you have
>> to follow a hotplug handler chain that plugs devices via proxy devices.
>> (e.g. passing through TYPE_MEMORY_DEVICE calls to a child, and therefore
>> essentially hotplugging the child), instead of only watching out for
>> which device get's hotplugged and finding exactly one hotplug handler.
>> Of course, for a device hierarchy, multiple devices get logically
>> hotplugged.
> it's child only from internal point of view (in virtio design it's
> virtio interface to share logic between different transports),
> users don't really care about it.
> It's job of proxy to forward data between external/internal world,
> unfortunately adds more boilerplate but that's how virtio has been
> designed.
> As for following explicitly defined hotplug handlers chain,
> it's explict and relevant parts are close to each other so it's easier
> to understand what's going on, than trying to figure out implicit
> callbacks sequence and how they are related.
> 

So to summarize, you clearly favor a hotplug chain on a single device
over multiple hotplug handlers for separate devices in a device hierarchy.
David Gibson Oct. 3, 2018, 6:29 a.m. UTC | #7
On Wed, Sep 26, 2018 at 11:42:13AM +0200, David Hildenbrand wrote:
> The unplug and unplug_request handlers are special: They are not
> executed when unrealizing a device, but rather trigger the removal of a
> device from device_del() via object_unparent() - to effectively
> unrealize a device.
> 
> If such a device has a child bus and another device attached to
> that bus (e.g. how virtio devices are created with their proxy device),
> we will not get a call to the unplug handler. As we want to support
> hotplug handlers (and especially also some unplug logic to undo resource
> assignment) for such devices, we cannot simply call the unplug handler
> when unrealizing - it has a different semantic ("trigger removal").
> 
> To handle this scenario, we need a do_unplug handler, that will be
> executed for all devices with a hotplug handler.
> 
> While at it, introduce hotplug_fn_nofail and fix a spelling mistake in
> a comment.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  hw/core/hotplug.c    | 10 ++++++++++
>  hw/core/qdev.c       |  6 ++++++
>  include/hw/hotplug.h | 26 ++++++++++++++++++++++++--
>  3 files changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
> index 2253072d0e..e7a68d5160 100644
> --- a/hw/core/hotplug.c
> +++ b/hw/core/hotplug.c
> @@ -45,6 +45,16 @@ void hotplug_handler_post_plug(HotplugHandler *plug_handler,
>      }
>  }
>  
> +void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
> +                                 DeviceState *plugged_dev)

Hrm.  I really dislike things named "do_X".  The "do" rarely adds any
useful meaning.  And when there's also something called just plain
"X", it's *always* unclear how they relate to each other.

That's doubly true when it's a general interface like this, rather
than just some local functions.
David Hildenbrand Oct. 3, 2018, 5:21 p.m. UTC | #8
On 03/10/2018 08:29, David Gibson wrote:
> On Wed, Sep 26, 2018 at 11:42:13AM +0200, David Hildenbrand wrote:
>> The unplug and unplug_request handlers are special: They are not
>> executed when unrealizing a device, but rather trigger the removal of a
>> device from device_del() via object_unparent() - to effectively
>> unrealize a device.
>>
>> If such a device has a child bus and another device attached to
>> that bus (e.g. how virtio devices are created with their proxy device),
>> we will not get a call to the unplug handler. As we want to support
>> hotplug handlers (and especially also some unplug logic to undo resource
>> assignment) for such devices, we cannot simply call the unplug handler
>> when unrealizing - it has a different semantic ("trigger removal").
>>
>> To handle this scenario, we need a do_unplug handler, that will be
>> executed for all devices with a hotplug handler.
>>
>> While at it, introduce hotplug_fn_nofail and fix a spelling mistake in
>> a comment.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>  hw/core/hotplug.c    | 10 ++++++++++
>>  hw/core/qdev.c       |  6 ++++++
>>  include/hw/hotplug.h | 26 ++++++++++++++++++++++++--
>>  3 files changed, 40 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
>> index 2253072d0e..e7a68d5160 100644
>> --- a/hw/core/hotplug.c
>> +++ b/hw/core/hotplug.c
>> @@ -45,6 +45,16 @@ void hotplug_handler_post_plug(HotplugHandler *plug_handler,
>>      }
>>  }
>>  
>> +void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
>> +                                 DeviceState *plugged_dev)
> 
> Hrm.  I really dislike things named "do_X".  The "do" rarely adds any
> useful meaning.  And when there's also something called just plain
> "X", it's *always* unclear how they relate to each other.
> 
> That's doubly true when it's a general interface like this, rather
> than just some local functions.
> 

Yes, the naming is not the best, but I didn't want to rename all unplug
handlers before we have an agreement on how to proceed. My concept would
be that

1. unplug() is renamed to trigger_unplug(). unplug_request() remains.
2. do_unplug() is renamed to pre_unplug() (just like pre_plug())
3. we might have in addition unplug() after realize(false) - just like
plug()

trigger_unplug() would perform checks and result in object_unparent().
From there, all cleanup/unplugging would be performed via the unrealize
chain, just like we have for realize() now. That would allow to properly
unplug complete device hierarchies.

But Igor rather wants one hotplug handler chain, and no dedicated
hotplug handlers for other devices in the device hierarchy. As long as
there are no other opinions I guess I will have to go into the direction
Igor proposes to get virtio-pmem and friends upstream.
Igor Mammedov Oct. 4, 2018, 3:59 p.m. UTC | #9
On Wed, 3 Oct 2018 19:21:25 +0200
David Hildenbrand <david@redhat.com> wrote:

> On 03/10/2018 08:29, David Gibson wrote:
> > On Wed, Sep 26, 2018 at 11:42:13AM +0200, David Hildenbrand wrote:  
> >> The unplug and unplug_request handlers are special: They are not
> >> executed when unrealizing a device, but rather trigger the removal of a
> >> device from device_del() via object_unparent() - to effectively
> >> unrealize a device.
> >>
> >> If such a device has a child bus and another device attached to
> >> that bus (e.g. how virtio devices are created with their proxy device),
> >> we will not get a call to the unplug handler. As we want to support
> >> hotplug handlers (and especially also some unplug logic to undo resource
> >> assignment) for such devices, we cannot simply call the unplug handler
> >> when unrealizing - it has a different semantic ("trigger removal").
> >>
> >> To handle this scenario, we need a do_unplug handler, that will be
> >> executed for all devices with a hotplug handler.
> >>
> >> While at it, introduce hotplug_fn_nofail and fix a spelling mistake in
> >> a comment.
> >>
> >> Signed-off-by: David Hildenbrand <david@redhat.com>
> >> ---
> >>  hw/core/hotplug.c    | 10 ++++++++++
> >>  hw/core/qdev.c       |  6 ++++++
> >>  include/hw/hotplug.h | 26 ++++++++++++++++++++++++--
> >>  3 files changed, 40 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
> >> index 2253072d0e..e7a68d5160 100644
> >> --- a/hw/core/hotplug.c
> >> +++ b/hw/core/hotplug.c
> >> @@ -45,6 +45,16 @@ void hotplug_handler_post_plug(HotplugHandler *plug_handler,
> >>      }
> >>  }
> >>  
> >> +void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
> >> +                                 DeviceState *plugged_dev)  
> > 
> > Hrm.  I really dislike things named "do_X".  The "do" rarely adds any
> > useful meaning.  And when there's also something called just plain
> > "X", it's *always* unclear how they relate to each other.
> > 
> > That's doubly true when it's a general interface like this, rather
> > than just some local functions.
> >   
> 
> Yes, the naming is not the best, but I didn't want to rename all unplug
> handlers before we have an agreement on how to proceed. My concept would
> be that
> 
> 1. unplug() is renamed to trigger_unplug(). unplug_request() remains.
> 2. do_unplug() is renamed to pre_unplug() (just like pre_plug())
> 3. we might have in addition unplug() after realize(false) - just like
> plug()
> 
> trigger_unplug() would perform checks and result in object_unparent().
> From there, all cleanup/unplugging would be performed via the unrealize
> chain, just like we have for realize() now. That would allow to properly
> unplug complete device hierarchies.
> 
> But Igor rather wants one hotplug handler chain, and no dedicated
> hotplug handlers for other devices in the device hierarchy.

Because what we are dealing here with (virtio-pmem) is not separate
devices hierarchy, it's one complex device and if we are to avoid
layering violation, we should operate internal devices via that outer
device which would orchestrate given to it resources internally as it
sees it fit.

It's similar with be spapr cpu core, where internal threads do
not have their own handlers they are plugged as the integral part
of the core.

What I'm strongly opposed is using separate hotplug handlers for
internal devices of a composite one.
I'm more lenient in cases of where the hotplug handler of a composite
device access it's internals directly if creating interfaces to
manage internal devices is big over-engineering, since all
hotplug flow is explicitly described within one handler and
I don't need to hunt around to figure out how device is wired up.

It's still not right wrt not violating abstraction layers and
might break if internal details change, but usually hotplug
handler is target unique and tightly coupled code of manged
and managing pieces (like the case of spapr cpu core) so it
works so far. For some generic handler I'd vote for following
all the rules.

In this series approach, handlers are used if they are separate
devices without explicit connection which I find totally broken
(and tried to explain in this thread, probably not well enough).


> As long as
> there are no other opinions I guess I will have to go into the direction
> Igor proposes to get virtio-pmem and friends upstream.
>
David Hildenbrand Oct. 5, 2018, 7:40 a.m. UTC | #10
On 04/10/2018 17:59, Igor Mammedov wrote:
> On Wed, 3 Oct 2018 19:21:25 +0200
> David Hildenbrand <david@redhat.com> wrote:
> 
>> On 03/10/2018 08:29, David Gibson wrote:
>>> On Wed, Sep 26, 2018 at 11:42:13AM +0200, David Hildenbrand wrote:  
>>>> The unplug and unplug_request handlers are special: They are not
>>>> executed when unrealizing a device, but rather trigger the removal of a
>>>> device from device_del() via object_unparent() - to effectively
>>>> unrealize a device.
>>>>
>>>> If such a device has a child bus and another device attached to
>>>> that bus (e.g. how virtio devices are created with their proxy device),
>>>> we will not get a call to the unplug handler. As we want to support
>>>> hotplug handlers (and especially also some unplug logic to undo resource
>>>> assignment) for such devices, we cannot simply call the unplug handler
>>>> when unrealizing - it has a different semantic ("trigger removal").
>>>>
>>>> To handle this scenario, we need a do_unplug handler, that will be
>>>> executed for all devices with a hotplug handler.
>>>>
>>>> While at it, introduce hotplug_fn_nofail and fix a spelling mistake in
>>>> a comment.
>>>>
>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>> ---
>>>>  hw/core/hotplug.c    | 10 ++++++++++
>>>>  hw/core/qdev.c       |  6 ++++++
>>>>  include/hw/hotplug.h | 26 ++++++++++++++++++++++++--
>>>>  3 files changed, 40 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
>>>> index 2253072d0e..e7a68d5160 100644
>>>> --- a/hw/core/hotplug.c
>>>> +++ b/hw/core/hotplug.c
>>>> @@ -45,6 +45,16 @@ void hotplug_handler_post_plug(HotplugHandler *plug_handler,
>>>>      }
>>>>  }
>>>>  
>>>> +void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
>>>> +                                 DeviceState *plugged_dev)  
>>>
>>> Hrm.  I really dislike things named "do_X".  The "do" rarely adds any
>>> useful meaning.  And when there's also something called just plain
>>> "X", it's *always* unclear how they relate to each other.
>>>
>>> That's doubly true when it's a general interface like this, rather
>>> than just some local functions.
>>>   
>>
>> Yes, the naming is not the best, but I didn't want to rename all unplug
>> handlers before we have an agreement on how to proceed. My concept would
>> be that
>>
>> 1. unplug() is renamed to trigger_unplug(). unplug_request() remains.
>> 2. do_unplug() is renamed to pre_unplug() (just like pre_plug())
>> 3. we might have in addition unplug() after realize(false) - just like
>> plug()
>>
>> trigger_unplug() would perform checks and result in object_unparent().
>> From there, all cleanup/unplugging would be performed via the unrealize
>> chain, just like we have for realize() now. That would allow to properly
>> unplug complete device hierarchies.
>>
>> But Igor rather wants one hotplug handler chain, and no dedicated
>> hotplug handlers for other devices in the device hierarchy.
> 
> Because what we are dealing here with (virtio-pmem) is not separate
> devices hierarchy, it's one complex device and if we are to avoid
> layering violation, we should operate internal devices via that outer
> device which would orchestrate given to it resources internally as it
> sees it fit.
> 
> It's similar with be spapr cpu core, where internal threads do
> not have their own handlers they are plugged as the integral part
> of the core.
> 
> What I'm strongly opposed is using separate hotplug handlers for
> internal devices of a composite one.
> I'm more lenient in cases of where the hotplug handler of a composite
> device access it's internals directly if creating interfaces to
> manage internal devices is big over-engineering, since all
> hotplug flow is explicitly described within one handler and
> I don't need to hunt around to figure out how device is wired up.
> 
> It's still not right wrt not violating abstraction layers and
> might break if internal details change, but usually hotplug
> handler is target unique and tightly coupled code of manged
> and managing pieces (like the case of spapr cpu core) so it
> works so far. For some generic handler I'd vote for following
> all the rules.
> 
> In this series approach, handlers are used if they are separate
> devices without explicit connection which I find totally broken
> (and tried to explain in this thread, probably not well enough).

Thanks for the extended explanation.

Let's see if I can make it work. I guess virtio devices are really
special (and turning other devices without proxys into memory devices
would be much easier).
David Hildenbrand Oct. 8, 2018, 11:47 a.m. UTC | #11
> That way using [2] and [1 - modulo it should match only concrete type]
> machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
> and explicitly call machine + pci hotplug handlers in necessary order.
> 
> flow would look like:
>   [acpi|shcp|native pci-e eject]->  
>        hotplug_ctrl = qdev_get_hotplug_handler(dev);
>        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
>             machine_unplug()
>                machine_virtio_pci_pmem_cb(): 
>                   // we now that's device has 2 stage hotplug handlers,
>                   // so we can arrange hotplug sequence in necessary order
>                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
> 
>                   //then do unplug in whatever order that's correct,
>                   // I'd assume tear down/stop PCI device first, flushing
>                   // command virtio command queues and that unplug memory itself.
>                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
>                   memory_device_unplug()
> 

Looking into the details, this order is not possible. The unplug will
essentially do a device_unparent() leading to the whole hierarchy
getting destroyed. The memory_device part always has to come first.


> Similar logic applies to device_add/device_del paths, with a difference that
> origin point would be monitor/qmp.
> 
> Point is to have a single explicit callback chain that applies to a concrete
> device type. That way if ever change an ordering of calling plug callbacks
> in qdev core, the expected for a device callback sequence would still
> remain in place ensuring that device (un)plugged as expected.
> 
> Also it should be easier to trace for a reader, than 2 disjoint callbacks of
> composite device (which only know to author of that device and then only
> till he/she remembers how that thing works).
>
Igor Mammedov Oct. 8, 2018, 12:19 p.m. UTC | #12
On Mon, 8 Oct 2018 13:47:53 +0200
David Hildenbrand <david@redhat.com> wrote:

> > That way using [2] and [1 - modulo it should match only concrete type]
> > machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
> > and explicitly call machine + pci hotplug handlers in necessary order.
> > 
> > flow would look like:
> >   [acpi|shcp|native pci-e eject]->  
> >        hotplug_ctrl = qdev_get_hotplug_handler(dev);
> >        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
> >             machine_unplug()
> >                machine_virtio_pci_pmem_cb(): 
> >                   // we now that's device has 2 stage hotplug handlers,
> >                   // so we can arrange hotplug sequence in necessary order
> >                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
> > 
> >                   //then do unplug in whatever order that's correct,
> >                   // I'd assume tear down/stop PCI device first, flushing
> >                   // command virtio command queues and that unplug memory itself.
> >                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
> >                   memory_device_unplug()
> >   
> 
> Looking into the details, this order is not possible. The unplug will
> essentially do a device_unparent() leading to the whole hierarchy
> getting destroyed. The memory_device part always has to come first.

Question here is if there are anything that should be handled first on
virtio level before memory_device/pmem part is called?
If there isn't it might be fine to swap the order of unplug sequence.


 
> > Similar logic applies to device_add/device_del paths, with a difference that
> > origin point would be monitor/qmp.
> > 
> > Point is to have a single explicit callback chain that applies to a concrete
> > device type. That way if ever change an ordering of calling plug callbacks
> > in qdev core, the expected for a device callback sequence would still
> > remain in place ensuring that device (un)plugged as expected.
> > 
> > Also it should be easier to trace for a reader, than 2 disjoint callbacks of
> > composite device (which only know to author of that device and then only
> > till he/she remembers how that thing works).
> >   
> 
>
David Hildenbrand Oct. 8, 2018, 12:41 p.m. UTC | #13
On 08/10/2018 14:19, Igor Mammedov wrote:
> On Mon, 8 Oct 2018 13:47:53 +0200
> David Hildenbrand <david@redhat.com> wrote:
> 
>>> That way using [2] and [1 - modulo it should match only concrete type]
>>> machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
>>> and explicitly call machine + pci hotplug handlers in necessary order.
>>>
>>> flow would look like:
>>>   [acpi|shcp|native pci-e eject]->  
>>>        hotplug_ctrl = qdev_get_hotplug_handler(dev);
>>>        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
>>>             machine_unplug()
>>>                machine_virtio_pci_pmem_cb(): 
>>>                   // we now that's device has 2 stage hotplug handlers,
>>>                   // so we can arrange hotplug sequence in necessary order
>>>                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
>>>
>>>                   //then do unplug in whatever order that's correct,
>>>                   // I'd assume tear down/stop PCI device first, flushing
>>>                   // command virtio command queues and that unplug memory itself.
>>>                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
>>>                   memory_device_unplug()
>>>   
>>
>> Looking into the details, this order is not possible. The unplug will
>> essentially do a device_unparent() leading to the whole hierarchy
>> getting destroyed. The memory_device part always has to come first.
> 
> Question here is if there are anything that should be handled first on
> virtio level before memory_device/pmem part is called?
> If there isn't it might be fine to swap the order of unplug sequence.
> 

Was asking myself the same thing, but as we are effectively holding the
iothread lock and the guest triggered the unplug, I guess it is fine to
unregister the memory region at this point.
Igor Mammedov Oct. 8, 2018, 2:12 p.m. UTC | #14
On Mon, 8 Oct 2018 14:41:50 +0200
David Hildenbrand <david@redhat.com> wrote:

> On 08/10/2018 14:19, Igor Mammedov wrote:
> > On Mon, 8 Oct 2018 13:47:53 +0200
> > David Hildenbrand <david@redhat.com> wrote:
> >   
> >>> That way using [2] and [1 - modulo it should match only concrete type]
> >>> machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
> >>> and explicitly call machine + pci hotplug handlers in necessary order.
> >>>
> >>> flow would look like:
> >>>   [acpi|shcp|native pci-e eject]->  
> >>>        hotplug_ctrl = qdev_get_hotplug_handler(dev);
> >>>        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
> >>>             machine_unplug()
> >>>                machine_virtio_pci_pmem_cb(): 
> >>>                   // we now that's device has 2 stage hotplug handlers,
> >>>                   // so we can arrange hotplug sequence in necessary order
> >>>                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
> >>>
> >>>                   //then do unplug in whatever order that's correct,
> >>>                   // I'd assume tear down/stop PCI device first, flushing
> >>>                   // command virtio command queues and that unplug memory itself.
> >>>                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
> >>>                   memory_device_unplug()
> >>>     
> >>
> >> Looking into the details, this order is not possible. The unplug will
> >> essentially do a device_unparent() leading to the whole hierarchy
> >> getting destroyed. The memory_device part always has to come first.  
> > 
> > Question here is if there are anything that should be handled first on
> > virtio level before memory_device/pmem part is called?
> > If there isn't it might be fine to swap the order of unplug sequence.
> >   
> 
> Was asking myself the same thing, but as we are effectively holding the
> iothread lock and the guest triggered the unplug, I guess it is fine to
> unregister the memory region at this point.
It looks the same to me but I'm not familiar with virtio or PCI.
I'd ask Michael if it's safe thing to do.
David Hildenbrand Oct. 11, 2018, 8:50 a.m. UTC | #15
On 08/10/2018 16:12, Igor Mammedov wrote:
> On Mon, 8 Oct 2018 14:41:50 +0200
> David Hildenbrand <david@redhat.com> wrote:
> 
>> On 08/10/2018 14:19, Igor Mammedov wrote:
>>> On Mon, 8 Oct 2018 13:47:53 +0200
>>> David Hildenbrand <david@redhat.com> wrote:
>>>   
>>>>> That way using [2] and [1 - modulo it should match only concrete type]
>>>>> machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
>>>>> and explicitly call machine + pci hotplug handlers in necessary order.
>>>>>
>>>>> flow would look like:
>>>>>   [acpi|shcp|native pci-e eject]->  
>>>>>        hotplug_ctrl = qdev_get_hotplug_handler(dev);
>>>>>        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
>>>>>             machine_unplug()
>>>>>                machine_virtio_pci_pmem_cb(): 
>>>>>                   // we now that's device has 2 stage hotplug handlers,
>>>>>                   // so we can arrange hotplug sequence in necessary order
>>>>>                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
>>>>>
>>>>>                   //then do unplug in whatever order that's correct,
>>>>>                   // I'd assume tear down/stop PCI device first, flushing
>>>>>                   // command virtio command queues and that unplug memory itself.
>>>>>                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
>>>>>                   memory_device_unplug()
>>>>>     
>>>>
>>>> Looking into the details, this order is not possible. The unplug will
>>>> essentially do a device_unparent() leading to the whole hierarchy
>>>> getting destroyed. The memory_device part always has to come first.  
>>>
>>> Question here is if there are anything that should be handled first on
>>> virtio level before memory_device/pmem part is called?
>>> If there isn't it might be fine to swap the order of unplug sequence.
>>>   
>>
>> Was asking myself the same thing, but as we are effectively holding the
>> iothread lock and the guest triggered the unplug, I guess it is fine to
>> unregister the memory region at this point.
> It looks the same to me but I'm not familiar with virtio or PCI.
> I'd ask Michael if it's safe thing to do.

It is certainly cleaner to do it after the device was unrealized.

The only way I see is to provide a post_unplug handler that will be run
after unrealize(false), but before deleting the object(s).

As I already said that, the unplug/unplug_request handlers are very
different to the other handlers, as they actively delete(request to
delete) an object. In contrast to pre_plug/plug that don't create an
object but only wire it up while realizing the device.

That is the reason why we can't do stuff after calling the bus hotunplug
handler but only before it. We cannot really modify the calling order.

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 046d8f1f76..777a9486bf 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -885,6 +885,12 @@ static void device_set_realized(Object *obj, bool
value, Error **errp)
             local_errp = local_err ? NULL : &local_err;
             dc->unrealize(dev, local_errp);
         }
+
+        hotplug_ctrl = qdev_get_hotplug_handler(dev);
+        if (hotplug_ctrl) {
+            hotplug_handler_post_unplug(hotplug_ctrl, dev);
+        }
+
         dev->pending_deleted_event = true;
         DEVICE_LISTENER_CALL(unrealize, Reverse, dev);
     }

At that point all devices are unrealized but still alive.

We can do what you imagined from there - make virtio-pmem-pci the memory
device and overwrite its hotplug handler. Call a handler chain (in case
we would have a pci post_unplug handler someday).

If we want to do something before unplug, we can use the current unplug
handler (via hotplug handler chain).

Do you have other ideas?
Igor Mammedov Oct. 12, 2018, 8:27 a.m. UTC | #16
On Thu, 11 Oct 2018 10:50:13 +0200
David Hildenbrand <david@redhat.com> wrote:

> On 08/10/2018 16:12, Igor Mammedov wrote:
> > On Mon, 8 Oct 2018 14:41:50 +0200
> > David Hildenbrand <david@redhat.com> wrote:
> >   
> >> On 08/10/2018 14:19, Igor Mammedov wrote:  
> >>> On Mon, 8 Oct 2018 13:47:53 +0200
> >>> David Hildenbrand <david@redhat.com> wrote:
> >>>     
> >>>>> That way using [2] and [1 - modulo it should match only concrete type]
> >>>>> machine would be able to override hotplug handlers for TYPE_VIRTIO_PMEM_PCI
> >>>>> and explicitly call machine + pci hotplug handlers in necessary order.

*1

> >>>>> flow would look like:
> >>>>>   [acpi|shcp|native pci-e eject]->  
> >>>>>        hotplug_ctrl = qdev_get_hotplug_handler(dev);
> >>>>>        hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); ->
> >>>>>             machine_unplug()
> >>>>>                machine_virtio_pci_pmem_cb(): 
> >>>>>                   // we now that's device has 2 stage hotplug handlers,
> >>>>>                   // so we can arrange hotplug sequence in necessary order
> >>>>>                   hotplug_ctrl2 = qdev_get_bus_hotplug_handler(dev);
> >>>>>
> >>>>>                   //then do unplug in whatever order that's correct,
> >>>>>                   // I'd assume tear down/stop PCI device first, flushing
> >>>>>                   // command virtio command queues and that unplug memory itself.
> >>>>>                   hotplug_handler_unplug(hotplug_ctrl2, dev, &local_err);
> >>>>>                   memory_device_unplug()
> >>>>>       
> >>>>
> >>>> Looking into the details, this order is not possible. The unplug will
> >>>> essentially do a device_unparent() leading to the whole hierarchy
> >>>> getting destroyed. The memory_device part always has to come first.    
> >>>

*2

> >>> Question here is if there are anything that should be handled first on
> >>> virtio level before memory_device/pmem part is called?
> >>> If there isn't it might be fine to swap the order of unplug sequence.
> >>>     
> >>
> >> Was asking myself the same thing, but as we are effectively holding the
> >> iothread lock and the guest triggered the unplug, I guess it is fine to
> >> unregister the memory region at this point.  
> > It looks the same to me but I'm not familiar with virtio or PCI.
> > I'd ask Michael if it's safe thing to do.  
> 
> It is certainly cleaner to do it after the device was unrealized.
> 
> The only way I see is to provide a post_unplug handler that will be run
> after unrealize(false), but before deleting the object(s).

The correct order should be opposite to one that created a devices,
i.e. unplug -> unrealize -> delete.
Doing unplug stuff after device was unrealized looks outright wrong
(essentially device doesn't exists anymore except memory where it's
been located).

> As I already said that, the unplug/unplug_request handlers are very
> different to the other handlers, as they actively delete(request to
> delete) an object. In contrast to pre_plug/plug that don't create an
> object but only wire it up while realizing the device.>
> 
> That is the reason why we can't do stuff after calling the bus hotunplug
> handler but only before it. We cannot really modify the calling order.

There is nothing special in unplug handlers wrt plug ones, they all are
external to the being created device. Theoretically we can move pre_plug
/plug from device_set_realize() to outer caller qdev_device_add() and
nothing would change.

The problem here is the lack of unplug handler for pci device so
unplugging boils down to object_unparent() which will unrealize
device (and in process unplug it) and then delete it.
What we really need is to factor out unplug code from pci device
unrealizefn(). Then ideally unplug controller could look like:
 static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
 {
+    hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    ... do some port specific unplug ...
+    hotplug_handler_do_unplug(hotplug_ctrl, dev); // default pci device unplug or pmem specific
     object_unparent(OBJECT(dev));
 }

where tear down and unrealize/delete parts are separated from each other.

> diff --git a/hw/core/qdev.c b/hw/core/qdev.c
> index 046d8f1f76..777a9486bf 100644
> --- a/hw/core/qdev.c
> +++ b/hw/core/qdev.c
> @@ -885,6 +885,12 @@ static void device_set_realized(Object *obj, bool
> value, Error **errp)
>              local_errp = local_err ? NULL : &local_err;
>              dc->unrealize(dev, local_errp);
>          }
> +
> +        hotplug_ctrl = qdev_get_hotplug_handler(dev);
> +        if (hotplug_ctrl) {
> +            hotplug_handler_post_unplug(hotplug_ctrl, dev);
> +        }
> +
>          dev->pending_deleted_event = true;
>          DEVICE_LISTENER_CALL(unrealize, Reverse, dev);
>      }
> 
> At that point all devices are unrealized but still alive.
there is no device at this point, only memory where it's been located.

> We can do what you imagined from there - make virtio-pmem-pci the memory
> device and overwrite its hotplug handler. Call a handler chain (in case
> we would have a pci post_unplug handler someday).
making virtio-pmem-pci the memory device is orthogonal to the (un)plug
topic.

> If we want to do something before unplug, we can use the current unplug
> handler (via hotplug handler chain).
>
> Do you have other ideas?
I'd proceed with suggestions made earlier [1][2] on this thread.
That should solve the issue at hand with out factoring out PCI unplug
from old pci::unrealize(). One would have to introduce shim unplug
handlers for pci/bridge/pcie that would call object_unparent(), but
that's the extent of another shallow PCI re-factoring.
Of cause that's assuming that sequence
 1.  memory_device_unplug()
 2.  pci_unplug()
is correct in virtio-pmem-pci case.
David Hildenbrand Oct. 12, 2018, 8:45 a.m. UTC | #17
> 
> The correct order should be opposite to one that created a devices,
> i.e. unplug -> unrealize -> delete.
> Doing unplug stuff after device was unrealized looks outright wrong
> (essentially device doesn't exists anymore except memory where it's
> been located).

pre_plug -> realize -> plug

unplug -> unrealize -> post_unplug

doesn't look that wrong to me. But the problem seems to be that unplug
basically spans the whole unrealize phase (including the post_unplug
phase). So unplug should usually already contains the post_unplug part
as you noted below (when moving the object_unparent() part out).

> 
>> As I already said that, the unplug/unplug_request handlers are very
>> different to the other handlers, as they actively delete(request to
>> delete) an object. In contrast to pre_plug/plug that don't create an
>> object but only wire it up while realizing the device.>
>>
>> That is the reason why we can't do stuff after calling the bus hotunplug
>> handler but only before it. We cannot really modify the calling order.
> 
> There is nothing special in unplug handlers wrt plug ones, they all are
> external to the being created device. Theoretically we can move pre_plug
> /plug from device_set_realize() to outer caller qdev_device_add() and
> nothing would change.

I guess at some point we should definitely move them out, this only
leads to confusion. (e.g. hotplug handlers getting called on device
within device hierarchies although we don't want this to be possible)

> 
> The problem here is the lack of unplug handler for pci device so
> unplugging boils down to object_unparent() which will unrealize
> device (and in process unplug it) and then delete it.
> What we really need is to factor out unplug code from pci device
> unrealizefn(). Then ideally unplug controller could look like:
>  static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
>  {
> +    hotplug_ctrl = qdev_get_hotplug_handler(dev);
> +    ... do some port specific unplug ...
> +    hotplug_handler_do_unplug(hotplug_ctrl, dev); // default pci device unplug or pmem specific
>      object_unparent(OBJECT(dev));
>  }
> 
> where tear down and unrealize/delete parts are separated from each other.

That makes sense, but we would then handle it for all PCI devices via
the hotplug chain I guess. (otherwise a object_unparent would be missing)

[...]
>>
>> Do you have other ideas?
> I'd proceed with suggestions made earlier [1][2] on this thread.
> That should solve the issue at hand with out factoring out PCI unplug
> from old pci::unrealize(). One would have to introduce shim unplug
> handlers for pci/bridge/pcie that would call object_unparent(), but
> that's the extent of another shallow PCI re-factoring.
> Of cause that's assuming that sequence
>  1.  memory_device_unplug()
>  2.  pci_unplug()
> is correct in virtio-pmem-pci case.

That is indeed possible as long as the memory device part has to come
first. I'll give it a try.

I already started prototyping and found some other PCI hotplug handler
issues I have to solve first ....
Igor Mammedov Oct. 12, 2018, 2:21 p.m. UTC | #18
On Fri, 12 Oct 2018 10:45:41 +0200
David Hildenbrand <david@redhat.com> wrote:

> > 
> > The correct order should be opposite to one that created a devices,
> > i.e. unplug -> unrealize -> delete.
> > Doing unplug stuff after device was unrealized looks outright wrong
> > (essentially device doesn't exists anymore except memory where it's
> > been located).  
> 
> pre_plug -> realize -> plug
> 
> unplug -> unrealize -> post_unplug
> 
> doesn't look that wrong to me. But the problem seems to be that unplug
> basically spans the whole unrealize phase (including the post_unplug
> phase). So unplug should usually already contains the post_unplug part
> as you noted below (when moving the object_unparent() part out).
that just shortcut to move forward somewhere, personally I prefer having
as less callbacks as possible, to me current unplug is pretty flexible
we can do practically anything from it pre_unplug and post_unplug if
necessary. Hence I don't see a reason for adding extra callbacks on top
and for already mentioned reasons tight integration (hiding) of hotplug
infrastructure within device_set_realized().

  
> >> As I already said that, the unplug/unplug_request handlers are very
> >> different to the other handlers, as they actively delete(request to
> >> delete) an object. In contrast to pre_plug/plug that don't create an
> >> object but only wire it up while realizing the device.>
> >>
> >> That is the reason why we can't do stuff after calling the bus hotunplug
> >> handler but only before it. We cannot really modify the calling order.  
> > 
> > There is nothing special in unplug handlers wrt plug ones, they all are
> > external to the being created device. Theoretically we can move pre_plug
> > /plug from device_set_realize() to outer caller qdev_device_add() and
> > nothing would change.  
> 
> I guess at some point we should definitely move them out, this only
> leads to confusion. (e.g. hotplug handlers getting called on device
> within device hierarchies although we don't want this to be possible)
For that to happen we probably would need to make qdev_device_add()
provide a friendly C API for adding a device coming not from CLI
with its options. Right now we would need to build QemuOpts
before manually before creating device with qdev_device_add(),
it might be fine but I haven't really looked into it.

> > The problem here is the lack of unplug handler for pci device so
> > unplugging boils down to object_unparent() which will unrealize
> > device (and in process unplug it) and then delete it.
> > What we really need is to factor out unplug code from pci device
> > unrealizefn(). Then ideally unplug controller could look like:
> >  static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
> >  {
> > +    hotplug_ctrl = qdev_get_hotplug_handler(dev);
> > +    ... do some port specific unplug ...
> > +    hotplug_handler_do_unplug(hotplug_ctrl, dev); // default pci device unplug or pmem specific
> >      object_unparent(OBJECT(dev));
> >  }
> > 
> > where tear down and unrealize/delete parts are separated from each other.  
> 
> That makes sense, but we would then handle it for all PCI devices via
> the hotplug chain I guess. (otherwise a object_unparent would be missing)
I have an additional idea on top this, which will do a little more, see example:

 static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
 {
+    hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    ... do some port specific unplug ...
+    hotplug_handler_do_unplug(hotplug_ctrl, dev); // default pci device unplug or pmem specific
+        => pci_unplug_handler():
+             object_property_set_bool(OBJECT(dev), FALSE, "realized", &err);
     object_unparent(OBJECT(dev));
}

i.e. simulate tear down by doing explicit unrealize() from unplug handler
but don't delete device from handler. Just leave deleting it to point of
origin of unplug event. (concrete hw endpoints that trigger it)

It's still not how it should be (unrealize and tear down are still done
as a single step), but at least we isolate it from deleting part.
If isolating pci.unrealize() won't be sufficient, we might try to factor out
from there minimal parts that's necessary for composite virtio device to
work.
(I don't insist on complete PCI unplug refactoring, so it won't block
this series).

> [...]
> >>
> >> Do you have other ideas?  
> > I'd proceed with suggestions made earlier [1][2] on this thread.
> > That should solve the issue at hand with out factoring out PCI unplug
> > from old pci::unrealize(). One would have to introduce shim unplug
> > handlers for pci/bridge/pcie that would call object_unparent(), but
> > that's the extent of another shallow PCI re-factoring.
> > Of cause that's assuming that sequence
> >  1.  memory_device_unplug()
> >  2.  pci_unplug()
> > is correct in virtio-pmem-pci case.  
> 
> That is indeed possible as long as the memory device part has to come
> first. I'll give it a try.
> 
> I already started prototyping and found some other PCI hotplug handler
> issues I have to solve first ....
I've been recently auditing plug/unplug parts across tree so if you have
any question regarding it feel free to ping me.
David Hildenbrand Oct. 15, 2018, 7:21 a.m. UTC | #19
On 12/10/2018 16:21, Igor Mammedov wrote:
> On Fri, 12 Oct 2018 10:45:41 +0200
> David Hildenbrand <david@redhat.com> wrote:
> 
>>>
>>> The correct order should be opposite to one that created a devices,
>>> i.e. unplug -> unrealize -> delete.
>>> Doing unplug stuff after device was unrealized looks outright wrong
>>> (essentially device doesn't exists anymore except memory where it's
>>> been located).  
>>
>> pre_plug -> realize -> plug
>>
>> unplug -> unrealize -> post_unplug
>>
>> doesn't look that wrong to me. But the problem seems to be that unplug
>> basically spans the whole unrealize phase (including the post_unplug
>> phase). So unplug should usually already contains the post_unplug part
>> as you noted below (when moving the object_unparent() part out).
> that just shortcut to move forward somewhere, personally I prefer having
> as less callbacks as possible, to me current unplug is pretty flexible
> we can do practically anything from it pre_unplug and post_unplug if
> necessary. Hence I don't see a reason for adding extra callbacks on top
> and for already mentioned reasons tight integration (hiding) of hotplug
> infrastructure within device_set_realized().

Yes, I agree if object_unparent() is moved out.

> 
>   
>>>> As I already said that, the unplug/unplug_request handlers are very
>>>> different to the other handlers, as they actively delete(request to
>>>> delete) an object. In contrast to pre_plug/plug that don't create an
>>>> object but only wire it up while realizing the device.>
>>>>
>>>> That is the reason why we can't do stuff after calling the bus hotunplug
>>>> handler but only before it. We cannot really modify the calling order.  
>>>
>>> There is nothing special in unplug handlers wrt plug ones, they all are
>>> external to the being created device. Theoretically we can move pre_plug
>>> /plug from device_set_realize() to outer caller qdev_device_add() and
>>> nothing would change.  
>>
>> I guess at some point we should definitely move them out, this only
>> leads to confusion. (e.g. hotplug handlers getting called on device
>> within device hierarchies although we don't want this to be possible)
> For that to happen we probably would need to make qdev_device_add()
> provide a friendly C API for adding a device coming not from CLI
> with its options. Right now we would need to build QemuOpts
> before manually before creating device with qdev_device_add(),
> it might be fine but I haven't really looked into it.

Yes, this might require more thought.

> 
>>> The problem here is the lack of unplug handler for pci device so
>>> unplugging boils down to object_unparent() which will unrealize
>>> device (and in process unplug it) and then delete it.
>>> What we really need is to factor out unplug code from pci device
>>> unrealizefn(). Then ideally unplug controller could look like:
>>>  static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
>>>  {
>>> +    hotplug_ctrl = qdev_get_hotplug_handler(dev);
>>> +    ... do some port specific unplug ...
>>> +    hotplug_handler_do_unplug(hotplug_ctrl, dev); // default pci device unplug or pmem specific
>>>      object_unparent(OBJECT(dev));
>>>  }
>>>
>>> where tear down and unrealize/delete parts are separated from each other.  
>>
>> That makes sense, but we would then handle it for all PCI devices via
>> the hotplug chain I guess. (otherwise a object_unparent would be missing)
> I have an additional idea on top this, which will do a little more, see example:
> 
>  static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
>  {
> +    hotplug_ctrl = qdev_get_hotplug_handler(dev);
> +    ... do some port specific unplug ...
> +    hotplug_handler_do_unplug(hotplug_ctrl, dev); // default pci device unplug or pmem specific
> +        => pci_unplug_handler():
> +             object_property_set_bool(OBJECT(dev), FALSE, "realized", &err);
>      object_unparent(OBJECT(dev));
> }
> 
> i.e. simulate tear down by doing explicit unrealize() from unplug handler
> but don't delete device from handler. Just leave deleting it to point of
> origin of unplug event. (concrete hw endpoints that trigger it)
> 
> It's still not how it should be (unrealize and tear down are still done
> as a single step), but at least we isolate it from deleting part.
> If isolating pci.unrealize() won't be sufficient, we might try to factor out
> from there minimal parts that's necessary for composite virtio device to
> work.
> (I don't insist on complete PCI unplug refactoring, so it won't block
> this series).
> 

Yes, I had a similar idea in mind. First of all we need to get the
hotplug handler calls right and then think about how/where to move out
the actual PCI realization stuff. (hotplug handlers getting overwritten,
see below)

>> [...]
>>>>
>>>> Do you have other ideas?  
>>> I'd proceed with suggestions made earlier [1][2] on this thread.
>>> That should solve the issue at hand with out factoring out PCI unplug
>>> from old pci::unrealize(). One would have to introduce shim unplug
>>> handlers for pci/bridge/pcie that would call object_unparent(), but
>>> that's the extent of another shallow PCI re-factoring.
>>> Of cause that's assuming that sequence
>>>  1.  memory_device_unplug()
>>>  2.  pci_unplug()
>>> is correct in virtio-pmem-pci case.  
>>
>> That is indeed possible as long as the memory device part has to come
>> first. I'll give it a try.
>>
>> I already started prototyping and found some other PCI hotplug handler
>> issues I have to solve first ....
> I've been recently auditing plug/unplug parts across tree so if you have
> any question regarding it feel free to ping me.
> 

I guess its best to talk at KVM forum. There are plenty of things to
sort out before this can be considered clean :)

(most importantly the ACPI hotplug handler overwriting other hotplug
handlers and only registering after all devices have been coldplugged -
grml.). I have a basic prototype running, but that hotplug handler part
needs some more love.

Thanks!
diff mbox series

Patch

diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
index 2253072d0e..e7a68d5160 100644
--- a/hw/core/hotplug.c
+++ b/hw/core/hotplug.c
@@ -45,6 +45,16 @@  void hotplug_handler_post_plug(HotplugHandler *plug_handler,
     }
 }
 
+void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
+                                 DeviceState *plugged_dev)
+{
+    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);
+
+    if (hdc->do_unplug) {
+        hdc->do_unplug(plug_handler, plugged_dev);
+    }
+}
+
 void hotplug_handler_unplug_request(HotplugHandler *plug_handler,
                                     DeviceState *plugged_dev,
                                     Error **errp)
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 36b788a66b..dde2726099 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -873,6 +873,12 @@  static void device_set_realized(Object *obj, bool value, Error **errp)
         }
     } else if (!value && dev->realized) {
         Error **local_errp = NULL;
+
+        hotplug_ctrl = qdev_get_hotplug_handler(dev);
+        if (hotplug_ctrl) {
+            hotplug_handler_do_unplug(hotplug_ctrl, dev);
+        }
+
         QLIST_FOREACH(bus, &dev->child_bus, sibling) {
             local_errp = local_err ? NULL : &local_err;
             object_property_set_bool(OBJECT(bus), false, "realized",
diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h
index 51541d63e1..2fa5833cf1 100644
--- a/include/hw/hotplug.h
+++ b/include/hw/hotplug.h
@@ -31,13 +31,21 @@  typedef struct HotplugHandler {
 
 /**
  * hotplug_fn:
- * @plug_handler: a device performing plug/uplug action
+ * @plug_handler: a device performing (un)plug action
  * @plugged_dev: a device that has been (un)plugged
  * @errp: returns an error if this function fails
  */
 typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
                            DeviceState *plugged_dev, Error **errp);
 
+/**
+ * hotplug_fn_nofail:
+ * @plug_handler: a device performing un(plug) action
+ * @plugged_dev: a device that has been (un)plugged
+ */
+typedef void (*hotplug_fn_nofail)(HotplugHandler *plug_handler,
+                                  DeviceState *plugged_dev);
+
 /**
  * HotplugDeviceClass:
  *
@@ -49,12 +57,17 @@  typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
  * @plug: plug callback called at end of device.realize(true).
  * @post_plug: post plug callback called after device.realize(true) and device
  *             reset
+ * @do_unplug: unplug callback called at start of device.realize(false)
  * @unplug_request: unplug request callback.
  *                  Used as a means to initiate device unplug for devices that
  *                  require asynchronous unplug handling.
  * @unplug: unplug callback.
  *          Used for device removal with devices that implement
  *          asynchronous and synchronous (surprise) removal.
+ * Note: unplug_request and unplug are only called for devices to initiate
+ *       unplug of a device hierarchy (e.g. triggered by device_del). For
+ *       devices that will be removed along with this device hierarchy only
+ *       do_unplug will be called (e.g. to unassign resources).
  */
 typedef struct HotplugHandlerClass {
     /* <private> */
@@ -63,7 +76,8 @@  typedef struct HotplugHandlerClass {
     /* <public> */
     hotplug_fn pre_plug;
     hotplug_fn plug;
-    void (*post_plug)(HotplugHandler *plug_handler, DeviceState *plugged_dev);
+    hotplug_fn_nofail post_plug;
+    hotplug_fn_nofail do_unplug;
     hotplug_fn unplug_request;
     hotplug_fn unplug;
 } HotplugHandlerClass;
@@ -94,6 +108,14 @@  void hotplug_handler_pre_plug(HotplugHandler *plug_handler,
 void hotplug_handler_post_plug(HotplugHandler *plug_handler,
                                DeviceState *plugged_dev);
 
+/**
+ * hotplug_handler_do_unplug:
+ *
+ * Call #HotplugHandlerClass.do_unplug callback of @plug_handler.
+ */
+void hotplug_handler_do_unplug(HotplugHandler *plug_handler,
+                               DeviceState *plugged_dev);
+
 /**
  * hotplug_handler_unplug_request:
  *