diff mbox

[V3] virtio: do not require IOMMU to be created in advance

Message ID 1488877751-13419-1-git-send-email-jasowang@redhat.com
State New
Headers show

Commit Message

Jason Wang March 7, 2017, 9:09 a.m. UTC
After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
after caching ring translations"), IOMMU was required to be created in
advance. This is because we can only get the correct dma_as after pci
IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
inconvenient for user. This patch releases this by:

- introduce a bus_master_ready method for PCIDeviceClass and trigger
  this during pci_init_bus_master
- implement virtio-pci method and 1) reset the dma_as 2) re-register
  the memory listener to the new dma_as

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from V2:
- delay pci_init_bus_master() after pci device is realized to make
  bus_master_ready a more generic method
---
 hw/pci/pci.c               | 11 ++++++++---
 hw/virtio/virtio-pci.c     |  9 +++++++++
 hw/virtio/virtio.c         | 19 +++++++++++++++++++
 include/hw/pci/pci.h       |  1 +
 include/hw/virtio/virtio.h |  1 +
 5 files changed, 38 insertions(+), 3 deletions(-)

Comments

Peter Xu March 7, 2017, 9:19 a.m. UTC | #1
On Tue, Mar 07, 2017 at 05:09:11PM +0800, Jason Wang wrote:
> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> after caching ring translations"), IOMMU was required to be created in
> advance. This is because we can only get the correct dma_as after pci
> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> inconvenient for user. This patch releases this by:
> 
> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>   this during pci_init_bus_master
> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>   the memory listener to the new dma_as
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Reviewed-by: Peter Xu <peterx@redhat.com>
Marcel Apfelbaum March 7, 2017, 12:47 p.m. UTC | #2
On 03/07/2017 11:09 AM, Jason Wang wrote:
> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> after caching ring translations"), IOMMU was required to be created in
> advance. This is because we can only get the correct dma_as after pci
> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> inconvenient for user. This patch releases this by:
>
> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>   this during pci_init_bus_master
> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>   the memory listener to the new dma_as
>

Hi Jason,

> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes from V2:
> - delay pci_init_bus_master() after pci device is realized to make
>   bus_master_ready a more generic method
> ---
>  hw/pci/pci.c               | 11 ++++++++---
>  hw/virtio/virtio-pci.c     |  9 +++++++++
>  hw/virtio/virtio.c         | 19 +++++++++++++++++++
>  include/hw/pci/pci.h       |  1 +
>  include/hw/virtio/virtio.h |  1 +
>  5 files changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 273f1e4..22e6ad9 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>  static void pci_init_bus_master(PCIDevice *pci_dev)
>  {
>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>
>      memory_region_init_alias(&pci_dev->bus_master_enable_region,
>                               OBJECT(pci_dev), "bus master",
> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>      address_space_init(&pci_dev->bus_master_as,
>                         &pci_dev->bus_master_enable_region, pci_dev->name);
> +    if (pc->bus_master_ready) {
> +        pc->bus_master_ready(pci_dev);
> +    }
>  }
>
>  static void pcibus_machine_done(Notifier *notifier, void *data)
> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      pci_dev->devfn = devfn;
>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>
> -    if (qdev_hotplug) {
> -        pci_init_bus_master(pci_dev);
> -    }
>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>      pci_dev->irq_state = 0;
>      pci_config_alloc(pci_dev);
> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>          }
>      }
>
> +    if (qdev_hotplug) {
> +        pci_init_bus_master(pci_dev);
> +    }
> +

How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
I suppose you want the code to run after the "realize" function?
If so, what happens if a "realize" function of another device needs the bus_master_as
(at hotplug time)?

Thanks,
Marcel

>      /* rom loading */
>      is_default_rom = false;
>      if (pci_dev->romfile == NULL && pc->romfile != NULL) {
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index b76f3f6..21cbda2 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1845,6 +1845,14 @@ static void virtio_pci_exit(PCIDevice *pci_dev)
>      address_space_destroy(&proxy->modern_as);
>  }
>
> +static void virtio_pci_bus_master_ready(PCIDevice *pci_dev)
> +{
> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
> +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> +
> +    virtio_device_reset_dma_as(vdev);
> +}
> +
>  static void virtio_pci_reset(DeviceState *qdev)
>  {
>      VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
> @@ -1904,6 +1912,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
>      dc->props = virtio_pci_properties;
>      k->realize = virtio_pci_realize;
>      k->exit = virtio_pci_exit;
> +    k->bus_master_ready = virtio_pci_bus_master_ready;
>      k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
>      k->revision = VIRTIO_PCI_ABI_VERSION;
>      k->class_id = PCI_CLASS_OTHERS;
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index efce4b3..09f4cf4 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2594,6 +2594,25 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
>      return virtio_bus_ioeventfd_enabled(vbus);
>  }
>
> +void virtio_device_reset_dma_as(VirtIODevice *vdev)
> +{
> +    DeviceState *qdev = DEVICE(vdev);
> +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
> +    VirtioBusState *bus = VIRTIO_BUS(qbus);
> +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> +    bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> +
> +    if (klass->get_dma_as != NULL && has_iommu) {
> +        vdev->dma_as = klass->get_dma_as(qbus->parent);
> +    } else {
> +        vdev->dma_as = &address_space_memory;
> +    }
> +
> +    memory_listener_unregister(&vdev->listener);
> +    memory_listener_register(&vdev->listener, vdev->dma_as);
> +}
> +
> +
>  static const TypeInfo virtio_device_info = {
>      .name = TYPE_VIRTIO_DEVICE,
>      .parent = TYPE_DEVICE,
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 9349acb..e700a58 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -207,6 +207,7 @@ typedef struct PCIDeviceClass {
>
>      void (*realize)(PCIDevice *dev, Error **errp);
>      int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
> +    void (*bus_master_ready)(PCIDevice *dev);
>      PCIUnregisterFunc *exit;
>      PCIConfigReadFunc *config_read;
>      PCIConfigWriteFunc *config_write;
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 15efcf2..21fa273 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -283,6 +283,7 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
>  int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
>  void virtio_device_release_ioeventfd(VirtIODevice *vdev);
>  bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
> +void virtio_device_reset_dma_as(VirtIODevice *vdev);
>  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
>  void virtio_queue_host_notifier_read(EventNotifier *n);
>  void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
>
Peter Xu March 8, 2017, 2:43 a.m. UTC | #3
On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
> On 03/07/2017 11:09 AM, Jason Wang wrote:
> >After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> >after caching ring translations"), IOMMU was required to be created in
> >advance. This is because we can only get the correct dma_as after pci
> >IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> >inconvenient for user. This patch releases this by:
> >
> >- introduce a bus_master_ready method for PCIDeviceClass and trigger
> >  this during pci_init_bus_master
> >- implement virtio-pci method and 1) reset the dma_as 2) re-register
> >  the memory listener to the new dma_as
> >
> 
> Hi Jason,
> 
> >Cc: Paolo Bonzini <pbonzini@redhat.com>
> >Signed-off-by: Jason Wang <jasowang@redhat.com>
> >---
> >Changes from V2:
> >- delay pci_init_bus_master() after pci device is realized to make
> >  bus_master_ready a more generic method
> >---
> > hw/pci/pci.c               | 11 ++++++++---
> > hw/virtio/virtio-pci.c     |  9 +++++++++
> > hw/virtio/virtio.c         | 19 +++++++++++++++++++
> > include/hw/pci/pci.h       |  1 +
> > include/hw/virtio/virtio.h |  1 +
> > 5 files changed, 38 insertions(+), 3 deletions(-)
> >
> >diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >index 273f1e4..22e6ad9 100644
> >--- a/hw/pci/pci.c
> >+++ b/hw/pci/pci.c
> >@@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
> > static void pci_init_bus_master(PCIDevice *pci_dev)
> > {
> >     AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> >+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
> >
> >     memory_region_init_alias(&pci_dev->bus_master_enable_region,
> >                              OBJECT(pci_dev), "bus master",
> >@@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
> >     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> >     address_space_init(&pci_dev->bus_master_as,
> >                        &pci_dev->bus_master_enable_region, pci_dev->name);
> >+    if (pc->bus_master_ready) {
> >+        pc->bus_master_ready(pci_dev);
> >+    }
> > }
> >
> > static void pcibus_machine_done(Notifier *notifier, void *data)
> >@@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >     pci_dev->devfn = devfn;
> >     pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> >
> >-    if (qdev_hotplug) {
> >-        pci_init_bus_master(pci_dev);
> >-    }
> >     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
> >     pci_dev->irq_state = 0;
> >     pci_config_alloc(pci_dev);
> >@@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >         }
> >     }
> >
> >+    if (qdev_hotplug) {
> >+        pci_init_bus_master(pci_dev);
> >+    }
> >+
> 
> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
> I suppose you want the code to run after the "realize" function?
> If so, what happens if a "realize" function of another device needs the bus_master_as
> (at hotplug time)?

My unmature understanding is that, we can just call
pci_device_iommu_address_space() if device realization really needs
the address space, rather than using bus_master_as.

An example is vfio-pci device. It is using
pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
there may be other reasons behind, e.g., VFIOAddressSpace should be
1:1 mapped to bus master address space, so we may want to make sure
this address space will be the same when different devices are using
the same address space (while bus_master_as is one per device, even if
two devices have the same backend address space, there will be two
distinct bus_master_as).

IIUC, bus_master_as is only used to emulate the master bit in PCI
command register. In that case, that should only be there for the
guest operations, while imho device realization is "emulation code",
which can directly use pci_device_iommu_address_space(). Actually, if
it plays with bus_master_as even if it can, I guess it'll just fail
since that region has not yet been enabled.

Please kindly correct me if I made a mistake...

Thanks,

-- peterx
Jason Wang March 8, 2017, 3:15 a.m. UTC | #4
On 2017年03月08日 10:43, Peter Xu wrote:
> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>> after caching ring translations"), IOMMU was required to be created in
>>> advance. This is because we can only get the correct dma_as after pci
>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>> inconvenient for user. This patch releases this by:
>>>
>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>   this during pci_init_bus_master
>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>   the memory listener to the new dma_as
>>>
>> Hi Jason,
>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>> Changes from V2:
>>> - delay pci_init_bus_master() after pci device is realized to make
>>>   bus_master_ready a more generic method
>>> ---
>>> hw/pci/pci.c               | 11 ++++++++---
>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>> include/hw/pci/pci.h       |  1 +
>>> include/hw/virtio/virtio.h |  1 +
>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 273f1e4..22e6ad9 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>> {
>>>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>
>>>      memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>                               OBJECT(pci_dev), "bus master",
>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>      address_space_init(&pci_dev->bus_master_as,
>>>                         &pci_dev->bus_master_enable_region, pci_dev->name);
>>> +    if (pc->bus_master_ready) {
>>> +        pc->bus_master_ready(pci_dev);
>>> +    }
>>> }
>>>
>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>      pci_dev->devfn = devfn;
>>>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>
>>> -    if (qdev_hotplug) {
>>> -        pci_init_bus_master(pci_dev);
>>> -    }
>>>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>      pci_dev->irq_state = 0;
>>>      pci_config_alloc(pci_dev);
>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>          }
>>>      }
>>>
>>> +    if (qdev_hotplug) {
>>> +        pci_init_bus_master(pci_dev);
>>> +    }
>>> +
>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>> I suppose you want the code to run after the "realize" function?

Yes.

>> If so, what happens if a "realize" function of another device needs the bus_master_as
>> (at hotplug time)?

I'm not sure this is really needed. What kind of device need to check 
hotplug during their realize? (Looks like we don't have such kind of 
device now).

> My unmature understanding is that, we can just call
> pci_device_iommu_address_space() if device realization really needs
> the address space, rather than using bus_master_as.

A little issue is pci_device_iommu_address_space() can be wrong if it 
was called before OMMU was created.

Thanks

>
> An example is vfio-pci device. It is using
> pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
> there may be other reasons behind, e.g., VFIOAddressSpace should be
> 1:1 mapped to bus master address space, so we may want to make sure
> this address space will be the same when different devices are using
> the same address space (while bus_master_as is one per device, even if
> two devices have the same backend address space, there will be two
> distinct bus_master_as).
>
> IIUC, bus_master_as is only used to emulate the master bit in PCI
> command register. In that case, that should only be there for the
> guest operations, while imho device realization is "emulation code",
> which can directly use pci_device_iommu_address_space(). Actually, if
> it plays with bus_master_as even if it can, I guess it'll just fail
> since that region has not yet been enabled.
>
> Please kindly correct me if I made a mistake...
>
> Thanks,
>
> -- peterx
Marcel Apfelbaum March 8, 2017, 8:17 a.m. UTC | #5
On 03/08/2017 05:15 AM, Jason Wang wrote:
>
>
> On 2017年03月08日 10:43, Peter Xu wrote:
>> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>> after caching ring translations"), IOMMU was required to be created in
>>>> advance. This is because we can only get the correct dma_as after pci
>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>> inconvenient for user. This patch releases this by:
>>>>
>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>   this during pci_init_bus_master
>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>   the memory listener to the new dma_as
>>>>
>>> Hi Jason,
>>>
>>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>> Changes from V2:
>>>> - delay pci_init_bus_master() after pci device is realized to make
>>>>   bus_master_ready a more generic method
>>>> ---
>>>> hw/pci/pci.c               | 11 ++++++++---
>>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>>> include/hw/pci/pci.h       |  1 +
>>>> include/hw/virtio/virtio.h |  1 +
>>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index 273f1e4..22e6ad9 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>>> {
>>>>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>>
>>>>      memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>>                               OBJECT(pci_dev), "bus master",
>>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>>      address_space_init(&pci_dev->bus_master_as,
>>>>                         &pci_dev->bus_master_enable_region, pci_dev->name);
>>>> +    if (pc->bus_master_ready) {
>>>> +        pc->bus_master_ready(pci_dev);
>>>> +    }
>>>> }
>>>>
>>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>>      pci_dev->devfn = devfn;
>>>>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>>
>>>> -    if (qdev_hotplug) {
>>>> -        pci_init_bus_master(pci_dev);
>>>> -    }
>>>>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>>      pci_dev->irq_state = 0;
>>>>      pci_config_alloc(pci_dev);
>>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>>          }
>>>>      }
>>>>
>>>> +    if (qdev_hotplug) {
>>>> +        pci_init_bus_master(pci_dev);
>>>> +    }
>>>> +
>>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>>> I suppose you want the code to run after the "realize" function?
>
> Yes.
>
>>> If so, what happens if a "realize" function of another device needs the bus_master_as
>>> (at hotplug time)?
>
> I'm not sure this is really needed. What kind of device need to check hotplug during their realize? (Looks like we don't have such kind of device now).

I am sorry I was not clear enough:
If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
event to enable "bus master".
This is why we have
    if (qdev_hotplug)
         pci_init_bus_master(pci_dev);

The code you proposed changes the order, so this call is done *after* realize.

My question was: What if any other device may require the bus_master_as
at realize time (and can be hot-plugged) ?
For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
and caches the bus_master_as.

It is possible the mentioned devices can't be hot-plugged or
are not relevant for PCIe.

I am only saying we should double check when making such a change.

>
>> My unmature understanding is that, we can just call
>> pci_device_iommu_address_space() if device realization really needs
>> the address space, rather than using bus_master_as.
>
> A little issue is pci_device_iommu_address_space() can be wrong if it was called before OMMU was created.
>

At hotplug time this is irrelevant because the iommu has already been created.

Thanks,
Marcel

> Thanks
>
>>
>> An example is vfio-pci device. It is using
>> pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
>> there may be other reasons behind, e.g., VFIOAddressSpace should be
>> 1:1 mapped to bus master address space, so we may want to make sure
>> this address space will be the same when different devices are using
>> the same address space (while bus_master_as is one per device, even if
>> two devices have the same backend address space, there will be two
>> distinct bus_master_as).
>>
>> IIUC, bus_master_as is only used to emulate the master bit in PCI
>> command register. In that case, that should only be there for the
>> guest operations, while imho device realization is "emulation code",
>> which can directly use pci_device_iommu_address_space(). Actually, if
>> it plays with bus_master_as even if it can, I guess it'll just fail
>> since that region has not yet been enabled.
>>
>> Please kindly correct me if I made a mistake...
>>
>> Thanks,
>>
>> -- peterx
>
Marcel Apfelbaum March 8, 2017, 8:24 a.m. UTC | #6
On 03/08/2017 04:43 AM, Peter Xu wrote:
> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>> after caching ring translations"), IOMMU was required to be created in
>>> advance. This is because we can only get the correct dma_as after pci
>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>> inconvenient for user. This patch releases this by:
>>>
>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>  this during pci_init_bus_master
>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>  the memory listener to the new dma_as
>>>
>>
>> Hi Jason,
>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>> Changes from V2:
>>> - delay pci_init_bus_master() after pci device is realized to make
>>>  bus_master_ready a more generic method
>>> ---
>>> hw/pci/pci.c               | 11 ++++++++---
>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>> include/hw/pci/pci.h       |  1 +
>>> include/hw/virtio/virtio.h |  1 +
>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 273f1e4..22e6ad9 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>> {
>>>     AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>
>>>     memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>                              OBJECT(pci_dev), "bus master",
>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>     address_space_init(&pci_dev->bus_master_as,
>>>                        &pci_dev->bus_master_enable_region, pci_dev->name);
>>> +    if (pc->bus_master_ready) {
>>> +        pc->bus_master_ready(pci_dev);
>>> +    }
>>> }
>>>
>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>     pci_dev->devfn = devfn;
>>>     pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>
>>> -    if (qdev_hotplug) {
>>> -        pci_init_bus_master(pci_dev);
>>> -    }
>>>     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>     pci_dev->irq_state = 0;
>>>     pci_config_alloc(pci_dev);
>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>         }
>>>     }
>>>
>>> +    if (qdev_hotplug) {
>>> +        pci_init_bus_master(pci_dev);
>>> +    }
>>> +
>>
>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>> I suppose you want the code to run after the "realize" function?
>> If so, what happens if a "realize" function of another device needs the bus_master_as
>> (at hotplug time)?
>
> My unmature understanding is that, we can just call
> pci_device_iommu_address_space() if device realization really needs
> the address space, rather than using bus_master_as.
>

Yes, each device that support IOMMU needs to call it instead of bus_master_as.
As if it really needs this information at realize time is debatable.
The vfio-pci is a special case and Alex Williamson explain why it is needed
at the "realize" time.


> An example is vfio-pci device. It is using
> pci_device_iommu_address_space() in vfio_realize(). Though I _guess_
> there may be other reasons behind, e.g., VFIOAddressSpace should be
> 1:1 mapped to bus master address space, so we may want to make sure
> this address space will be the same when different devices are using
> the same address space (while bus_master_as is one per device, even if
> two devices have the same backend address space, there will be two
> distinct bus_master_as).
>

Understood

> IIUC, bus_master_as is only used to emulate the master bit in PCI
> command register. In that case, that should only be there for the
> guest operations, while imho device realization is "emulation code",
> which can directly use pci_device_iommu_address_space(). Actually, if
> it plays with bus_master_as even if it can, I guess it'll just fail
> since that region has not yet been enabled.
>
> Please kindly correct me if I made a mistake...
>

You are right the device does not need the bus master address space before the
VM is up, this is we the init process of this region is delayed until machine done.


My question was why do you need to move pci_init_bus_master after device realization
at hotplug time and how would influence the other devices.


Thanks,
Marcel

> Thanks,
>
> -- peterx
>
Peter Xu March 8, 2017, 9:09 a.m. UTC | #7
On Wed, Mar 08, 2017 at 10:17:09AM +0200, Marcel Apfelbaum wrote:

[...]

> 
> I am sorry I was not clear enough:
> If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
> event to enable "bus master".
> This is why we have
>    if (qdev_hotplug)
>         pci_init_bus_master(pci_dev);
> 
> The code you proposed changes the order, so this call is done *after* realize.
> 
> My question was: What if any other device may require the bus_master_as
> at realize time (and can be hot-plugged) ?
> For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
> and caches the bus_master_as.

Oh, I didn't notice that there are other devices that used
bus_master_as during realization. If so... Would this really work even
without hot plug? Considering that bus_master_as won't be inited until
machine done phase?

-- peterx
Peter Xu March 8, 2017, 9:16 a.m. UTC | #8
On Wed, Mar 08, 2017 at 05:09:45PM +0800, Peter Xu wrote:
> On Wed, Mar 08, 2017 at 10:17:09AM +0200, Marcel Apfelbaum wrote:
> 
> [...]
> 
> > 
> > I am sorry I was not clear enough:
> > If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
> > event to enable "bus master".
> > This is why we have
> >    if (qdev_hotplug)
> >         pci_init_bus_master(pci_dev);
> > 
> > The code you proposed changes the order, so this call is done *after* realize.
> > 
> > My question was: What if any other device may require the bus_master_as
> > at realize time (and can be hot-plugged) ?
> > For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
> > and caches the bus_master_as.
> 
> Oh, I didn't notice that there are other devices that used
> bus_master_as during realization. If so... Would this really work even
> without hot plug? Considering that bus_master_as won't be inited until
> machine done phase?

Please ignore my question... I think the answer is that these devices
are only caching the pointer of bus_master_as. So it won't really use
the address space before machine_done.

If so, IMHO moving pci_init_bus_master() after device specific
realize() is okay as well then, right?

Thanks,

-- peterx
Jason Wang March 8, 2017, 9:57 a.m. UTC | #9
On 2017年03月08日 16:17, Marcel Apfelbaum wrote:
> On 03/08/2017 05:15 AM, Jason Wang wrote:
>>
>>
>> On 2017年03月08日 10:43, Peter Xu wrote:
>>> On Tue, Mar 07, 2017 at 02:47:30PM +0200, Marcel Apfelbaum wrote:
>>>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>>>> after caching ring translations"), IOMMU was required to be 
>>>>> created in
>>>>> advance. This is because we can only get the correct dma_as after pci
>>>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>>>> inconvenient for user. This patch releases this by:
>>>>>
>>>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>>>   this during pci_init_bus_master
>>>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>>>   the memory listener to the new dma_as
>>>>>
>>>> Hi Jason,
>>>>
>>>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>> ---
>>>>> Changes from V2:
>>>>> - delay pci_init_bus_master() after pci device is realized to make
>>>>>   bus_master_ready a more generic method
>>>>> ---
>>>>> hw/pci/pci.c               | 11 ++++++++---
>>>>> hw/virtio/virtio-pci.c     |  9 +++++++++
>>>>> hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>>>> include/hw/pci/pci.h       |  1 +
>>>>> include/hw/virtio/virtio.h |  1 +
>>>>> 5 files changed, 38 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>> index 273f1e4..22e6ad9 100644
>>>>> --- a/hw/pci/pci.c
>>>>> +++ b/hw/pci/pci.c
>>>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>>>> static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>> {
>>>>>      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>>>
>>>>> memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>>>                               OBJECT(pci_dev), "bus master",
>>>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>>> memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>>>      address_space_init(&pci_dev->bus_master_as,
>>>>> &pci_dev->bus_master_enable_region, pci_dev->name);
>>>>> +    if (pc->bus_master_ready) {
>>>>> +        pc->bus_master_ready(pci_dev);
>>>>> +    }
>>>>> }
>>>>>
>>>>> static void pcibus_machine_done(Notifier *notifier, void *data)
>>>>> @@ -995,9 +999,6 @@ static PCIDevice 
>>>>> *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>>>      pci_dev->devfn = devfn;
>>>>>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>>>
>>>>> -    if (qdev_hotplug) {
>>>>> -        pci_init_bus_master(pci_dev);
>>>>> -    }
>>>>>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>>>      pci_dev->irq_state = 0;
>>>>>      pci_config_alloc(pci_dev);
>>>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState 
>>>>> *qdev, Error **errp)
>>>>>          }
>>>>>      }
>>>>>
>>>>> +    if (qdev_hotplug) {
>>>>> +        pci_init_bus_master(pci_dev);
>>>>> +    }
>>>>> +
>>>> How does it help if we move qdev_hotplug check outside 
>>>> "do_pci_register_device"?
>>>> I suppose you want the code to run after the "realize" function?
>>
>> Yes.
>>
>>>> If so, what happens if a "realize" function of another device needs 
>>>> the bus_master_as
>>>> (at hotplug time)?
>>
>> I'm not sure this is really needed. What kind of device need to check 
>> hotplug during their realize? (Looks like we don't have such kind of 
>> device now).
>
> I am sorry I was not clear enough:
> If a device is added after the system is up (hotplug), we cannot 
> depend on the "machine_done"
> event to enable "bus master".
> This is why we have
>    if (qdev_hotplug)
>         pci_init_bus_master(pci_dev);
>
> The code you proposed changes the order, so this call is done *after* 
> realize.
>
> My question was: What if any other device may require the bus_master_as
> at realize time (and can be hot-plugged) ?
> For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
> and caches the bus_master_as.
>
> It is possible the mentioned devices can't be hot-plugged or
> are not relevant for PCIe.
>
> I am only saying we should double check when making such a change.

Can't agree more.

For hcd-ehci/hcd-ohci, simply caching the as should be fine. But if they 
want to more things like virito, they should delay it to bus_master_ready.

>
>>
>>> My unmature understanding is that, we can just call
>>> pci_device_iommu_address_space() if device realization really needs
>>> the address space, rather than using bus_master_as.
>>
>> A little issue is pci_device_iommu_address_space() can be wrong if it 
>> was called before OMMU was created.
>>
>
> At hotplug time this is irrelevant because the iommu has already been 
> created.
>
> Thanks,
> Marcel

Yes.

Thanks
Marcel Apfelbaum March 8, 2017, 1:14 p.m. UTC | #10
On 03/08/2017 11:16 AM, Peter Xu wrote:
> On Wed, Mar 08, 2017 at 05:09:45PM +0800, Peter Xu wrote:
>> On Wed, Mar 08, 2017 at 10:17:09AM +0200, Marcel Apfelbaum wrote:
>>
>> [...]
>>
>>>
>>> I am sorry I was not clear enough:
>>> If a device is added after the system is up (hotplug), we cannot depend on the "machine_done"
>>> event to enable "bus master".
>>> This is why we have
>>>    if (qdev_hotplug)
>>>         pci_init_bus_master(pci_dev);
>>>
>>> The code you proposed changes the order, so this call is done *after* realize.
>>>
>>> My question was: What if any other device may require the bus_master_as
>>> at realize time (and can be hot-plugged) ?
>>> For example: hcd-ehci/hcd-ohci devices call pci_get_address_space()
>>> and caches the bus_master_as.
>>
>> Oh, I didn't notice that there are other devices that used
>> bus_master_as during realization. If so... Would this really work even
>> without hot plug? Considering that bus_master_as won't be inited until
>> machine done phase?
>
> Please ignore my question... I think the answer is that these devices
> are only caching the pointer of bus_master_as. So it won't really use
> the address space before machine_done.
>
> If so, IMHO moving pci_init_bus_master() after device specific
> realize() is okay as well then, right?
>

It should work, right. But you may want to double-check anyway :)

Thanks,
Marcel

> Thanks,
>
> -- peterx
>
Igor Mammedov March 8, 2017, 4:40 p.m. UTC | #11
On Tue, 7 Mar 2017 14:47:30 +0200
Marcel Apfelbaum <marcel@redhat.com> wrote:

> On 03/07/2017 11:09 AM, Jason Wang wrote:
> > After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> > after caching ring translations"), IOMMU was required to be created in
> > advance. This is because we can only get the correct dma_as after pci
> > IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> > inconvenient for user. This patch releases this by:
> >
> > - introduce a bus_master_ready method for PCIDeviceClass and trigger
> >   this during pci_init_bus_master
> > - implement virtio-pci method and 1) reset the dma_as 2) re-register
> >   the memory listener to the new dma_as
> >  
> 
> Hi Jason,
> 
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Changes from V2:
> > - delay pci_init_bus_master() after pci device is realized to make
> >   bus_master_ready a more generic method
> > ---
> >  hw/pci/pci.c               | 11 ++++++++---
> >  hw/virtio/virtio-pci.c     |  9 +++++++++
> >  hw/virtio/virtio.c         | 19 +++++++++++++++++++
> >  include/hw/pci/pci.h       |  1 +
> >  include/hw/virtio/virtio.h |  1 +
> >  5 files changed, 38 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index 273f1e4..22e6ad9 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
> >  static void pci_init_bus_master(PCIDevice *pci_dev)
> >  {
> >      AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> > +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
> >
> >      memory_region_init_alias(&pci_dev->bus_master_enable_region,
> >                               OBJECT(pci_dev), "bus master",
> > @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
> >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> >      address_space_init(&pci_dev->bus_master_as,
> >                         &pci_dev->bus_master_enable_region, pci_dev->name);
> > +    if (pc->bus_master_ready) {
> > +        pc->bus_master_ready(pci_dev);
> > +    }
> >  }
> >
> >  static void pcibus_machine_done(Notifier *notifier, void *data)
> > @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >      pci_dev->devfn = devfn;
> >      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> >
> > -    if (qdev_hotplug) {
> > -        pci_init_bus_master(pci_dev);
> > -    }
> >      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
> >      pci_dev->irq_state = 0;
> >      pci_config_alloc(pci_dev);
> > @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >          }
> >      }
> >
> > +    if (qdev_hotplug) {
> > +        pci_init_bus_master(pci_dev);
> > +    }
> > +  
> 
> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
> I suppose you want the code to run after the "realize" function?
> If so, what happens if a "realize" function of another device needs the bus_master_as
> (at hotplug time)?

Conceptually,
I'm not sure that inherited device class realize
should be aware of uninitialized bus_master,
which belongs to PCI device, nor should it initialize
it by calling pci_init_bus_master() explicitly,
it's parent's class job (PCIDevice).

more close to current code:
if xen-pci-passthrough were hotplugged then it looks like
this hunk could break it:
hw/xen/xen_pt.c:
 memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
would happen with uninitialized bus_master_as
as realize is called before pci_init_bus_master();

So the same question as Marcel's but other way around,
why this hunk "has to" be moved.


> 
> Thanks,
> Marcel
> 
> >      /* rom loading */
> >      is_default_rom = false;
> >      if (pci_dev->romfile == NULL && pc->romfile != NULL) {
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index b76f3f6..21cbda2 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -1845,6 +1845,14 @@ static void virtio_pci_exit(PCIDevice *pci_dev)
> >      address_space_destroy(&proxy->modern_as);
> >  }
> >
> > +static void virtio_pci_bus_master_ready(PCIDevice *pci_dev)
> > +{
> > +    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
> > +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> > +
> > +    virtio_device_reset_dma_as(vdev);
> > +}
> > +
> >  static void virtio_pci_reset(DeviceState *qdev)
> >  {
> >      VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
> > @@ -1904,6 +1912,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
> >      dc->props = virtio_pci_properties;
> >      k->realize = virtio_pci_realize;
> >      k->exit = virtio_pci_exit;
> > +    k->bus_master_ready = virtio_pci_bus_master_ready;
> >      k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
> >      k->revision = VIRTIO_PCI_ABI_VERSION;
> >      k->class_id = PCI_CLASS_OTHERS;
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index efce4b3..09f4cf4 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -2594,6 +2594,25 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
> >      return virtio_bus_ioeventfd_enabled(vbus);
> >  }
> >
> > +void virtio_device_reset_dma_as(VirtIODevice *vdev)
> > +{
> > +    DeviceState *qdev = DEVICE(vdev);
> > +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
> > +    VirtioBusState *bus = VIRTIO_BUS(qbus);
> > +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> > +    bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> > +
> > +    if (klass->get_dma_as != NULL && has_iommu) {
> > +        vdev->dma_as = klass->get_dma_as(qbus->parent);
> > +    } else {
> > +        vdev->dma_as = &address_space_memory;
> > +    }
> > +
> > +    memory_listener_unregister(&vdev->listener);
> > +    memory_listener_register(&vdev->listener, vdev->dma_as);
> > +}
this mostly subset of virtio_bus_device_plugged(), generalize?

> > +
> > +
> >  static const TypeInfo virtio_device_info = {
> >      .name = TYPE_VIRTIO_DEVICE,
> >      .parent = TYPE_DEVICE,
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index 9349acb..e700a58 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -207,6 +207,7 @@ typedef struct PCIDeviceClass {
> >
> >      void (*realize)(PCIDevice *dev, Error **errp);
> >      int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
> > +    void (*bus_master_ready)(PCIDevice *dev);
> >      PCIUnregisterFunc *exit;
> >      PCIConfigReadFunc *config_read;
> >      PCIConfigWriteFunc *config_write;
> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > index 15efcf2..21fa273 100644
> > --- a/include/hw/virtio/virtio.h
> > +++ b/include/hw/virtio/virtio.h
> > @@ -283,6 +283,7 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
> >  int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
> >  void virtio_device_release_ioeventfd(VirtIODevice *vdev);
> >  bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
> > +void virtio_device_reset_dma_as(VirtIODevice *vdev);
> >  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
> >  void virtio_queue_host_notifier_read(EventNotifier *n);
> >  void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
> >  
> 
>
Jason Wang March 9, 2017, 2:32 a.m. UTC | #12
On 2017年03月09日 00:40, Igor Mammedov wrote:
> On Tue, 7 Mar 2017 14:47:30 +0200
> Marcel Apfelbaum<marcel@redhat.com>  wrote:
>
>> On 03/07/2017 11:09 AM, Jason Wang wrote:
>>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
>>> after caching ring translations"), IOMMU was required to be created in
>>> advance. This is because we can only get the correct dma_as after pci
>>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
>>> inconvenient for user. This patch releases this by:
>>>
>>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
>>>    this during pci_init_bus_master
>>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
>>>    the memory listener to the new dma_as
>>>   
>> Hi Jason,
>>
>>> Cc: Paolo Bonzini<pbonzini@redhat.com>
>>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>>> ---
>>> Changes from V2:
>>> - delay pci_init_bus_master() after pci device is realized to make
>>>    bus_master_ready a more generic method
>>> ---
>>>   hw/pci/pci.c               | 11 ++++++++---
>>>   hw/virtio/virtio-pci.c     |  9 +++++++++
>>>   hw/virtio/virtio.c         | 19 +++++++++++++++++++
>>>   include/hw/pci/pci.h       |  1 +
>>>   include/hw/virtio/virtio.h |  1 +
>>>   5 files changed, 38 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 273f1e4..22e6ad9 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
>>>   static void pci_init_bus_master(PCIDevice *pci_dev)
>>>   {
>>>       AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
>>>
>>>       memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>>                                OBJECT(pci_dev), "bus master",
>>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
>>>       memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>>>       address_space_init(&pci_dev->bus_master_as,
>>>                          &pci_dev->bus_master_enable_region, pci_dev->name);
>>> +    if (pc->bus_master_ready) {
>>> +        pc->bus_master_ready(pci_dev);
>>> +    }
>>>   }
>>>
>>>   static void pcibus_machine_done(Notifier *notifier, void *data)
>>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>       pci_dev->devfn = devfn;
>>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>>
>>> -    if (qdev_hotplug) {
>>> -        pci_init_bus_master(pci_dev);
>>> -    }
>>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>       pci_dev->irq_state = 0;
>>>       pci_config_alloc(pci_dev);
>>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>           }
>>>       }
>>>
>>> +    if (qdev_hotplug) {
>>> +        pci_init_bus_master(pci_dev);
>>> +    }
>>> +
>> How does it help if we move qdev_hotplug check outside "do_pci_register_device"?
>> I suppose you want the code to run after the "realize" function?
>> If so, what happens if a "realize" function of another device needs the bus_master_as
>> (at hotplug time)?
> Conceptually,
> I'm not sure that inherited device class realize
> should be aware of uninitialized bus_master,
> which belongs to PCI device, nor should it initialize
> it by calling pci_init_bus_master() explicitly,
> it's parent's class job (PCIDevice).

Yes, I was trying to propose a workaround for 2.9. I'm sure we will 
refine the ordering in 2.10. And consider we have asked libvirt to 
create IOMMU first, I think I will withdraw the patch.

>
> more close to current code:
> if xen-pci-passthrough were hotplugged then it looks like
> this hunk could break it:
> hw/xen/xen_pt.c:
>   memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
> would happen with uninitialized bus_master_as
> as realize is called before pci_init_bus_master();

Yes, this won't work. This is exactly the same issue as virtio, but this 
will also break if it was created before an IOMMU.

>
> So the same question as Marcel's but other way around,
> why this hunk "has to" be moved.
>
>

Right.

Thanks
diff mbox

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 273f1e4..22e6ad9 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -83,6 +83,7 @@  static const VMStateDescription vmstate_pcibus = {
 static void pci_init_bus_master(PCIDevice *pci_dev)
 {
     AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
 
     memory_region_init_alias(&pci_dev->bus_master_enable_region,
                              OBJECT(pci_dev), "bus master",
@@ -90,6 +91,9 @@  static void pci_init_bus_master(PCIDevice *pci_dev)
     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
     address_space_init(&pci_dev->bus_master_as,
                        &pci_dev->bus_master_enable_region, pci_dev->name);
+    if (pc->bus_master_ready) {
+        pc->bus_master_ready(pci_dev);
+    }
 }
 
 static void pcibus_machine_done(Notifier *notifier, void *data)
@@ -995,9 +999,6 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pci_dev->devfn = devfn;
     pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
 
-    if (qdev_hotplug) {
-        pci_init_bus_master(pci_dev);
-    }
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
     pci_config_alloc(pci_dev);
@@ -1995,6 +1996,10 @@  static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         }
     }
 
+    if (qdev_hotplug) {
+        pci_init_bus_master(pci_dev);
+    }
+
     /* rom loading */
     is_default_rom = false;
     if (pci_dev->romfile == NULL && pc->romfile != NULL) {
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index b76f3f6..21cbda2 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1845,6 +1845,14 @@  static void virtio_pci_exit(PCIDevice *pci_dev)
     address_space_destroy(&proxy->modern_as);
 }
 
+static void virtio_pci_bus_master_ready(PCIDevice *pci_dev)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    virtio_device_reset_dma_as(vdev);
+}
+
 static void virtio_pci_reset(DeviceState *qdev)
 {
     VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
@@ -1904,6 +1912,7 @@  static void virtio_pci_class_init(ObjectClass *klass, void *data)
     dc->props = virtio_pci_properties;
     k->realize = virtio_pci_realize;
     k->exit = virtio_pci_exit;
+    k->bus_master_ready = virtio_pci_bus_master_ready;
     k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
     k->revision = VIRTIO_PCI_ABI_VERSION;
     k->class_id = PCI_CLASS_OTHERS;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index efce4b3..09f4cf4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2594,6 +2594,25 @@  bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
     return virtio_bus_ioeventfd_enabled(vbus);
 }
 
+void virtio_device_reset_dma_as(VirtIODevice *vdev)
+{
+    DeviceState *qdev = DEVICE(vdev);
+    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
+    VirtioBusState *bus = VIRTIO_BUS(qbus);
+    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
+    bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+
+    if (klass->get_dma_as != NULL && has_iommu) {
+        vdev->dma_as = klass->get_dma_as(qbus->parent);
+    } else {
+        vdev->dma_as = &address_space_memory;
+    }
+
+    memory_listener_unregister(&vdev->listener);
+    memory_listener_register(&vdev->listener, vdev->dma_as);
+}
+
+
 static const TypeInfo virtio_device_info = {
     .name = TYPE_VIRTIO_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 9349acb..e700a58 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -207,6 +207,7 @@  typedef struct PCIDeviceClass {
 
     void (*realize)(PCIDevice *dev, Error **errp);
     int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
+    void (*bus_master_ready)(PCIDevice *dev);
     PCIUnregisterFunc *exit;
     PCIConfigReadFunc *config_read;
     PCIConfigWriteFunc *config_write;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 15efcf2..21fa273 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -283,6 +283,7 @@  void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
 int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
 void virtio_device_release_ioeventfd(VirtIODevice *vdev);
 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
+void virtio_device_reset_dma_as(VirtIODevice *vdev);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
 void virtio_queue_host_notifier_read(EventNotifier *n);
 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,