diff mbox series

[v2] vhost: Warn if DEVIOTLB_UNMAP is not supported and ats is set

Message ID 20221019122717.1259803-1-eric.auger@redhat.com
State New
Headers show
Series [v2] vhost: Warn if DEVIOTLB_UNMAP is not supported and ats is set | expand

Commit Message

Eric Auger Oct. 19, 2022, 12:27 p.m. UTC
Since b68ba1ca5767 ("memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP
IOMMUTLBNotificationType"), vhost attempts to register DEVIOTLB_UNMAP
notifier. This latter is supported by the intel-iommu which supports
device-iotlb if the corresponding option is set. Then 958ec334bca3
("vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support") allowed
silent fallback to the legacy UNMAP notifier if the viommu does not
support device iotlb.

Initially vhost/viommu integration was introduced with intel iommu
assuming ats=on was set on virtio-pci device and device-iotlb was set
on the intel iommu. vhost acts as an ATS capable device since it
implements an IOTLB on kernel side. However translated transactions
that hit the device IOTLB do not transit through the vIOMMU. So this
requires a limited ATS support on viommu side. Anyway this assumed
ATS was eventually enabled .

But neither SMMUv3 nor virtio-iommu do support ATS and the integration
with vhost just relies on the fact those vIOMMU send UNMAP notifications
whenever the guest trigger them. This works without ATS being enabled.

This patch makes sure we get a warning if ATS is set on a device
protected by virtio-iommu or vsmmuv3, reminding that we don't have
full support of ATS on those vIOMMUs and setting ats=on on the
virtio-pci end-point is not a requirement.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v1 -> v2:
- s/enabled/capable
- tweak the error message on vhost side
---
 include/hw/virtio/virtio-bus.h |  3 +++
 hw/virtio/vhost.c              | 21 ++++++++++++++++++++-
 hw/virtio/virtio-bus.c         | 14 ++++++++++++++
 hw/virtio/virtio-pci.c         | 11 +++++++++++
 4 files changed, 48 insertions(+), 1 deletion(-)

Comments

Jason Wang Oct. 20, 2022, 3:58 a.m. UTC | #1
On Wed, Oct 19, 2022 at 8:27 PM Eric Auger <eric.auger@redhat.com> wrote:
>
> Since b68ba1ca5767 ("memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP
> IOMMUTLBNotificationType"), vhost attempts to register DEVIOTLB_UNMAP
> notifier. This latter is supported by the intel-iommu which supports
> device-iotlb if the corresponding option is set. Then 958ec334bca3
> ("vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support") allowed
> silent fallback to the legacy UNMAP notifier if the viommu does not
> support device iotlb.
>
> Initially vhost/viommu integration was introduced with intel iommu
> assuming ats=on was set on virtio-pci device and device-iotlb was set
> on the intel iommu. vhost acts as an ATS capable device since it
> implements an IOTLB on kernel side. However translated transactions
> that hit the device IOTLB do not transit through the vIOMMU. So this
> requires a limited ATS support on viommu side. Anyway this assumed
> ATS was eventually enabled .
>
> But neither SMMUv3 nor virtio-iommu do support ATS and the integration
> with vhost just relies on the fact those vIOMMU send UNMAP notifications
> whenever the guest trigger them. This works without ATS being enabled.
>
> This patch makes sure we get a warning if ATS is set on a device
> protected by virtio-iommu or vsmmuv3, reminding that we don't have
> full support of ATS on those vIOMMUs and setting ats=on on the
> virtio-pci end-point is not a requirement.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>
> ---
>
> v1 -> v2:
> - s/enabled/capable
> - tweak the error message on vhost side
> ---
>  include/hw/virtio/virtio-bus.h |  3 +++
>  hw/virtio/vhost.c              | 21 ++++++++++++++++++++-
>  hw/virtio/virtio-bus.c         | 14 ++++++++++++++
>  hw/virtio/virtio-pci.c         | 11 +++++++++++
>  4 files changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
> index 7ab8c9dab0..23360a1daa 100644
> --- a/include/hw/virtio/virtio-bus.h
> +++ b/include/hw/virtio/virtio-bus.h
> @@ -94,6 +94,7 @@ struct VirtioBusClass {
>      bool has_variable_vring_alignment;
>      AddressSpace *(*get_dma_as)(DeviceState *d);
>      bool (*iommu_enabled)(DeviceState *d);
> +    bool (*ats_capable)(DeviceState *d);
>  };
>
>  struct VirtioBusState {
> @@ -157,4 +158,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);
>  void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n);
>  /* Whether the IOMMU is enabled for this device */
>  bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev);
> +/* Whether ATS is enabled for this device */
> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev);
>  #endif /* VIRTIO_BUS_H */
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 5185c15295..3cf9efce5e 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -324,6 +324,16 @@ static bool vhost_dev_has_iommu(struct vhost_dev *dev)
>      }
>  }
>
> +static bool vhost_dev_ats_capable(struct vhost_dev *dev)

I suggest to rename this as pf_capable() since ATS is PCI specific but
vhost isn't.

> +{
> +    VirtIODevice *vdev = dev->vdev;
> +
> +    if (vdev && virtio_bus_device_ats_capable(vdev)) {
> +        return true;
> +    }
> +    return false;
> +}
> +
>  static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
>                                hwaddr *plen, bool is_write)
>  {
> @@ -737,6 +747,7 @@ static void vhost_iommu_region_add(MemoryListener *listener,
>      Int128 end;
>      int iommu_idx;
>      IOMMUMemoryRegion *iommu_mr;
> +    Error *err = NULL;
>      int ret;
>
>      if (!memory_region_is_iommu(section->mr)) {
> @@ -760,8 +771,16 @@ static void vhost_iommu_region_add(MemoryListener *listener,
>      iommu->iommu_offset = section->offset_within_address_space -
>                            section->offset_within_region;
>      iommu->hdev = dev;
> -    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> +    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, &err);
>      if (ret) {
> +        if (vhost_dev_ats_capable(dev)) {
> +            error_reportf_err(err,
> +                              "%s: Although the device exposes ATS capability, "
> +                              "fallback to legacy IOMMU UNMAP notifier: ",
> +                              iommu_mr->parent_obj.name);

I'm not sure if it's a real error, or I wonder what we need to do is

1) check is ATS is enabled
2) fallback to UNMAP is ATS is not enabled

> +        } else {
> +            error_free(err);
> +        }
>          /*
>           * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
>           * UNMAP legacy message
> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> index 896feb37a1..d46c3f8ec4 100644
> --- a/hw/virtio/virtio-bus.c
> +++ b/hw/virtio/virtio-bus.c
> @@ -348,6 +348,20 @@ bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev)
>      return klass->iommu_enabled(qbus->parent);
>  }
>
> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev)
> +{
> +    DeviceState *qdev = DEVICE(vdev);
> +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
> +    VirtioBusState *bus = VIRTIO_BUS(qbus);
> +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> +
> +    if (!klass->ats_capable) {
> +        return false;

I think it's better to check whether or not ATS is enabled. Guest may
choose to not enable ATS for various reasons.

Thanks

> +    }
> +
> +    return klass->ats_capable(qbus->parent);
> +}
> +
>  static void virtio_bus_class_init(ObjectClass *klass, void *data)
>  {
>      BusClass *bus_class = BUS_CLASS(klass);
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index e7d80242b7..c2ceba98a6 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1141,6 +1141,16 @@ static bool virtio_pci_iommu_enabled(DeviceState *d)
>      return true;
>  }
>
> +static bool virtio_pci_ats_capable(DeviceState *d)
> +{
> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> +
> +    if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
> +        return true;
> +    }
> +    return false;
> +}
> +
>  static bool virtio_pci_queue_enabled(DeviceState *d, int n)
>  {
>      VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> @@ -2229,6 +2239,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
>      k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
>      k->get_dma_as = virtio_pci_get_dma_as;
>      k->iommu_enabled = virtio_pci_iommu_enabled;
> +    k->ats_capable = virtio_pci_ats_capable;
>      k->queue_enabled = virtio_pci_queue_enabled;
>  }
>
> --
> 2.35.3
>
Eric Auger Oct. 20, 2022, 1:03 p.m. UTC | #2
Hi Jason,

On 10/20/22 05:58, Jason Wang wrote:
> On Wed, Oct 19, 2022 at 8:27 PM Eric Auger <eric.auger@redhat.com> wrote:
>> Since b68ba1ca5767 ("memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP
>> IOMMUTLBNotificationType"), vhost attempts to register DEVIOTLB_UNMAP
>> notifier. This latter is supported by the intel-iommu which supports
>> device-iotlb if the corresponding option is set. Then 958ec334bca3
>> ("vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support") allowed
>> silent fallback to the legacy UNMAP notifier if the viommu does not
>> support device iotlb.
>>
>> Initially vhost/viommu integration was introduced with intel iommu
>> assuming ats=on was set on virtio-pci device and device-iotlb was set
>> on the intel iommu. vhost acts as an ATS capable device since it
>> implements an IOTLB on kernel side. However translated transactions
>> that hit the device IOTLB do not transit through the vIOMMU. So this
>> requires a limited ATS support on viommu side. Anyway this assumed
>> ATS was eventually enabled .
>>
>> But neither SMMUv3 nor virtio-iommu do support ATS and the integration
>> with vhost just relies on the fact those vIOMMU send UNMAP notifications
>> whenever the guest trigger them. This works without ATS being enabled.
>>
>> This patch makes sure we get a warning if ATS is set on a device
>> protected by virtio-iommu or vsmmuv3, reminding that we don't have
>> full support of ATS on those vIOMMUs and setting ats=on on the
>> virtio-pci end-point is not a requirement.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> v1 -> v2:
>> - s/enabled/capable
>> - tweak the error message on vhost side
>> ---
>>  include/hw/virtio/virtio-bus.h |  3 +++
>>  hw/virtio/vhost.c              | 21 ++++++++++++++++++++-
>>  hw/virtio/virtio-bus.c         | 14 ++++++++++++++
>>  hw/virtio/virtio-pci.c         | 11 +++++++++++
>>  4 files changed, 48 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
>> index 7ab8c9dab0..23360a1daa 100644
>> --- a/include/hw/virtio/virtio-bus.h
>> +++ b/include/hw/virtio/virtio-bus.h
>> @@ -94,6 +94,7 @@ struct VirtioBusClass {
>>      bool has_variable_vring_alignment;
>>      AddressSpace *(*get_dma_as)(DeviceState *d);
>>      bool (*iommu_enabled)(DeviceState *d);
>> +    bool (*ats_capable)(DeviceState *d);
>>  };
>>
>>  struct VirtioBusState {
>> @@ -157,4 +158,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);
>>  void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n);
>>  /* Whether the IOMMU is enabled for this device */
>>  bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev);
>> +/* Whether ATS is enabled for this device */
>> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev);
>>  #endif /* VIRTIO_BUS_H */
>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>> index 5185c15295..3cf9efce5e 100644
>> --- a/hw/virtio/vhost.c
>> +++ b/hw/virtio/vhost.c
>> @@ -324,6 +324,16 @@ static bool vhost_dev_has_iommu(struct vhost_dev *dev)
>>      }
>>  }
>>
>> +static bool vhost_dev_ats_capable(struct vhost_dev *dev)
> I suggest to rename this as pf_capable() since ATS is PCI specific but
> vhost isn't.
Does pf sound for page fault?
>
>> +{
>> +    VirtIODevice *vdev = dev->vdev;
>> +
>> +    if (vdev && virtio_bus_device_ats_capable(vdev)) {
>> +        return true;
>> +    }
>> +    return false;
>> +}
>> +
>>  static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
>>                                hwaddr *plen, bool is_write)
>>  {
>> @@ -737,6 +747,7 @@ static void vhost_iommu_region_add(MemoryListener *listener,
>>      Int128 end;
>>      int iommu_idx;
>>      IOMMUMemoryRegion *iommu_mr;
>> +    Error *err = NULL;
>>      int ret;
>>
>>      if (!memory_region_is_iommu(section->mr)) {
>> @@ -760,8 +771,16 @@ static void vhost_iommu_region_add(MemoryListener *listener,
>>      iommu->iommu_offset = section->offset_within_address_space -
>>                            section->offset_within_region;
>>      iommu->hdev = dev;
>> -    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
>> +    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, &err);
>>      if (ret) {
>> +        if (vhost_dev_ats_capable(dev)) {
>> +            error_reportf_err(err,
>> +                              "%s: Although the device exposes ATS capability, "
>> +                              "fallback to legacy IOMMU UNMAP notifier: ",
>> +                              iommu_mr->parent_obj.name);
> I'm not sure if it's a real error, or I wonder what we need to do is
>
> 1) check is ATS is enabled
> 2) fallback to UNMAP is ATS is not enabled
Isn't the problem due to the fact that vhost, by construction, requires
"pf" (would it be though DEVIOTLB_UNMAP or UNMAP). Don't UNMAP
notifications also implement part of ATS protocol? I guess this is the
reason why you mandated ats in the first place.

So if ATS is not enabled, shouldn't we fallback to virtio instead of vhost?

Thanks

Eric
>
>> +        } else {
>> +            error_free(err);
>> +        }
>>          /*
>>           * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
>>           * UNMAP legacy message
>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
>> index 896feb37a1..d46c3f8ec4 100644
>> --- a/hw/virtio/virtio-bus.c
>> +++ b/hw/virtio/virtio-bus.c
>> @@ -348,6 +348,20 @@ bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev)
>>      return klass->iommu_enabled(qbus->parent);
>>  }
>>
>> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev)
>> +{
>> +    DeviceState *qdev = DEVICE(vdev);
>> +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
>> +    VirtioBusState *bus = VIRTIO_BUS(qbus);
>> +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
>> +
>> +    if (!klass->ats_capable) {
>> +        return false;
> I think it's better to check whether or not ATS is enabled. Guest may
> choose to not enable ATS for various reasons.
>
> Thanks
>
>> +    }
>> +
>> +    return klass->ats_capable(qbus->parent);
>> +}
>> +
>>  static void virtio_bus_class_init(ObjectClass *klass, void *data)
>>  {
>>      BusClass *bus_class = BUS_CLASS(klass);
>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> index e7d80242b7..c2ceba98a6 100644
>> --- a/hw/virtio/virtio-pci.c
>> +++ b/hw/virtio/virtio-pci.c
>> @@ -1141,6 +1141,16 @@ static bool virtio_pci_iommu_enabled(DeviceState *d)
>>      return true;
>>  }
>>
>> +static bool virtio_pci_ats_capable(DeviceState *d)
>> +{
>> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
>> +
>> +    if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
>> +        return true;
>> +    }
>> +    return false;
>> +}
>> +
>>  static bool virtio_pci_queue_enabled(DeviceState *d, int n)
>>  {
>>      VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
>> @@ -2229,6 +2239,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
>>      k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
>>      k->get_dma_as = virtio_pci_get_dma_as;
>>      k->iommu_enabled = virtio_pci_iommu_enabled;
>> +    k->ats_capable = virtio_pci_ats_capable;
>>      k->queue_enabled = virtio_pci_queue_enabled;
>>  }
>>
>> --
>> 2.35.3
>>
Jean-Philippe Brucker Oct. 20, 2022, 2:41 p.m. UTC | #3
Hi,

On Thu, Oct 20, 2022 at 03:03:10PM +0200, Eric Auger wrote:
> Hi Jason,
> 
> On 10/20/22 05:58, Jason Wang wrote:
> > On Wed, Oct 19, 2022 at 8:27 PM Eric Auger <eric.auger@redhat.com> wrote:
> >> Since b68ba1ca5767 ("memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP
> >> IOMMUTLBNotificationType"), vhost attempts to register DEVIOTLB_UNMAP
> >> notifier. This latter is supported by the intel-iommu which supports
> >> device-iotlb if the corresponding option is set. Then 958ec334bca3
> >> ("vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support") allowed
> >> silent fallback to the legacy UNMAP notifier if the viommu does not
> >> support device iotlb.
> >>
> >> Initially vhost/viommu integration was introduced with intel iommu
> >> assuming ats=on was set on virtio-pci device and device-iotlb was set
> >> on the intel iommu. vhost acts as an ATS capable device since it
> >> implements an IOTLB on kernel side. However translated transactions
> >> that hit the device IOTLB do not transit through the vIOMMU. So this
> >> requires a limited ATS support on viommu side. Anyway this assumed
> >> ATS was eventually enabled .
> >>
> >> But neither SMMUv3 nor virtio-iommu do support ATS and the integration
> >> with vhost just relies on the fact those vIOMMU send UNMAP notifications
> >> whenever the guest trigger them. This works without ATS being enabled.
> >>
> >> This patch makes sure we get a warning if ATS is set on a device
> >> protected by virtio-iommu or vsmmuv3, reminding that we don't have
> >> full support of ATS on those vIOMMUs and setting ats=on on the
> >> virtio-pci end-point is not a requirement.
> >>
> >> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >>
> >> ---
> >>
> >> v1 -> v2:
> >> - s/enabled/capable
> >> - tweak the error message on vhost side
> >> ---
> >>  include/hw/virtio/virtio-bus.h |  3 +++
> >>  hw/virtio/vhost.c              | 21 ++++++++++++++++++++-
> >>  hw/virtio/virtio-bus.c         | 14 ++++++++++++++
> >>  hw/virtio/virtio-pci.c         | 11 +++++++++++
> >>  4 files changed, 48 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
> >> index 7ab8c9dab0..23360a1daa 100644
> >> --- a/include/hw/virtio/virtio-bus.h
> >> +++ b/include/hw/virtio/virtio-bus.h
> >> @@ -94,6 +94,7 @@ struct VirtioBusClass {
> >>      bool has_variable_vring_alignment;
> >>      AddressSpace *(*get_dma_as)(DeviceState *d);
> >>      bool (*iommu_enabled)(DeviceState *d);
> >> +    bool (*ats_capable)(DeviceState *d);
> >>  };
> >>
> >>  struct VirtioBusState {
> >> @@ -157,4 +158,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);
> >>  void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n);
> >>  /* Whether the IOMMU is enabled for this device */
> >>  bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev);
> >> +/* Whether ATS is enabled for this device */
> >> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev);
> >>  #endif /* VIRTIO_BUS_H */
> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> >> index 5185c15295..3cf9efce5e 100644
> >> --- a/hw/virtio/vhost.c
> >> +++ b/hw/virtio/vhost.c
> >> @@ -324,6 +324,16 @@ static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> >>      }
> >>  }
> >>
> >> +static bool vhost_dev_ats_capable(struct vhost_dev *dev)
> > I suggest to rename this as pf_capable() since ATS is PCI specific but
> > vhost isn't.
> Does pf sound for page fault?

Maybe the name Vt-d uses for ATC, "device TLB" makes more sense as a
generic term, because page fault is a separate concept. However it might
be a little confusing for other architectures. The Arm SMMU can support
both OS-visible and transparent device TLBs:

(a) PCIe ATS requires special ATC invalidate commands, separate from the
  normal TLB invalidate commands. Like the Device-TLB invalidate command
  on Vt-d.

(b) But a SMMU can also be distributed, with multiple per-device TLBs
  talking to one translation component, and in that case the TLBs are
  coherent, invisible to the OS, and the normal TLB invalidate command
  invalidates all device TLBs.

Even though the term "device TLB" could apply to both cases, this function
is only about the first one so "ats_capable" may be clearer overall.

> >
> >> +{
> >> +    VirtIODevice *vdev = dev->vdev;
> >> +
> >> +    if (vdev && virtio_bus_device_ats_capable(vdev)) {
> >> +        return true;
> >> +    }
> >> +    return false;
> >> +}
> >> +
> >>  static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
> >>                                hwaddr *plen, bool is_write)
> >>  {
> >> @@ -737,6 +747,7 @@ static void vhost_iommu_region_add(MemoryListener *listener,
> >>      Int128 end;
> >>      int iommu_idx;
> >>      IOMMUMemoryRegion *iommu_mr;
> >> +    Error *err = NULL;
> >>      int ret;
> >>
> >>      if (!memory_region_is_iommu(section->mr)) {
> >> @@ -760,8 +771,16 @@ static void vhost_iommu_region_add(MemoryListener *listener,
> >>      iommu->iommu_offset = section->offset_within_address_space -
> >>                            section->offset_within_region;
> >>      iommu->hdev = dev;
> >> -    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> >> +    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, &err);
> >>      if (ret) {
> >> +        if (vhost_dev_ats_capable(dev)) {
> >> +            error_reportf_err(err,
> >> +                              "%s: Although the device exposes ATS capability, "
> >> +                              "fallback to legacy IOMMU UNMAP notifier: ",
> >> +                              iommu_mr->parent_obj.name);
> > I'm not sure if it's a real error, or I wonder what we need to do is
> >
> > 1) check is ATS is enabled
> > 2) fallback to UNMAP is ATS is not enabled
> Isn't the problem due to the fact that vhost, by construction, requires
> "pf" (would it be though DEVIOTLB_UNMAP or UNMAP). Don't UNMAP
> notifications also implement part of ATS protocol? I guess this is the
> reason why you mandated ats in the first place.
> 
> So if ATS is not enabled, shouldn't we fallback to virtio instead of vhost?

But do we need ATS in order to support vhost?  As long as we're capable of
keeping the vhost IOTLB coherent with the IOMMU by using the vhost-iotlb
protocol, then we should be able to support vhost. The guest can send
normal TLB invalidate commands (not special dev-iotlb commands), and
doesn't need to be aware that the TLB is implemented outside of QEMU
because the notifiers can maintain coherency transparently (model (b)
above).

So I agree with the reasoning in this patch, though I'm not sure it's
worth a warning. If the vIOMMU doesn't support ATS commands, then the OS
won't enable it in PCIe devices, but vhost can work in both cases.

Thanks,
Jean

> 
> Thanks
> 
> Eric
> >
> >> +        } else {
> >> +            error_free(err);
> >> +        }
> >>          /*
> >>           * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
> >>           * UNMAP legacy message
> >> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> >> index 896feb37a1..d46c3f8ec4 100644
> >> --- a/hw/virtio/virtio-bus.c
> >> +++ b/hw/virtio/virtio-bus.c
> >> @@ -348,6 +348,20 @@ bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev)
> >>      return klass->iommu_enabled(qbus->parent);
> >>  }
> >>
> >> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev)
> >> +{
> >> +    DeviceState *qdev = DEVICE(vdev);
> >> +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
> >> +    VirtioBusState *bus = VIRTIO_BUS(qbus);
> >> +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> >> +
> >> +    if (!klass->ats_capable) {
> >> +        return false;
> > I think it's better to check whether or not ATS is enabled. Guest may
> > choose to not enable ATS for various reasons.
> >
> > Thanks
> >
> >> +    }
> >> +
> >> +    return klass->ats_capable(qbus->parent);
> >> +}
> >> +
> >>  static void virtio_bus_class_init(ObjectClass *klass, void *data)
> >>  {
> >>      BusClass *bus_class = BUS_CLASS(klass);
> >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> >> index e7d80242b7..c2ceba98a6 100644
> >> --- a/hw/virtio/virtio-pci.c
> >> +++ b/hw/virtio/virtio-pci.c
> >> @@ -1141,6 +1141,16 @@ static bool virtio_pci_iommu_enabled(DeviceState *d)
> >>      return true;
> >>  }
> >>
> >> +static bool virtio_pci_ats_capable(DeviceState *d)
> >> +{
> >> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> >> +
> >> +    if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
> >> +        return true;
> >> +    }
> >> +    return false;
> >> +}
> >> +
> >>  static bool virtio_pci_queue_enabled(DeviceState *d, int n)
> >>  {
> >>      VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> >> @@ -2229,6 +2239,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
> >>      k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
> >>      k->get_dma_as = virtio_pci_get_dma_as;
> >>      k->iommu_enabled = virtio_pci_iommu_enabled;
> >> +    k->ats_capable = virtio_pci_ats_capable;
> >>      k->queue_enabled = virtio_pci_queue_enabled;
> >>  }
> >>
> >> --
> >> 2.35.3
> >>
>
Jason Wang Oct. 21, 2022, 2:38 a.m. UTC | #4
On Thu, Oct 20, 2022 at 9:03 PM Eric Auger <eric.auger@redhat.com> wrote:
>
> Hi Jason,
>
> On 10/20/22 05:58, Jason Wang wrote:
> > On Wed, Oct 19, 2022 at 8:27 PM Eric Auger <eric.auger@redhat.com> wrote:
> >> Since b68ba1ca5767 ("memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP
> >> IOMMUTLBNotificationType"), vhost attempts to register DEVIOTLB_UNMAP
> >> notifier. This latter is supported by the intel-iommu which supports
> >> device-iotlb if the corresponding option is set. Then 958ec334bca3
> >> ("vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support") allowed
> >> silent fallback to the legacy UNMAP notifier if the viommu does not
> >> support device iotlb.
> >>
> >> Initially vhost/viommu integration was introduced with intel iommu
> >> assuming ats=on was set on virtio-pci device and device-iotlb was set
> >> on the intel iommu. vhost acts as an ATS capable device since it
> >> implements an IOTLB on kernel side. However translated transactions
> >> that hit the device IOTLB do not transit through the vIOMMU. So this
> >> requires a limited ATS support on viommu side. Anyway this assumed
> >> ATS was eventually enabled .
> >>
> >> But neither SMMUv3 nor virtio-iommu do support ATS and the integration
> >> with vhost just relies on the fact those vIOMMU send UNMAP notifications
> >> whenever the guest trigger them. This works without ATS being enabled.
> >>
> >> This patch makes sure we get a warning if ATS is set on a device
> >> protected by virtio-iommu or vsmmuv3, reminding that we don't have
> >> full support of ATS on those vIOMMUs and setting ats=on on the
> >> virtio-pci end-point is not a requirement.
> >>
> >> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >>
> >> ---
> >>
> >> v1 -> v2:
> >> - s/enabled/capable
> >> - tweak the error message on vhost side
> >> ---
> >>  include/hw/virtio/virtio-bus.h |  3 +++
> >>  hw/virtio/vhost.c              | 21 ++++++++++++++++++++-
> >>  hw/virtio/virtio-bus.c         | 14 ++++++++++++++
> >>  hw/virtio/virtio-pci.c         | 11 +++++++++++
> >>  4 files changed, 48 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
> >> index 7ab8c9dab0..23360a1daa 100644
> >> --- a/include/hw/virtio/virtio-bus.h
> >> +++ b/include/hw/virtio/virtio-bus.h
> >> @@ -94,6 +94,7 @@ struct VirtioBusClass {
> >>      bool has_variable_vring_alignment;
> >>      AddressSpace *(*get_dma_as)(DeviceState *d);
> >>      bool (*iommu_enabled)(DeviceState *d);
> >> +    bool (*ats_capable)(DeviceState *d);
> >>  };
> >>
> >>  struct VirtioBusState {
> >> @@ -157,4 +158,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);
> >>  void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n);
> >>  /* Whether the IOMMU is enabled for this device */
> >>  bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev);
> >> +/* Whether ATS is enabled for this device */
> >> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev);
> >>  #endif /* VIRTIO_BUS_H */
> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> >> index 5185c15295..3cf9efce5e 100644
> >> --- a/hw/virtio/vhost.c
> >> +++ b/hw/virtio/vhost.c
> >> @@ -324,6 +324,16 @@ static bool vhost_dev_has_iommu(struct vhost_dev *dev)
> >>      }
> >>  }
> >>
> >> +static bool vhost_dev_ats_capable(struct vhost_dev *dev)
> > I suggest to rename this as pf_capable() since ATS is PCI specific but
> > vhost isn't.
> Does pf sound for page fault?

Yes.

> >
> >> +{
> >> +    VirtIODevice *vdev = dev->vdev;
> >> +
> >> +    if (vdev && virtio_bus_device_ats_capable(vdev)) {
> >> +        return true;
> >> +    }
> >> +    return false;
> >> +}
> >> +
> >>  static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
> >>                                hwaddr *plen, bool is_write)
> >>  {
> >> @@ -737,6 +747,7 @@ static void vhost_iommu_region_add(MemoryListener *listener,
> >>      Int128 end;
> >>      int iommu_idx;
> >>      IOMMUMemoryRegion *iommu_mr;
> >> +    Error *err = NULL;
> >>      int ret;
> >>
> >>      if (!memory_region_is_iommu(section->mr)) {
> >> @@ -760,8 +771,16 @@ static void vhost_iommu_region_add(MemoryListener *listener,
> >>      iommu->iommu_offset = section->offset_within_address_space -
> >>                            section->offset_within_region;
> >>      iommu->hdev = dev;
> >> -    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> >> +    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, &err);
> >>      if (ret) {
> >> +        if (vhost_dev_ats_capable(dev)) {
> >> +            error_reportf_err(err,
> >> +                              "%s: Although the device exposes ATS capability, "
> >> +                              "fallback to legacy IOMMU UNMAP notifier: ",
> >> +                              iommu_mr->parent_obj.name);
> > I'm not sure if it's a real error, or I wonder what we need to do is
> >
> > 1) check is ATS is enabled
> > 2) fallback to UNMAP is ATS is not enabled
> Isn't the problem due to the fact that vhost, by construction, requires
> "pf" (would it be though DEVIOTLB_UNMAP or UNMAP).

So the device IOTLB invalidation and miss were mapped to feature
VIRTIO_F_ACCESS_PLATFORM. So when this feature is negotiated Qemu can
send device IOTLB invalidation and process IOTLB miss (without caring
about ATS). So the only issue is when to trigger the IOTLB
invalidation. It could be done by both DEVIOTLB_UNMAP or UNMAP.

> Don't UNMAP
> notifications also implement part of ATS protocol?

I'd treat UNMAP as a way to make the device work without ATS:

1) when ATS is enabled, device IOTLB invalidation needs to be sent
from guest and vIOMMU receive device IOTLB invalidation request and
trigger DEVIOTLB_UNMAP
2) when ATS is not enabled, the device should still work per PCIe
spec, so we need find way to emulate this, since guest doesn't think
there's a device IOTLB, Qemu can only receive IOTLB invalidation. In
order to make dev devices work (receive IOTLB flush), we need to use
UNMAP in this case.

>I guess this is the
> reason why you mandated ats in the first place.
>
> So if ATS is not enabled, shouldn't we fallback to virtio instead of vhost?

If we can find a way to make vhost work that would be better and
what's more important, the fallback only work for vhost-net but not
vhost-user/vhost-vdpa.

Thanks

>
> Thanks
>
> Eric
> >
> >> +        } else {
> >> +            error_free(err);
> >> +        }
> >>          /*
> >>           * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
> >>           * UNMAP legacy message
> >> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> >> index 896feb37a1..d46c3f8ec4 100644
> >> --- a/hw/virtio/virtio-bus.c
> >> +++ b/hw/virtio/virtio-bus.c
> >> @@ -348,6 +348,20 @@ bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev)
> >>      return klass->iommu_enabled(qbus->parent);
> >>  }
> >>
> >> +bool virtio_bus_device_ats_capable(VirtIODevice *vdev)
> >> +{
> >> +    DeviceState *qdev = DEVICE(vdev);
> >> +    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
> >> +    VirtioBusState *bus = VIRTIO_BUS(qbus);
> >> +    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> >> +
> >> +    if (!klass->ats_capable) {
> >> +        return false;
> > I think it's better to check whether or not ATS is enabled. Guest may
> > choose to not enable ATS for various reasons.
> >
> > Thanks
> >
> >> +    }
> >> +
> >> +    return klass->ats_capable(qbus->parent);
> >> +}
> >> +
> >>  static void virtio_bus_class_init(ObjectClass *klass, void *data)
> >>  {
> >>      BusClass *bus_class = BUS_CLASS(klass);
> >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> >> index e7d80242b7..c2ceba98a6 100644
> >> --- a/hw/virtio/virtio-pci.c
> >> +++ b/hw/virtio/virtio-pci.c
> >> @@ -1141,6 +1141,16 @@ static bool virtio_pci_iommu_enabled(DeviceState *d)
> >>      return true;
> >>  }
> >>
> >> +static bool virtio_pci_ats_capable(DeviceState *d)
> >> +{
> >> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> >> +
> >> +    if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
> >> +        return true;
> >> +    }
> >> +    return false;
> >> +}
> >> +
> >>  static bool virtio_pci_queue_enabled(DeviceState *d, int n)
> >>  {
> >>      VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> >> @@ -2229,6 +2239,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
> >>      k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
> >>      k->get_dma_as = virtio_pci_get_dma_as;
> >>      k->iommu_enabled = virtio_pci_iommu_enabled;
> >> +    k->ats_capable = virtio_pci_ats_capable;
> >>      k->queue_enabled = virtio_pci_queue_enabled;
> >>  }
> >>
> >> --
> >> 2.35.3
> >>
>
diff mbox series

Patch

diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index 7ab8c9dab0..23360a1daa 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -94,6 +94,7 @@  struct VirtioBusClass {
     bool has_variable_vring_alignment;
     AddressSpace *(*get_dma_as)(DeviceState *d);
     bool (*iommu_enabled)(DeviceState *d);
+    bool (*ats_capable)(DeviceState *d);
 };
 
 struct VirtioBusState {
@@ -157,4 +158,6 @@  int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);
 void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n);
 /* Whether the IOMMU is enabled for this device */
 bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev);
+/* Whether ATS is enabled for this device */
+bool virtio_bus_device_ats_capable(VirtIODevice *vdev);
 #endif /* VIRTIO_BUS_H */
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 5185c15295..3cf9efce5e 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -324,6 +324,16 @@  static bool vhost_dev_has_iommu(struct vhost_dev *dev)
     }
 }
 
+static bool vhost_dev_ats_capable(struct vhost_dev *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+
+    if (vdev && virtio_bus_device_ats_capable(vdev)) {
+        return true;
+    }
+    return false;
+}
+
 static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
                               hwaddr *plen, bool is_write)
 {
@@ -737,6 +747,7 @@  static void vhost_iommu_region_add(MemoryListener *listener,
     Int128 end;
     int iommu_idx;
     IOMMUMemoryRegion *iommu_mr;
+    Error *err = NULL;
     int ret;
 
     if (!memory_region_is_iommu(section->mr)) {
@@ -760,8 +771,16 @@  static void vhost_iommu_region_add(MemoryListener *listener,
     iommu->iommu_offset = section->offset_within_address_space -
                           section->offset_within_region;
     iommu->hdev = dev;
-    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
+    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, &err);
     if (ret) {
+        if (vhost_dev_ats_capable(dev)) {
+            error_reportf_err(err,
+                              "%s: Although the device exposes ATS capability, "
+                              "fallback to legacy IOMMU UNMAP notifier: ",
+                              iommu_mr->parent_obj.name);
+        } else {
+            error_free(err);
+        }
         /*
          * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
          * UNMAP legacy message
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index 896feb37a1..d46c3f8ec4 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -348,6 +348,20 @@  bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev)
     return klass->iommu_enabled(qbus->parent);
 }
 
+bool virtio_bus_device_ats_capable(VirtIODevice *vdev)
+{
+    DeviceState *qdev = DEVICE(vdev);
+    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
+    VirtioBusState *bus = VIRTIO_BUS(qbus);
+    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
+
+    if (!klass->ats_capable) {
+        return false;
+    }
+
+    return klass->ats_capable(qbus->parent);
+}
+
 static void virtio_bus_class_init(ObjectClass *klass, void *data)
 {
     BusClass *bus_class = BUS_CLASS(klass);
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index e7d80242b7..c2ceba98a6 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1141,6 +1141,16 @@  static bool virtio_pci_iommu_enabled(DeviceState *d)
     return true;
 }
 
+static bool virtio_pci_ats_capable(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+
+    if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
+        return true;
+    }
+    return false;
+}
+
 static bool virtio_pci_queue_enabled(DeviceState *d, int n)
 {
     VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
@@ -2229,6 +2239,7 @@  static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
     k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
     k->get_dma_as = virtio_pci_get_dma_as;
     k->iommu_enabled = virtio_pci_iommu_enabled;
+    k->ats_capable = virtio_pci_ats_capable;
     k->queue_enabled = virtio_pci_queue_enabled;
 }