diff mbox series

[v11,4/6] iommu/s390: Force ISM devices to use IOMMU_DOMAIN_DMA

Message ID 20230717-dma_iommu-v11-4-a7a0b83c355c@linux.ibm.com
State Handled Elsewhere
Headers show
Series iommu/dma: s390 DMA API conversion and optimized IOTLB flushing | expand

Commit Message

Niklas Schnelle July 17, 2023, 11 a.m. UTC
ISM devices are virtual PCI devices used for cross-LPAR communication.
Unlike real PCI devices ISM devices do not use the hardware IOMMU but
inspects IOMMU translation tables directly on IOTLB flush (s390 RPCIT
instruction).

While ISM devices keep their DMA allocations static and only very rarely
DMA unmap at all, For each IOTLB flush that occurs after unmap the ISM
devices will inspect the area of the IOVA space indicated by the flush.
This means that for the global IOTLB flushes used by the flush queue
mechanism the entire IOVA space would be inspected. In principle this
would be fine, albeit potentially unnecessarily slow, it turns out
however that ISM devices are sensitive to seeing IOVA addresses that are
currently in use in the IOVA range being flushed. Seeing such in-use
IOVA addresses will cause the ISM device to enter an error state and
become unusable.

Fix this by forcing IOMMU_DOMAIN_DMA to be used for ISM devices. This
makes sure IOTLB flushes only cover IOVAs that have been unmapped and
also restricts the range of the IOTLB flush potentially reducing latency
spikes.

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
 drivers/iommu/s390-iommu.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

Comments

Matthew Rosato July 19, 2023, 12:56 p.m. UTC | #1
On 7/17/23 7:00 AM, Niklas Schnelle wrote:
> ISM devices are virtual PCI devices used for cross-LPAR communication.
> Unlike real PCI devices ISM devices do not use the hardware IOMMU but
> inspects IOMMU translation tables directly on IOTLB flush (s390 RPCIT
> instruction).
> 
> While ISM devices keep their DMA allocations static and only very rarely
> DMA unmap at all, For each IOTLB flush that occurs after unmap the ISM
> devices will inspect the area of the IOVA space indicated by the flush.
> This means that for the global IOTLB flushes used by the flush queue
> mechanism the entire IOVA space would be inspected. In principle this
> would be fine, albeit potentially unnecessarily slow, it turns out
> however that ISM devices are sensitive to seeing IOVA addresses that are
> currently in use in the IOVA range being flushed. Seeing such in-use
> IOVA addresses will cause the ISM device to enter an error state and
> become unusable.
> 
> Fix this by forcing IOMMU_DOMAIN_DMA to be used for ISM devices. This
> makes sure IOTLB flushes only cover IOVAs that have been unmapped and
> also restricts the range of the IOTLB flush potentially reducing latency
> spikes.
> 
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>

This makes sense to me.

Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Robin Murphy Aug. 18, 2023, 7:10 p.m. UTC | #2
On 2023-07-17 12:00, Niklas Schnelle wrote:
> ISM devices are virtual PCI devices used for cross-LPAR communication.
> Unlike real PCI devices ISM devices do not use the hardware IOMMU but
> inspects IOMMU translation tables directly on IOTLB flush (s390 RPCIT
> instruction).
> 
> While ISM devices keep their DMA allocations static and only very rarely
> DMA unmap at all, For each IOTLB flush that occurs after unmap the ISM
> devices will inspect the area of the IOVA space indicated by the flush.
> This means that for the global IOTLB flushes used by the flush queue
> mechanism the entire IOVA space would be inspected. In principle this
> would be fine, albeit potentially unnecessarily slow, it turns out
> however that ISM devices are sensitive to seeing IOVA addresses that are
> currently in use in the IOVA range being flushed. Seeing such in-use
> IOVA addresses will cause the ISM device to enter an error state and
> become unusable.
> 
> Fix this by forcing IOMMU_DOMAIN_DMA to be used for ISM devices. This
> makes sure IOTLB flushes only cover IOVAs that have been unmapped and
> also restricts the range of the IOTLB flush potentially reducing latency
> spikes.

Would it not be simpler to return false for IOMMU_CAP_DEFERRED_FLUSH for 
these devices?

Cheers,
Robin.

> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
>   drivers/iommu/s390-iommu.c | 10 ++++++++++
>   1 file changed, 10 insertions(+)
> 
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index f6d6c60e5634..020cc538e4c4 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -710,6 +710,15 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
>   	return &zdev->s390_domain->ctrs;
>   }
>   
> +static int s390_iommu_def_domain_type(struct device *dev)
> +{
> +	struct zpci_dev *zdev = to_zpci_dev(dev);
> +
> +	if (zdev->pft == PCI_FUNC_TYPE_ISM)
> +		return IOMMU_DOMAIN_DMA;
> +	return 0;
> +}
> +
>   int zpci_init_iommu(struct zpci_dev *zdev)
>   {
>   	u64 aperture_size;
> @@ -789,6 +798,7 @@ static const struct iommu_ops s390_iommu_ops = {
>   	.probe_device = s390_iommu_probe_device,
>   	.probe_finalize = s390_iommu_probe_finalize,
>   	.release_device = s390_iommu_release_device,
> +	.def_domain_type = s390_iommu_def_domain_type,
>   	.device_group = generic_device_group,
>   	.pgsize_bitmap = SZ_4K,
>   	.get_resv_regions = s390_iommu_get_resv_regions,
>
Niklas Schnelle Aug. 23, 2023, 10:53 a.m. UTC | #3
On Fri, 2023-08-18 at 20:10 +0100, Robin Murphy wrote:
> On 2023-07-17 12:00, Niklas Schnelle wrote:
> > ISM devices are virtual PCI devices used for cross-LPAR communication.
> > Unlike real PCI devices ISM devices do not use the hardware IOMMU but
> > inspects IOMMU translation tables directly on IOTLB flush (s390 RPCIT
> > instruction).
> > 
> > While ISM devices keep their DMA allocations static and only very rarely
> > DMA unmap at all, For each IOTLB flush that occurs after unmap the ISM
> > devices will inspect the area of the IOVA space indicated by the flush.
> > This means that for the global IOTLB flushes used by the flush queue
> > mechanism the entire IOVA space would be inspected. In principle this
> > would be fine, albeit potentially unnecessarily slow, it turns out
> > however that ISM devices are sensitive to seeing IOVA addresses that are
> > currently in use in the IOVA range being flushed. Seeing such in-use
> > IOVA addresses will cause the ISM device to enter an error state and
> > become unusable.
> > 
> > Fix this by forcing IOMMU_DOMAIN_DMA to be used for ISM devices. This
> > makes sure IOTLB flushes only cover IOVAs that have been unmapped and
> > also restricts the range of the IOTLB flush potentially reducing latency
> > spikes.
> 
> Would it not be simpler to return false for IOMMU_CAP_DEFERRED_FLUSH for 
> these devices?
> 
> Cheers,
> Robin.

Nice idea thank you. This is indeed less code, basically just return
zdev->pft != PCI_FUNC_TYPE_ISM for the IOMMU_CAP_DEFERRED_FLUSH check.
I think it's also semantically more clear in that we don't really care
about the domain type but about not getting deferred flushes.

Thanks,
Niklas

> 
> > Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> > ---
> >   drivers/iommu/s390-iommu.c | 10 ++++++++++
> >   1 file changed, 10 insertions(+)
> > 
> > diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> > index f6d6c60e5634..020cc538e4c4 100644
> > --- a/drivers/iommu/s390-iommu.c
> > +++ b/drivers/iommu/s390-iommu.c
> > @@ -710,6 +710,15 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
> >   	return &zdev->s390_domain->ctrs;
> >   }
> >   
> > +static int s390_iommu_def_domain_type(struct device *dev)
> > +{
> > +	struct zpci_dev *zdev = to_zpci_dev(dev);
> > +
> > +	if (zdev->pft == PCI_FUNC_TYPE_ISM)
> > +		return IOMMU_DOMAIN_DMA;
> > +	return 0;
> > +}
> > +
> >   int zpci_init_iommu(struct zpci_dev *zdev)
> >   {
> >   	u64 aperture_size;
> > @@ -789,6 +798,7 @@ static const struct iommu_ops s390_iommu_ops = {
> >   	.probe_device = s390_iommu_probe_device,
> >   	.probe_finalize = s390_iommu_probe_finalize,
> >   	.release_device = s390_iommu_release_device,
> > +	.def_domain_type = s390_iommu_def_domain_type,
> >   	.device_group = generic_device_group,
> >   	.pgsize_bitmap = SZ_4K,
> >   	.get_resv_regions = s390_iommu_get_resv_regions,
> > 
>
diff mbox series

Patch

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index f6d6c60e5634..020cc538e4c4 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -710,6 +710,15 @@  struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
 	return &zdev->s390_domain->ctrs;
 }
 
+static int s390_iommu_def_domain_type(struct device *dev)
+{
+	struct zpci_dev *zdev = to_zpci_dev(dev);
+
+	if (zdev->pft == PCI_FUNC_TYPE_ISM)
+		return IOMMU_DOMAIN_DMA;
+	return 0;
+}
+
 int zpci_init_iommu(struct zpci_dev *zdev)
 {
 	u64 aperture_size;
@@ -789,6 +798,7 @@  static const struct iommu_ops s390_iommu_ops = {
 	.probe_device = s390_iommu_probe_device,
 	.probe_finalize = s390_iommu_probe_finalize,
 	.release_device = s390_iommu_release_device,
+	.def_domain_type = s390_iommu_def_domain_type,
 	.device_group = generic_device_group,
 	.pgsize_bitmap = SZ_4K,
 	.get_resv_regions = s390_iommu_get_resv_regions,