diff mbox series

[4/7] powerpc/powernv/pci: Add device to iommu group during dma_dev_setup()

Message ID 20200406030745.24595-5-oohall@gmail.com (mailing list archive)
State Accepted
Commit 84d8cc076723058cc294f4360db6ff7758c25b74
Headers show
Series [1/7] powerpc/powernv/npu: Clean up compound table group initialisation | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (2c0ce4ff35994a7b12cc9879ced52c9e7c2e6667)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 113 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Oliver O'Halloran April 6, 2020, 3:07 a.m. UTC
Historically, adding devices to their respective iommu group has been
handled by the post-init phb fixup for most devices. This was done
because:

1) The IOMMU group is tied to the PE (usually) so we can only set up the
   iommu groups after we've done resource allocation since BAR location
   determines the device's PE, and:
2) The sysfs directory for the pci_dev needs to be available since
   iommu_add_device() wants to add an attribute for the iommu group.

However, since commit 30d87ef8b38d ("powerpc/pci: Fix
pcibios_setup_device() ordering") both conditions are met when
hose->ops->dma_dev_setup() is called so there's no real need to do
this in the fixup.

Moving the call to iommu_add_device() into pnv_pci_ioda_dma_setup_dev()
is a nice cleanup since it puts all the per-device IOMMU setup into one
place. It also results in all (non-nvlink) devices getting their iommu
group via a common path rather than relying on the bus notifier hack
in pnv_tce_iommu_bus_notifier() to handle adding VFs and
hotplugged devices to their group.

Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/platforms/powernv/npu-dma.c  |  8 ++++
 arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++----------------
 arch/powerpc/platforms/powernv/pci.c      | 20 ----------
 3 files changed, 21 insertions(+), 54 deletions(-)

Comments

Alexey Kardashevskiy April 6, 2020, 9:51 a.m. UTC | #1
On 06/04/2020 13:07, Oliver O'Halloran wrote:
> Historically adding devices to their respective iommu group has been
> handled by the post-init phb fixup for most devices. This was done
> because:
> 
> 1) The IOMMU group is tied to the PE (usually) so we can only setup the
>    iommu groups after we've done resource allocation since BAR location
>    determines the device's PE, and:
> 2) The sysfs directory for the pci_dev needs to be available since
>    iommu_add_device() wants to add an attribute for the iommu group.
> 
> However, since commit 30d87ef8b38d ("powerpc/pci: Fix
> pcibios_setup_device() ordering") both conditions are met when
> hose->ops->dma_dev_setup() is called so there's no real need to do
> this in the fixup.
> 
> Moving the call to iommu_add_device() into pnv_pci_ioda_dma_setup_dev()
> is a nice cleanup since it puts all the per-device IOMMU setup into one
> place. It also results in all (non-nvlink) devices getting their iommu
> group via a common path rather than relying on the bus notifier hack
> in pnv_tce_iommu_bus_notifier() to handle the adding VFs and
> hotplugged devices to their group.
> 
> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> ---
>  arch/powerpc/platforms/powernv/npu-dma.c  |  8 ++++
>  arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++----------------
>  arch/powerpc/platforms/powernv/pci.c      | 20 ----------
>  3 files changed, 21 insertions(+), 54 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index 4fbbdfa8b327..df27b8d7e78f 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -469,6 +469,12 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
>  			compound_group->pgsizes = pe->table_group.pgsizes;
>  	}
>  
> +	/*
> +	 * The gpu would have been added to the iommu group that's created
> +	 * for the PE. Pull it out now.
> +	 */
> +	iommu_del_device(&gpdev->dev);
> +
>         /*
>  	* I'm not sure this is strictly required, but it's probably a good idea
>  	* since the table_group for the PE is going to be attached to the
> @@ -478,7 +484,9 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
>  	*/
>  	iommu_group_put(pe->table_group.group);
>  
> +	/* now put the GPU into the compound group */
>  	pnv_comp_attach_table_group(npucomp, pe);
> +	iommu_add_device(compound_group, &gpdev->dev);
>  
>  	return compound_group;
>  }
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index cf0aaef1b8fa..9198b7882b57 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1774,12 +1774,10 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
>  	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
>  	pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
>  	set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
> -	/*
> -	 * Note: iommu_add_device() will fail here as
> -	 * for physical PE: the device is already added by now;
> -	 * for virtual PE: sysfs entries are not ready yet and
> -	 * tce_iommu_bus_notifier will add the device to a group later.
> -	 */
> +
> +	/* PEs with a DMA weight of zero won't have a group */
> +	if (pe->table_group.group)
> +		iommu_add_device(&pe->table_group, &pdev->dev);
>  }
>  
>  /*
> @@ -2628,39 +2626,20 @@ static void pnv_pci_ioda_setup_iommu_api(void)
>  	struct pnv_ioda_pe *pe;
>  
>  	/*
> -	 * There are 4 types of PEs:
> -	 * - PNV_IODA_PE_BUS: a downstream port with an adapter,
> -	 *   created from pnv_pci_setup_bridge();
> -	 * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
> -	 *   created from pnv_pci_setup_bridge();
> -	 * - PNV_IODA_PE_VF: a SRIOV virtual function,
> -	 *   created from pnv_pcibios_sriov_enable();
> -	 * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
> -	 *   created from pnv_pci_ioda_fixup().
> +	 * For non-nvlink devices the IOMMU group is registered when the PE is
> +	 * configured and devices are added to the group when the per-device
> +	 * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
> +	 * only initialise for "normal" IODA PHBs.
>  	 *
> -	 * Normally a PE is represented by an IOMMU group, however for
> -	 * devices with side channels the groups need to be more strict.
> +	 * For NVLink devices we need to ensure the NVLinks and the GPU end up
> +	 * in the same IOMMU group, so that's handled here.
>  	 */
>  	list_for_each_entry(hose, &hose_list, list_node) {
>  		phb = hose->private_data;
>  
> -		if (phb->type == PNV_PHB_NPU_NVLINK ||
> -		    phb->type == PNV_PHB_NPU_OCAPI)
> -			continue;
> -
> -		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
> -			struct iommu_table_group *table_group;
> -
> -			table_group = pnv_try_setup_npu_table_group(pe);
> -			if (!table_group) {
> -				if (!pnv_pci_ioda_pe_dma_weight(pe))
> -					continue;
> -
> -				table_group = &pe->table_group;
> -			}
> -			pnv_ioda_setup_bus_iommu_group(pe, table_group,
> -					pe->pbus);
> -		}
> +		if (phb->type == PNV_PHB_IODA2)
> +			list_for_each_entry(pe, &phb->ioda.pe_list, list)
> +				pnv_try_setup_npu_table_group(pe);
>  	}
>  
>  	/*
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index 5bf818246339..091fe1cf386b 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -955,28 +955,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
>  		unsigned long action, void *data)
>  {
>  	struct device *dev = data;
> -	struct pci_dev *pdev;
> -	struct pci_dn *pdn;
> -	struct pnv_ioda_pe *pe;
> -	struct pci_controller *hose;
> -	struct pnv_phb *phb;
>  
>  	switch (action) {
> -	case BUS_NOTIFY_ADD_DEVICE:
> -		pdev = to_pci_dev(dev);
> -		pdn = pci_get_pdn(pdev);
> -		hose = pci_bus_to_host(pdev->bus);
> -		phb = hose->private_data;
> -
> -		WARN_ON_ONCE(!phb);
> -		if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
> -			return 0;
> -
> -		pe = &phb->ioda.pe_array[pdn->pe_number];
> -		if (!pe->table_group.group)
> -			return 0;
> -		iommu_add_device(&pe->table_group, dev);
> -		return 0;
>  	case BUS_NOTIFY_DEL_DEVICE:
>  		iommu_del_device(dev);
>  		return 0;
>
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 4fbbdfa8b327..df27b8d7e78f 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -469,6 +469,12 @@  struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
 			compound_group->pgsizes = pe->table_group.pgsizes;
 	}
 
+	/*
+	 * The gpu would have been added to the iommu group that's created
+	 * for the PE. Pull it out now.
+	 */
+	iommu_del_device(&gpdev->dev);
+
        /*
 	* I'm not sure this is strictly required, but it's probably a good idea
 	* since the table_group for the PE is going to be attached to the
@@ -478,7 +484,9 @@  struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
 	*/
 	iommu_group_put(pe->table_group.group);
 
+	/* now put the GPU into the compound group */
 	pnv_comp_attach_table_group(npucomp, pe);
+	iommu_add_device(compound_group, &gpdev->dev);
 
 	return compound_group;
 }
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cf0aaef1b8fa..9198b7882b57 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1774,12 +1774,10 @@  static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
 	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
 	pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
 	set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
-	/*
-	 * Note: iommu_add_device() will fail here as
-	 * for physical PE: the device is already added by now;
-	 * for virtual PE: sysfs entries are not ready yet and
-	 * tce_iommu_bus_notifier will add the device to a group later.
-	 */
+
+	/* PEs with a DMA weight of zero won't have a group */
+	if (pe->table_group.group)
+		iommu_add_device(&pe->table_group, &pdev->dev);
 }
 
 /*
@@ -2628,39 +2626,20 @@  static void pnv_pci_ioda_setup_iommu_api(void)
 	struct pnv_ioda_pe *pe;
 
 	/*
-	 * There are 4 types of PEs:
-	 * - PNV_IODA_PE_BUS: a downstream port with an adapter,
-	 *   created from pnv_pci_setup_bridge();
-	 * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
-	 *   created from pnv_pci_setup_bridge();
-	 * - PNV_IODA_PE_VF: a SRIOV virtual function,
-	 *   created from pnv_pcibios_sriov_enable();
-	 * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
-	 *   created from pnv_pci_ioda_fixup().
+	 * For non-nvlink devices the IOMMU group is registered when the PE is
+	 * configured and devices are added to the group when the per-device
+	 * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
+	 * only initialise for "normal" IODA PHBs.
 	 *
-	 * Normally a PE is represented by an IOMMU group, however for
-	 * devices with side channels the groups need to be more strict.
+	 * For NVLink devices we need to ensure the NVLinks and the GPU end up
+	 * in the same IOMMU group, so that's handled here.
 	 */
 	list_for_each_entry(hose, &hose_list, list_node) {
 		phb = hose->private_data;
 
-		if (phb->type == PNV_PHB_NPU_NVLINK ||
-		    phb->type == PNV_PHB_NPU_OCAPI)
-			continue;
-
-		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-			struct iommu_table_group *table_group;
-
-			table_group = pnv_try_setup_npu_table_group(pe);
-			if (!table_group) {
-				if (!pnv_pci_ioda_pe_dma_weight(pe))
-					continue;
-
-				table_group = &pe->table_group;
-			}
-			pnv_ioda_setup_bus_iommu_group(pe, table_group,
-					pe->pbus);
-		}
+		if (phb->type == PNV_PHB_IODA2)
+			list_for_each_entry(pe, &phb->ioda.pe_list, list)
+				pnv_try_setup_npu_table_group(pe);
 	}
 
 	/*
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 5bf818246339..091fe1cf386b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -955,28 +955,8 @@  static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
 		unsigned long action, void *data)
 {
 	struct device *dev = data;
-	struct pci_dev *pdev;
-	struct pci_dn *pdn;
-	struct pnv_ioda_pe *pe;
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
 
 	switch (action) {
-	case BUS_NOTIFY_ADD_DEVICE:
-		pdev = to_pci_dev(dev);
-		pdn = pci_get_pdn(pdev);
-		hose = pci_bus_to_host(pdev->bus);
-		phb = hose->private_data;
-
-		WARN_ON_ONCE(!phb);
-		if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
-			return 0;
-
-		pe = &phb->ioda.pe_array[pdn->pe_number];
-		if (!pe->table_group.group)
-			return 0;
-		iommu_add_device(&pe->table_group, dev);
-		return 0;
 	case BUS_NOTIFY_DEL_DEVICE:
 		iommu_del_device(dev);
 		return 0;