diff mbox series

[kernel] powerpc/powernv/ioda: Fix race in TCE level allocation

Message ID 20190611023103.86977-1-aik@ozlabs.ru
State Superseded
Headers show
Series [kernel] powerpc/powernv/ioda: Fix race in TCE level allocation | expand

Commit Message

Alexey Kardashevskiy June 11, 2019, 2:31 a.m. UTC
pnv_tce() returns a pointer to a TCE entry and originally a TCE table
would be pre-allocated. For the default case of 2GB window the table
needs only a single level and that is fine. However if more levels are
requested, it is possible to get a race when 2 threads want a pointer
to a TCE entry from the same page of TCEs.

This adds a spinlock to handle the race. The alloc==true case is not
possible in the real mode so spinlock is safe for KVM as well.

CC: stable@vger.kernel.org # v4.19+
Fixes: a68bd1267b72 ("powerpc/powernv/ioda: Allocate indirect TCE levels on demand")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---

This fixes EEH's from
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=110810


---
 arch/powerpc/include/asm/iommu.h              |  1 +
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 21 ++++++++++++-------
 2 files changed, 14 insertions(+), 8 deletions(-)

Comments

Alexey Kardashevskiy June 11, 2019, 4:29 a.m. UTC | #1
Please ignore this (causes lockdep warnings), v2 is coming.


On 11/06/2019 12:31, Alexey Kardashevskiy wrote:
> pnv_tce() returns a pointer to a TCE entry and originally a TCE table
> would be pre-allocated. For the default case of 2GB window the table
> needs only a single level and that is fine. However if more levels are
> requested, it is possible to get a race when 2 threads want a pointer
> to a TCE entry from the same page of TCEs.
> 
> This adds a spinlock to handle the race. The alloc==true case is not
> possible in the real mode so spinlock is safe for KVM as well.
> 
> CC: stable@vger.kernel.org # v4.19+
> Fixes: a68bd1267b72 ("powerpc/powernv/ioda: Allocate indirect TCE levels on demand")
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> 
> This fixes EEH's from
> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=110810
> 
> 
> ---
>  arch/powerpc/include/asm/iommu.h              |  1 +
>  arch/powerpc/platforms/powernv/pci-ioda-tce.c | 21 ++++++++++++-------
>  2 files changed, 14 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
> index 2c1845e5e851..1825b4cc0097 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -111,6 +111,7 @@ struct iommu_table {
>  	struct iommu_table_ops *it_ops;
>  	struct kref    it_kref;
>  	int it_nid;
> +	spinlock_t it_lock;
>  };
>  
>  #define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
> index e28f03e1eb5e..9a19d61e2b12 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
> @@ -29,6 +29,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
>  	tbl->it_size = tce_size >> 3;
>  	tbl->it_busno = 0;
>  	tbl->it_type = TCE_PCI;
> +	spin_lock_init(&tbl->it_lock);
>  }
>  
>  static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
> @@ -60,18 +61,22 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
>  		unsigned long tce;
>  
>  		if (tmp[n] == 0) {
> -			__be64 *tmp2;
> -
>  			if (!alloc)
>  				return NULL;
>  
> -			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
> -					ilog2(tbl->it_level_size) + 3);
> -			if (!tmp2)
> -				return NULL;
> +			spin_lock(&tbl->it_lock);
> +			if (tmp[n] == 0) {
> +				__be64 *tmp2;
>  
> -			tmp[n] = cpu_to_be64(__pa(tmp2) |
> -					TCE_PCI_READ | TCE_PCI_WRITE);
> +				tmp2 = pnv_alloc_tce_level(tbl->it_nid,
> +						ilog2(tbl->it_level_size) + 3);
> +				if (tmp2)
> +					tmp[n] = cpu_to_be64(__pa(tmp2) |
> +						TCE_PCI_READ | TCE_PCI_WRITE);
> +			}
> +			spin_unlock(&tbl->it_lock);
> +			if (tmp[n] == 0)
> +				return NULL;
>  		}
>  		tce = be64_to_cpu(tmp[n]);
>  
>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 2c1845e5e851..1825b4cc0097 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -111,6 +111,7 @@  struct iommu_table {
 	struct iommu_table_ops *it_ops;
 	struct kref    it_kref;
 	int it_nid;
+	spinlock_t it_lock;
 };
 
 #define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index e28f03e1eb5e..9a19d61e2b12 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -29,6 +29,7 @@  void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 	tbl->it_size = tce_size >> 3;
 	tbl->it_busno = 0;
 	tbl->it_type = TCE_PCI;
+	spin_lock_init(&tbl->it_lock);
 }
 
 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
@@ -60,18 +61,22 @@  static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
 		unsigned long tce;
 
 		if (tmp[n] == 0) {
-			__be64 *tmp2;
-
 			if (!alloc)
 				return NULL;
 
-			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
-					ilog2(tbl->it_level_size) + 3);
-			if (!tmp2)
-				return NULL;
+			spin_lock(&tbl->it_lock);
+			if (tmp[n] == 0) {
+				__be64 *tmp2;
 
-			tmp[n] = cpu_to_be64(__pa(tmp2) |
-					TCE_PCI_READ | TCE_PCI_WRITE);
+				tmp2 = pnv_alloc_tce_level(tbl->it_nid,
+						ilog2(tbl->it_level_size) + 3);
+				if (tmp2)
+					tmp[n] = cpu_to_be64(__pa(tmp2) |
+						TCE_PCI_READ | TCE_PCI_WRITE);
+			}
+			spin_unlock(&tbl->it_lock);
+			if (tmp[n] == 0)
+				return NULL;
 		}
 		tce = be64_to_cpu(tmp[n]);