diff mbox series

[v7,18/24] iommu/arm-smmu-v3: Add support for Hardware Translation Table Update

Message ID 20200519175502.2504091-19-jean-philippe@linaro.org
State New
Headers show
Series iommu: Shared Virtual Addressing for SMMUv3 | expand

Commit Message

Jean-Philippe Brucker May 19, 2020, 5:54 p.m. UTC
If the SMMU supports it and the kernel was built with HTTU support,
enable hardware update of access and dirty flags. This is essential for
shared page tables, to reduce the number of access faults on the fault
queue. Normal DMA with io-pgtables doesn't currently use the access or
dirty flags.

We can enable HTTU even if CPUs don't support it, because the kernel
always checks for HW dirty bit and updates the PTE flags atomically.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

Comments

Will Deacon May 21, 2020, 11:12 a.m. UTC | #1
On Tue, May 19, 2020 at 07:54:56PM +0200, Jean-Philippe Brucker wrote:
> If the SMMU supports it and the kernel was built with HTTU support,
> enable hardware update of access and dirty flags. This is essential for
> shared page tables, to reduce the number of access faults on the fault
> queue. Normal DMA with io-pgtables doesn't currently use the access or
> dirty flags.
> 
> We can enable HTTU even if CPUs don't support it, because the kernel
> always checks for HW dirty bit and updates the PTE flags atomically.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 24 +++++++++++++++++++++++-
>  1 file changed, 23 insertions(+), 1 deletion(-)

How does this work if the SMMU isn't cache coherent? I'm guessing we don't
want to enable any SVA stuff in that case, but I couldn't spot where that
was being enforced. Did I just miss it?

Will
Xiang Zheng May 27, 2020, 3 a.m. UTC | #2
Hi Jean,

This patch only enables HTTU bits in CDs. Is it also neccessary to enable
HTTU bits in STEs in this patch?

On 2020/5/20 1:54, Jean-Philippe Brucker wrote:
> If the SMMU supports it and the kernel was built with HTTU support,
> enable hardware update of access and dirty flags. This is essential for
> shared page tables, to reduce the number of access faults on the fault
> queue. Normal DMA with io-pgtables doesn't currently use the access or
> dirty flags.
> 
> We can enable HTTU even if CPUs don't support it, because the kernel
> always checks for HW dirty bit and updates the PTE flags atomically.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 24 +++++++++++++++++++++++-
>  1 file changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 1386d4d2bc60..6a368218f54c 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -58,6 +58,8 @@
>  #define IDR0_ASID16			(1 << 12)
>  #define IDR0_ATS			(1 << 10)
>  #define IDR0_HYP			(1 << 9)
> +#define IDR0_HD				(1 << 7)
> +#define IDR0_HA				(1 << 6)
>  #define IDR0_BTM			(1 << 5)
>  #define IDR0_COHACC			(1 << 4)
>  #define IDR0_TTF			GENMASK(3, 2)
> @@ -311,6 +313,9 @@
>  #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
>  #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
>  
> +#define CTXDESC_CD_0_TCR_HA		(1UL << 43)
> +#define CTXDESC_CD_0_TCR_HD		(1UL << 42)
> +

>  #define CTXDESC_CD_0_AA64		(1UL << 41)
>  #define CTXDESC_CD_0_S			(1UL << 44)
>  #define CTXDESC_CD_0_R			(1UL << 45)
> @@ -663,6 +668,8 @@ struct arm_smmu_device {
>  #define ARM_SMMU_FEAT_E2H		(1 << 16)
>  #define ARM_SMMU_FEAT_BTM		(1 << 17)
>  #define ARM_SMMU_FEAT_SVA		(1 << 18)
> +#define ARM_SMMU_FEAT_HA		(1 << 19)
> +#define ARM_SMMU_FEAT_HD		(1 << 20)
>  	u32				features;
>  
>  #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
> @@ -1718,10 +1725,17 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  		 * this substream's traffic
>  		 */
>  	} else { /* (1) and (2) */
> +		u64 tcr = cd->tcr;
> +
>  		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
>  		cdptr[2] = 0;
>  		cdptr[3] = cpu_to_le64(cd->mair);
>  
> +		if (!(smmu->features & ARM_SMMU_FEAT_HD))
> +			tcr &= ~CTXDESC_CD_0_TCR_HD;
> +		if (!(smmu->features & ARM_SMMU_FEAT_HA))
> +			tcr &= ~CTXDESC_CD_0_TCR_HA;
> +
>  		/*
>  		 * STE is live, and the SMMU might read dwords of this CD in any
>  		 * order. Ensure that it observes valid values before reading
> @@ -1729,7 +1743,7 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  		 */
>  		arm_smmu_sync_cd(smmu_domain, ssid, true);
>  
> -		val = cd->tcr |
> +		val = tcr |
>  #ifdef __BIG_ENDIAN
>  			CTXDESC_CD_0_ENDI |
>  #endif
> @@ -1958,10 +1972,12 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
>  		return old_cd;
>  	}
>  
> +	/* HA and HD will be filtered out later if not supported by the SMMU */
>  	tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - VA_BITS) |
>  	      FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) |
>  	      FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) |
>  	      FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) |
> +	      CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD |
>  	      CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
>  
>  	switch (PAGE_SIZE) {
> @@ -4454,6 +4470,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>  			smmu->features |= ARM_SMMU_FEAT_E2H;
>  	}
>  
> +	if (reg & (IDR0_HA | IDR0_HD)) {

> +		smmu->features |= ARM_SMMU_FEAT_HA;
> +		if (reg & IDR0_HD)
> +			smmu->features |= ARM_SMMU_FEAT_HD;
> +	}
> +

>  	/*
>  	 * If the CPU is using VHE, but the SMMU doesn't support it, the SMMU
>  	 * will create TLB entries for NH-EL1 world and will miss the
>
Jean-Philippe Brucker May 27, 2020, 8:41 a.m. UTC | #3
On Wed, May 27, 2020 at 11:00:29AM +0800, Xiang Zheng wrote:
> Hi Jean,
> 
> This patch only enables HTTU bits in CDs. Is it also neccessary to enable
> HTTU bits in STEs in this patch?

Only if you need HTTU for stage-2 tables. This series is only about
sharing stage-1 page tables, for which HTTU is enabled in the CD. I'll add
a statement in the commit message.

Thanks,
Jean
Zenghui Yu Aug. 28, 2020, 9:28 a.m. UTC | #4
On 2020/5/20 1:54, Jean-Philippe Brucker wrote:
> @@ -4454,6 +4470,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>   			smmu->features |= ARM_SMMU_FEAT_E2H;
>   	}
>   
> +	if (reg & (IDR0_HA | IDR0_HD)) {
> +		smmu->features |= ARM_SMMU_FEAT_HA;
> +		if (reg & IDR0_HD)
> +			smmu->features |= ARM_SMMU_FEAT_HD;
> +	}
> +

nitpick:

As per the IORT spec (DEN0049D, 3.1.1.2 SMMUv3 node, Table 10), the
"HTTU Override" flag of the SMMUv3 node can override the value in
SMMU_IDR0.HTTU. You may want to check this bit before selecting the
{HA,HD} features and shout if there is a mismatch between firmware and
the SMMU implementation. Just like how ARM_SMMU_FEAT_COHERENCY is
selected.


Thanks,
Zenghui
Jean-Philippe Brucker Sept. 16, 2020, 2:11 p.m. UTC | #5
On Fri, Aug 28, 2020 at 05:28:22PM +0800, Zenghui Yu wrote:
> On 2020/5/20 1:54, Jean-Philippe Brucker wrote:
> > @@ -4454,6 +4470,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
> >   			smmu->features |= ARM_SMMU_FEAT_E2H;
> >   	}
> > +	if (reg & (IDR0_HA | IDR0_HD)) {
> > +		smmu->features |= ARM_SMMU_FEAT_HA;
> > +		if (reg & IDR0_HD)
> > +			smmu->features |= ARM_SMMU_FEAT_HD;
> > +	}
> > +
> 
> nitpick:
> 
> As per the IORT spec (DEN0049D, 3.1.1.2 SMMUv3 node, Table 10), the
> "HTTU Override" flag of the SMMUv3 node can override the value in
> SMMU_IDR0.HTTU. You may want to check this bit before selecting the
> {HA,HD} features and shout if there is a mismatch between firmware and
> the SMMU implementation. Just like how ARM_SMMU_FEAT_COHERENCY is
> selected.

Thanks for pointing this out, I didn't know about these flags but have
added them to the patch now.

Thanks,
Jean
diff mbox series

Patch

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 1386d4d2bc60..6a368218f54c 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -58,6 +58,8 @@ 
 #define IDR0_ASID16			(1 << 12)
 #define IDR0_ATS			(1 << 10)
 #define IDR0_HYP			(1 << 9)
+#define IDR0_HD				(1 << 7)
+#define IDR0_HA				(1 << 6)
 #define IDR0_BTM			(1 << 5)
 #define IDR0_COHACC			(1 << 4)
 #define IDR0_TTF			GENMASK(3, 2)
@@ -311,6 +313,9 @@ 
 #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
 #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
 
+#define CTXDESC_CD_0_TCR_HA		(1UL << 43)
+#define CTXDESC_CD_0_TCR_HD		(1UL << 42)
+
 #define CTXDESC_CD_0_AA64		(1UL << 41)
 #define CTXDESC_CD_0_S			(1UL << 44)
 #define CTXDESC_CD_0_R			(1UL << 45)
@@ -663,6 +668,8 @@  struct arm_smmu_device {
 #define ARM_SMMU_FEAT_E2H		(1 << 16)
 #define ARM_SMMU_FEAT_BTM		(1 << 17)
 #define ARM_SMMU_FEAT_SVA		(1 << 18)
+#define ARM_SMMU_FEAT_HA		(1 << 19)
+#define ARM_SMMU_FEAT_HD		(1 << 20)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -1718,10 +1725,17 @@  static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 		 * this substream's traffic
 		 */
 	} else { /* (1) and (2) */
+		u64 tcr = cd->tcr;
+
 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
 		cdptr[2] = 0;
 		cdptr[3] = cpu_to_le64(cd->mair);
 
+		if (!(smmu->features & ARM_SMMU_FEAT_HD))
+			tcr &= ~CTXDESC_CD_0_TCR_HD;
+		if (!(smmu->features & ARM_SMMU_FEAT_HA))
+			tcr &= ~CTXDESC_CD_0_TCR_HA;
+
 		/*
 		 * STE is live, and the SMMU might read dwords of this CD in any
 		 * order. Ensure that it observes valid values before reading
@@ -1729,7 +1743,7 @@  static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 		 */
 		arm_smmu_sync_cd(smmu_domain, ssid, true);
 
-		val = cd->tcr |
+		val = tcr |
 #ifdef __BIG_ENDIAN
 			CTXDESC_CD_0_ENDI |
 #endif
@@ -1958,10 +1972,12 @@  static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
 		return old_cd;
 	}
 
+	/* HA and HD will be filtered out later if not supported by the SMMU */
 	tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - VA_BITS) |
 	      FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) |
 	      FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) |
 	      FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) |
+	      CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD |
 	      CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
 
 	switch (PAGE_SIZE) {
@@ -4454,6 +4470,12 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 			smmu->features |= ARM_SMMU_FEAT_E2H;
 	}
 
+	if (reg & (IDR0_HA | IDR0_HD)) {
+		smmu->features |= ARM_SMMU_FEAT_HA;
+		if (reg & IDR0_HD)
+			smmu->features |= ARM_SMMU_FEAT_HD;
+	}
+
 	/*
 	 * If the CPU is using VHE, but the SMMU doesn't support it, the SMMU
 	 * will create TLB entries for NH-EL1 world and will miss the