mbox series

[RESEND,00/13] MT6779 IOMMU SUPPORT

Message ID 20191104115238.2394-1-chao.hao@mediatek.com
Headers show
Series MT6779 IOMMU SUPPORT | expand

Message

chao hao Nov. 4, 2019, 11:52 a.m. UTC
I am sorry that previous patchset(2019/10/28) has a problem with e-mail format, some websites don't recevied it,
so please based on this patchset to review, thanks!

This patchset adds mt6779 iommu support and adjusts mtk iommu software architecture mainly.
1. Add mt6779 basic function, such as smi_larb port define, registers define and so on.
2. In addition, this patchset will adjust current mtk iommu SW architecture mainly to adapt all the mtk platforms:
   Firstly, mt6779 iommu can support more HW modules, but some modules have special requirements for iova region,
   for example, CCU only can access 0x4000_0000~0x47ff_ffff, VPU only can access 0x7da0_0000~0x7fbf_ffff. Current
   architecture only support one iommu domain(include 0~4GB), all the modules allocate iova from 0~4GB region, so
   it doesn't ensure to allocate expected iova region for special module(CCU and VPU). In order to resolve the problem,
   we will create different iommu domains for special module, and every domain can include iova region which module
   needs.
   Secondly, all the iommus share one page table for current architecture by "mtk_iommu_get_m4u_data", to make the
   architecture look clearly, we will create a global page table firstly(mtk_iommu_pgtable), and all the iommus can
   use it. One page table can include 4GB iova space, so multiple iommu domains are created based on the same page
   table. New SW architecture diagram is as below:

                          iommu0   iommu1
                             |        |
                             ----------
                                  |
                         mtk_iommu_pgtable
                                  |
             ------------------------------------------
             |                    |                   |
       mtk_iommu_domain1   mtk_iommu_domain2   mtk_iommu_domain3
             |                    |                   |
        iommu_group1         iommu_group2        iommu_group3
             |                    |                   |
        iommu_domain1       iommu_domain2        iommu_domain3
             |                    |                   |
      iova region1(normal)  iova region2(CCU)   iova region3(VPU)

This patchset depends on "Improve tlb range flush"[1] and based on v5.4-rc1.
[1]http://lists.infradead.org/pipermail/linux-mediatek/2019-October/024207.html

Chao Hao (13):
  dt-bindings: mediatek: Add bindings for MT6779
  iommu/mediatek: Add mt6779 IOMMU basic support
  iommu/mediatek: Add mtk_iommu_pgtable structure
  iommu/mediatek: Remove mtk_iommu_domain_finalise
  iommu/mediatek: Remove pgtable info in mtk_iommu_domain
  iommu/mediatek: Change get the way of m4u_group
  iommu/mediatek: Add smi_larb info about device
  iommu/mediatek: Add mtk_domain_data structure
  iommu/mediatek: Remove the usage of m4u_dom variable
  iommu/mediatek: Remove mtk_iommu_get_m4u_data api
  iommu/mediatek: Add iova reserved function
  iommu/mediatek: Change single domain to multiple domains
  iommu/mediatek: Add multiple mtk_iommu_domain support for mt6779

 .../bindings/iommu/mediatek,iommu.txt         |   2 +
 drivers/iommu/mtk_iommu.c                     | 488 +++++++++++++++---
 drivers/iommu/mtk_iommu.h                     |  50 +-
 include/dt-bindings/memory/mt6779-larb-port.h | 217 ++++++++
 4 files changed, 685 insertions(+), 72 deletions(-)

--
2.18.0

Comments

Yong Wu Dec. 16, 2019, 12:07 p.m. UTC | #1
On Mon, 2019-11-04 at 19:52 +0800, Chao Hao wrote:
> 1. Add mt6779 registers define for iommu.
> 2. Add mt6779_data define to support mt6779 iommu HW init.
> 3. There are two iommus, one is mm_iommu, the other is vpu_iommu.
> MM_IOMMU is connected smi_larb to support multimedia engine to
> access DRAM, and VPU_IOMMU is connected to APU_bus to support
> VPU,MDLA,EDMA to access DRAM. MM_IOMMU and VPU_IOMMU use the same
> page table to simplify design by "mtk_iommu_get_m4u_data".
> 4. For smi_larb6, it doesn't use mm_iommu, so we can distinguish
> vpu_iommu by it when excutes iommu_probe.
> 5. For mt6779 APU_IOMMU fault id is irregular, so it was treated
> specially.
> 
> Signed-off-by: Chao Hao <chao.hao@mediatek.com>
> ---
>  drivers/iommu/mtk_iommu.c | 91 +++++++++++++++++++++++++++++++++------
>  drivers/iommu/mtk_iommu.h | 10 ++++-
>  2 files changed, 87 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index 8ca2e99964fe..f2847e661137 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -38,12 +38,24 @@
>  #define REG_MMU_INVLD_END_A			0x028
>  
>  #define REG_MMU_INV_SEL				0x038
> +#define REG_MMU_INV_SEL_MT6779			0x02c
>  #define F_INVLD_EN0				BIT(0)
>  #define F_INVLD_EN1				BIT(1)
>  
>  #define REG_MMU_STANDARD_AXI_MODE		0x048
> +
> +#define REG_MMU_MISC_CRTL_MT6779		0x048

Defining two register in the same offset look strange. see below.

> +#define REG_MMU_STANDARD_AXI_MODE_MT6779	(BIT(3) | BIT(19))
> +#define REG_MMU_COHERENCE_EN			(BIT(0) | BIT(16))
> +#define REG_MMU_IN_ORDER_WR_EN			(BIT(1) | BIT(17))
> +#define F_MMU_HALF_ENTRY_MODE_L			(BIT(5) | BIT(21))
> +#define F_MMU_BLOCKING_MODE_L			(BIT(4) | BIT(20))

The last four ones are not used. Please remove.

> +
>  #define REG_MMU_DCM_DIS				0x050
>  
> +#define REG_MMU_WR_LEN				0x054
> +#define F_MMU_WR_THROT_DIS			(BIT(5) |  BIT(21))
> +
>  #define REG_MMU_CTRL_REG			0x110
>  #define F_MMU_TF_PROT_TO_PROGRAM_ADDR		(2 << 4)
>  #define F_MMU_PREFETCH_RT_REPLACE_MOD		BIT(4)
> @@ -88,10 +100,14 @@
>  #define REG_MMU1_INVLD_PA			0x148
>  #define REG_MMU0_INT_ID				0x150
>  #define REG_MMU1_INT_ID				0x154
> +#define F_MMU_INT_ID_COMM_ID(a)			(((a) >> 9) & 0x7)
> +#define F_MMU_INT_ID_SUB_COMM_ID(a)		(((a) >> 7) & 0x3)
>  #define F_MMU_INT_ID_LARB_ID(a)			(((a) >> 7) & 0x7)
>  #define F_MMU_INT_ID_PORT_ID(a)			(((a) >> 2) & 0x1f)
> +#define F_MMU_INT_ID_COMM_APU_ID(a)		((a) & 0x3)
> +#define F_MMU_INT_ID_SUB_APU_ID(a)		(((a) >> 2) & 0x3)
>  
> -#define MTK_PROTECT_PA_ALIGN			128
> +#define MTK_PROTECT_PA_ALIGN			256
>  
>  /*
>   * Get the local arbiter ID and the portid within the larb arbiter
> @@ -165,7 +181,7 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
>  
>  	for_each_m4u(data) {
>  		writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
> -			       data->base + REG_MMU_INV_SEL);
> +			       data->base + data->plat_data->inv_sel_reg);
>  		writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
>  		wmb(); /* Make sure the tlb flush all done */
>  	}
> @@ -182,7 +198,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
>  	for_each_m4u(data) {
>  		spin_lock_irqsave(&data->tlb_lock, flags);
>  		writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
> -			       data->base + REG_MMU_INV_SEL);
> +			       data->base + data->plat_data->inv_sel_reg);
>  
>  		writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
>  		writel_relaxed(iova + size - 1,
> @@ -226,7 +242,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
>  	struct mtk_iommu_data *data = dev_id;
>  	struct mtk_iommu_domain *dom = data->m4u_dom;
>  	u32 int_state, regval, fault_iova, fault_pa;
> -	unsigned int fault_larb, fault_port;
> +	unsigned int fault_larb, fault_port, sub_comm = 0;
>  	bool layer, write;
>  
>  	/* Read error info from registers */
> @@ -242,17 +258,30 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
>  	}
>  	layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
>  	write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
> -	fault_larb = F_MMU_INT_ID_LARB_ID(regval);
>  	fault_port = F_MMU_INT_ID_PORT_ID(regval);
> +	if (data->plat_data->has_sub_comm[data->m4u_id]) {
> +		/* m4u1 is VPU in mt6779.*/
> +		if (data->m4u_id && data->plat_data->m4u_plat == M4U_MT6779) {
> +			fault_larb = F_MMU_INT_ID_COMM_APU_ID(regval);
> +			sub_comm = F_MMU_INT_ID_SUB_APU_ID(regval);
> +			fault_port = 0; /* for mt6779 APU ID is irregular */
> +		} else {
> +			fault_larb = F_MMU_INT_ID_COMM_ID(regval);
> +			sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval);
> +		}
> +	} else {
> +		fault_larb = F_MMU_INT_ID_LARB_ID(regval);
> +	}
>  
> -	fault_larb = data->plat_data->larbid_remap[fault_larb];
> +	fault_larb = data->plat_data->larbid_remap[data->m4u_id][fault_larb];
>  
>  	if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
>  			       write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
>  		dev_err_ratelimited(
>  			data->dev,
> -			"fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n",
> -			int_state, fault_iova, fault_pa, fault_larb, fault_port,
> +			"fault type=0x%x iova=0x%x pa=0x%x larb=%d sub_comm=%d port=%d regval=0x%x layer=%d %s\n",
> +			int_state, fault_iova, fault_pa, fault_larb,
> +			sub_comm, fault_port, regval,
>  			layer, write ? "write" : "read");
>  	}
>  
> @@ -545,11 +574,12 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
>  		return ret;
>  	}
>  
> +	regval = readl_relaxed(data->base + REG_MMU_CTRL_REG);
>  	if (data->plat_data->m4u_plat == M4U_MT8173)
> -		regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
> +		regval |= F_MMU_PREFETCH_RT_REPLACE_MOD |
>  			 F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173;
>  	else
> -		regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR;
> +		regval |= F_MMU_TF_PROT_TO_PROGRAM_ADDR;
>  	writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
>  
>  	regval = F_L2_MULIT_HIT_EN |
> @@ -589,6 +619,20 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
>  	if (data->plat_data->reset_axi)
>  		writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
>  
> +	if (data->plat_data->has_wr_len) {
> +		/* write command throttling mode */
> +		regval = readl_relaxed(data->base + REG_MMU_WR_LEN);
> +		regval &= ~F_MMU_WR_THROT_DIS;
> +		writel_relaxed(regval, data->base + REG_MMU_WR_LEN);
> +	}
> +	/* special settings for mmu0 (multimedia iommu) */
> +	if (data->plat_data->has_misc_ctrl[data->m4u_id]) {
> +		regval = readl_relaxed(data->base + REG_MMU_MISC_CRTL_MT6779);
> +		/* non-standard AXI mode */
> +		regval &= ~REG_MMU_STANDARD_AXI_MODE_MT6779;
> +		writel_relaxed(regval, data->base + REG_MMU_MISC_CRTL_MT6779);
> +	}

     0x48 are REG_MMU_STANDARD_AXI_MODE in both mt8173 and mt8183, while
it is REG_MMU_MISC_CRTL in mt2712, mt6779 and the latest soc, right? I
think we can use one defining, like this:

          #define  REG_MMU_MISC_CTRL 0x48

         if (!data->plat_data->has_misc_ctrl[data->m4u_id]) {
               /* Disable standard axi mode while it is
REG_MMU_STANDARD_AXI_MODE */
		writel_relaxed(0, data->base + REG_MMU_MISC_CTRL);
	 } else if (data->m4u_id == 0) {
		regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL);
		regval &= ~REG_MMU_STANDARD_AXI_MODE_MT6779;
		writel_relaxed(regval, data->base + REG_MMU_MISC_CTRL);
         }

	 Meanwhile remove the setting for REG_MMU_STANDARD_AXI_MODE above.

> +
>  	if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
>  			     dev_name(data->dev), (void *)data)) {
>  		writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR);
> @@ -678,6 +722,9 @@ static int mtk_iommu_probe(struct platform_device *pdev)
>  		}
>  		data->larb_imu[id].dev = &plarbdev->dev;
>  
> +		if (data->plat_data->m4u1_mask == (1 << id))
> +			data->m4u_id = 1;
> +
>  		component_match_add_release(dev, &match, release_of,
>  					    compare_of, larbnode);
>  	}
> @@ -731,6 +778,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev)
>  	struct mtk_iommu_suspend_reg *reg = &data->reg;
>  	void __iomem *base = data->base;
>  
> +	reg->wr_len = readl_relaxed(base + REG_MMU_WR_LEN);
>  	reg->standard_axi_mode = readl_relaxed(base +
>  					       REG_MMU_STANDARD_AXI_MODE);
>  	reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS);
> @@ -756,6 +804,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
>  		dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret);
>  		return ret;
>  	}
> +	writel_relaxed(reg->wr_len, base + REG_MMU_WR_LEN);
>  	writel_relaxed(reg->standard_axi_mode,
>  		       base + REG_MMU_STANDARD_AXI_MODE);
>  	writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS);
> @@ -779,7 +828,20 @@ static const struct mtk_iommu_plat_data mt2712_data = {
>  	.has_4gb_mode = true,
>  	.has_bclk     = true,
>  	.has_vld_pa_rng   = true,
> -	.larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
> +	.larbid_remap[0] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
> +	.inv_sel_reg = REG_MMU_INV_SEL,
> +};
> +
> +static const struct mtk_iommu_plat_data mt6779_data = {
> +	.m4u_plat = M4U_MT6779,
> +	.larbid_remap[0] = {0, 1, 2, 3, 5, 7, 10, 9},
> +	/* vp6a, vp6b, mdla/core2, mdla/edmc*/
> +	.larbid_remap[1] = {2, 0, 3, 1},
> +	.has_sub_comm = {true, true},
> +	.has_wr_len = true,
> +	.has_misc_ctrl = {true, false},
> +	.inv_sel_reg = REG_MMU_INV_SEL_MT6779,
> +	.m4u1_mask =  BIT(6),
>  };
>  
>  static const struct mtk_iommu_plat_data mt8173_data = {
> @@ -787,17 +849,20 @@ static const struct mtk_iommu_plat_data mt8173_data = {
>  	.has_4gb_mode = true,
>  	.has_bclk     = true,
>  	.reset_axi    = true,
> -	.larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
> +	.larbid_remap[0] = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
> +	.inv_sel_reg = REG_MMU_INV_SEL,
>  };
>  
>  static const struct mtk_iommu_plat_data mt8183_data = {
>  	.m4u_plat     = M4U_MT8183,
>  	.reset_axi    = true,
> -	.larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1},
> +	.larbid_remap[0] = {0, 4, 5, 6, 7, 2, 3, 1},
> +	.inv_sel_reg = REG_MMU_INV_SEL,
>  };
>  
>  static const struct of_device_id mtk_iommu_of_ids[] = {
>  	{ .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
> +	{ .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data},
>  	{ .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
>  	{ .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
>  	{}
> diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> index ea949a324e33..132dc765a40b 100644
> --- a/drivers/iommu/mtk_iommu.h
> +++ b/drivers/iommu/mtk_iommu.h
> @@ -25,11 +25,13 @@ struct mtk_iommu_suspend_reg {
>  	u32				int_main_control;
>  	u32				ivrp_paddr;
>  	u32				vld_pa_rng;
> +	u32				wr_len;
>  };
>  
>  enum mtk_iommu_plat {
>  	M4U_MT2701,
>  	M4U_MT2712,
> +	M4U_MT6779,
>  	M4U_MT8173,
>  	M4U_MT8183,
>  };
> @@ -42,7 +44,12 @@ struct mtk_iommu_plat_data {
>  	bool                has_bclk;
>  	bool                has_vld_pa_rng;
>  	bool                reset_axi;
> -	unsigned char       larbid_remap[MTK_LARB_NR_MAX];
> +	bool                has_sub_comm[2];
> +	bool                has_wr_len;
> +	bool                has_misc_ctrl[2];
> +	u32                 inv_sel_reg;
> +	u32                 m4u1_mask;

alphabetically for the new ones.

> +	unsigned char       larbid_remap[2][MTK_LARB_NR_MAX];
>  };
>  
>  struct mtk_iommu_domain;
> @@ -59,6 +66,7 @@ struct mtk_iommu_data {
>  	bool                            enable_4GB;
>  	spinlock_t			tlb_lock; /* lock for tlb range flush */
>  
> +	u32				m4u_id;
>  	struct iommu_device		iommu;
>  	const struct mtk_iommu_plat_data *plat_data;

Basically this patch looks ok for me. But please split it to several
patches:

1) Extend larbid_remap to larbid_remap[2].
   Actually mt2712 also need this. this is the mt2712 definition.
   larbid_remap[0] = {0, 1, 2, 3},
   larbid_remap[1] = {4, 5, 7, 8, 9},

2) Regarding the 0x48(misc_ctrl register)

3) Add m4u1_mask to distinguish the m4u_id.

4) Add REG_MMU_WR_LEN if you need.

5) Put inv_sel_reg in the plat_data for preparing add 0x2c support in
mt6779.

6) Add new flow to get SUB_COMMON ID and VPU larbid in the translation
fault.

7) Add mt6779 support.

>
Yong Wu Dec. 16, 2019, 12:13 p.m. UTC | #2
On Mon, 2019-11-04 at 19:52 +0800, Chao Hao wrote:
> Start with this patch, we will change the SW architecture
> to support multiple domains. SW architecture will has a big change,
> so we need to modify a little bit by more than one patch.
> The new SW overall architecture is as below:
> 
> 				iommu0   iommu1
> 				  |	    |
> 				  -----------
> 					|
> 				mtk_iommu_pgtable
> 					|
> 			------------------------------------------
> 			|		     |			 |
> 		mtk_iommu_domain1   mtk_iommu_domain2  mtk_iommu_domain3
> 			|                    |                   |
> 		iommu_group1         iommu_group2           iommu_group3
> 			|                    |                   |
> 		iommu_domain1       iommu_domain2	    iommu_domain3
> 			|                    |                   |
> 		iova region1(normal)  iova region2(CCU)    iova region3(VPU)
> 
> For current structure, no matter how many iommus there are,
> they use the same page table to simplify the usage of module.
> In order to make the software architecture more explicit, this
> patch will create a global mtk_iommu_pgtable structure to describe
> page table and all the iommus use it.

Thanks for the hard work of this file. Actually this patch and the later
ones confuse me. Why do you make this flow change? 
for making the code "more explicit" or for adding multi-domain support
in 13/13.

IMHO, the change is unnecessary.
a) For me, this change has no improvement. currently we use a global
mtk_iommu_get_m4u_data to get the M4U data. I will be very glad if you
could get rid of it. But in this patchset, You use a another global
mtk_iommu_pgtable to instead. For me. It has no improvement.

b) This patchset break the original flow. device_group give you a
software chance for initializing, then you move pagetable allocating
code into it. But it isn't device_group job.

I can not decide if your flow is right. But if you only want to add
support multi-domain, I guess you could extend the current "m4u_group"
to a array "m4u_group[N]". It may be more simple. To make mt6779
progress easily, I suggest you can use this way to support multi-domain
firstly. Then you could send this new mtk_iommu_pgtable patchset for the
code "more explicit" if you insist.

> The diagram is as below:
> 
> 	mtk_iommu_data1(MM)       mtk_iommu_data2(APU)
> 		|			   |
> 		|			   |
> 		------mtk_iommu_pgtable-----
> 
> We need to create global mtk_iommu_pgtable to include all the iova
> regions firstly and special iova regions by divided based on it,
> so the information of pgtable needs to be created in device_group.
> 
> Signed-off-by: Chao Hao <chao.hao@mediatek.com>
> ---
>  drivers/iommu/mtk_iommu.c | 84 +++++++++++++++++++++++++++++++++++++++
>  drivers/iommu/mtk_iommu.h |  1 +
>  2 files changed, 85 insertions(+)
> 
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index f2847e661137..fcbde6b0f58d 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -123,6 +123,12 @@ struct mtk_iommu_domain {
>  	struct iommu_domain		domain;
>  };
>  
> +struct mtk_iommu_pgtable {
> +	struct io_pgtable_cfg	cfg;
> +	struct io_pgtable_ops	*iop;
> +};
> +
> +static struct mtk_iommu_pgtable *share_pgtable;
>  static const struct iommu_ops mtk_iommu_ops;
>  
>  /*
> @@ -170,6 +176,11 @@ static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void)
>  	return NULL;
>  }
>  
> +static struct mtk_iommu_pgtable *mtk_iommu_get_pgtable(void)
> +{
> +	return share_pgtable;
> +}
> +
>  static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
>  {
>  	return container_of(dom, struct mtk_iommu_domain, domain);
> @@ -322,6 +333,13 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
>  {
>  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
>  
> +	if (data->pgtable) {
> +		dom->cfg = data->pgtable->cfg;
> +		dom->iop = data->pgtable->iop;
> +		dom->domain.pgsize_bitmap = data->pgtable->cfg.pgsize_bitmap;
> +		return 0;
> +	}
> +
>  	dom->cfg = (struct io_pgtable_cfg) {
>  		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
>  			IO_PGTABLE_QUIRK_NO_PERMS |
> @@ -345,6 +363,61 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
>  	return 0;
>  }
>  
> +static struct mtk_iommu_pgtable *create_pgtable(struct mtk_iommu_data *data)
> +{
> +	struct mtk_iommu_pgtable *pgtable;
> +
> +	pgtable = kzalloc(sizeof(*pgtable), GFP_KERNEL);
> +	if (!pgtable)
> +		return ERR_PTR(-ENOMEM);
> +
> +	pgtable->cfg = (struct io_pgtable_cfg) {
> +		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> +			IO_PGTABLE_QUIRK_NO_PERMS |
> +			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
> +			IO_PGTABLE_QUIRK_ARM_MTK_EXT,
> +		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
> +		.ias = 32,
> +		.oas = 34,
> +		.tlb = &mtk_iommu_flush_ops,
> +		.iommu_dev = data->dev,
> +	};
> +
> +	pgtable->iop = alloc_io_pgtable_ops(ARM_V7S, &pgtable->cfg, data);
> +	if (!pgtable->iop) {
> +		dev_err(data->dev, "Failed to alloc io pgtable\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	dev_info(data->dev, "%s create pgtable done\n", __func__);
> +
> +	return pgtable;
> +}
> +
> +static int mtk_iommu_attach_pgtable(struct mtk_iommu_data *data,
> +				    struct device *dev)
> +{
> +	struct mtk_iommu_pgtable *pgtable = mtk_iommu_get_pgtable();
> +
> +	/* create share pgtable */
> +	if (!pgtable) {
> +		pgtable = create_pgtable(data);
> +		if (IS_ERR(pgtable)) {
> +			dev_err(data->dev, "Failed to create pgtable\n");
> +			return -ENOMEM;
> +		}
> +
> +		share_pgtable = pgtable;
> +	}
> +
> +	/* binding to pgtable */
> +	data->pgtable = pgtable;
> +
> +	dev_info(data->dev, "m4u%d attach_pgtable done!\n", data->m4u_id);
> +
> +	return 0;
> +}
> +
>  static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
>  {
>  	struct mtk_iommu_domain *dom;
> @@ -508,10 +581,21 @@ static void mtk_iommu_remove_device(struct device *dev)
>  static struct iommu_group *mtk_iommu_device_group(struct device *dev)
>  {
>  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> +	struct mtk_iommu_pgtable *pgtable;
> +	int ret = 0;
>  
>  	if (!data)
>  		return ERR_PTR(-ENODEV);
>  
> +	pgtable = data->pgtable;
> +	if (!pgtable) {
> +		ret = mtk_iommu_attach_pgtable(data, dev);
> +		if (ret) {
> +			dev_err(data->dev, "Failed to device_group\n");
> +			return NULL;
> +		}
> +	}
> +
>  	/* All the client devices are in the same m4u iommu-group */
>  	if (!data->m4u_group) {
>  		data->m4u_group = iommu_group_alloc();
> diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> index 132dc765a40b..dd5f19f78b62 100644
> --- a/drivers/iommu/mtk_iommu.h
> +++ b/drivers/iommu/mtk_iommu.h
> @@ -61,6 +61,7 @@ struct mtk_iommu_data {
>  	struct clk			*bclk;
>  	phys_addr_t			protect_base; /* protect memory base */
>  	struct mtk_iommu_suspend_reg	reg;
> +	struct mtk_iommu_pgtable	*pgtable;
>  	struct mtk_iommu_domain		*m4u_dom;
>  	struct iommu_group		*m4u_group;
>  	bool                            enable_4GB;
chao hao Dec. 25, 2019, 6:58 a.m. UTC | #3
On Mon, 2019-12-16 at 20:07 +0800, Yong Wu wrote:
> On Mon, 2019-11-04 at 19:52 +0800, Chao Hao wrote:
> > 1. Add mt6779 registers define for iommu.
> > 2. Add mt6779_data define to support mt6779 iommu HW init.
> > 3. There are two iommus, one is mm_iommu, the other is vpu_iommu.
> > MM_IOMMU is connected smi_larb to support multimedia engine to
> > access DRAM, and VPU_IOMMU is connected to APU_bus to support
> > VPU,MDLA,EDMA to access DRAM. MM_IOMMU and VPU_IOMMU use the same
> > page table to simplify design by "mtk_iommu_get_m4u_data".
> > 4. For smi_larb6, it doesn't use mm_iommu, so we can distinguish
> > vpu_iommu by it when excutes iommu_probe.
> > 5. For mt6779 APU_IOMMU fault id is irregular, so it was treated
> > specially.
> > 
> > Signed-off-by: Chao Hao <chao.hao@mediatek.com>
> > ---
> >  drivers/iommu/mtk_iommu.c | 91 +++++++++++++++++++++++++++++++++------
> >  drivers/iommu/mtk_iommu.h | 10 ++++-
> >  2 files changed, 87 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > index 8ca2e99964fe..f2847e661137 100644
> > --- a/drivers/iommu/mtk_iommu.c
> > +++ b/drivers/iommu/mtk_iommu.c
> > @@ -38,12 +38,24 @@
> >  #define REG_MMU_INVLD_END_A			0x028
> >  
> >  #define REG_MMU_INV_SEL				0x038
> > +#define REG_MMU_INV_SEL_MT6779			0x02c
> >  #define F_INVLD_EN0				BIT(0)
> >  #define F_INVLD_EN1				BIT(1)
> >  
> >  #define REG_MMU_STANDARD_AXI_MODE		0x048
> > +
> > +#define REG_MMU_MISC_CRTL_MT6779		0x048
> 
> Defining two register in the same offset look strange. see below.
> 
> > +#define REG_MMU_STANDARD_AXI_MODE_MT6779	(BIT(3) | BIT(19))
> > +#define REG_MMU_COHERENCE_EN			(BIT(0) | BIT(16))
> > +#define REG_MMU_IN_ORDER_WR_EN			(BIT(1) | BIT(17))
> > +#define F_MMU_HALF_ENTRY_MODE_L			(BIT(5) | BIT(21))
> > +#define F_MMU_BLOCKING_MODE_L			(BIT(4) | BIT(20))
> 
> The last four ones are not used. Please remove.
> 
> > +
> >  #define REG_MMU_DCM_DIS				0x050
> >  
> > +#define REG_MMU_WR_LEN				0x054
> > +#define F_MMU_WR_THROT_DIS			(BIT(5) |  BIT(21))
> > +
> >  #define REG_MMU_CTRL_REG			0x110
> >  #define F_MMU_TF_PROT_TO_PROGRAM_ADDR		(2 << 4)
> >  #define F_MMU_PREFETCH_RT_REPLACE_MOD		BIT(4)
> > @@ -88,10 +100,14 @@
> >  #define REG_MMU1_INVLD_PA			0x148
> >  #define REG_MMU0_INT_ID				0x150
> >  #define REG_MMU1_INT_ID				0x154
> > +#define F_MMU_INT_ID_COMM_ID(a)			(((a) >> 9) & 0x7)
> > +#define F_MMU_INT_ID_SUB_COMM_ID(a)		(((a) >> 7) & 0x3)
> >  #define F_MMU_INT_ID_LARB_ID(a)			(((a) >> 7) & 0x7)
> >  #define F_MMU_INT_ID_PORT_ID(a)			(((a) >> 2) & 0x1f)
> > +#define F_MMU_INT_ID_COMM_APU_ID(a)		((a) & 0x3)
> > +#define F_MMU_INT_ID_SUB_APU_ID(a)		(((a) >> 2) & 0x3)
> >  
> > -#define MTK_PROTECT_PA_ALIGN			128
> > +#define MTK_PROTECT_PA_ALIGN			256
> >  
> >  /*
> >   * Get the local arbiter ID and the portid within the larb arbiter
> > @@ -165,7 +181,7 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
> >  
> >  	for_each_m4u(data) {
> >  		writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
> > -			       data->base + REG_MMU_INV_SEL);
> > +			       data->base + data->plat_data->inv_sel_reg);
> >  		writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
> >  		wmb(); /* Make sure the tlb flush all done */
> >  	}
> > @@ -182,7 +198,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,
> >  	for_each_m4u(data) {
> >  		spin_lock_irqsave(&data->tlb_lock, flags);
> >  		writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
> > -			       data->base + REG_MMU_INV_SEL);
> > +			       data->base + data->plat_data->inv_sel_reg);
> >  
> >  		writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> >  		writel_relaxed(iova + size - 1,
> > @@ -226,7 +242,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
> >  	struct mtk_iommu_data *data = dev_id;
> >  	struct mtk_iommu_domain *dom = data->m4u_dom;
> >  	u32 int_state, regval, fault_iova, fault_pa;
> > -	unsigned int fault_larb, fault_port;
> > +	unsigned int fault_larb, fault_port, sub_comm = 0;
> >  	bool layer, write;
> >  
> >  	/* Read error info from registers */
> > @@ -242,17 +258,30 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
> >  	}
> >  	layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
> >  	write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
> > -	fault_larb = F_MMU_INT_ID_LARB_ID(regval);
> >  	fault_port = F_MMU_INT_ID_PORT_ID(regval);
> > +	if (data->plat_data->has_sub_comm[data->m4u_id]) {
> > +		/* m4u1 is VPU in mt6779.*/
> > +		if (data->m4u_id && data->plat_data->m4u_plat == M4U_MT6779) {
> > +			fault_larb = F_MMU_INT_ID_COMM_APU_ID(regval);
> > +			sub_comm = F_MMU_INT_ID_SUB_APU_ID(regval);
> > +			fault_port = 0; /* for mt6779 APU ID is irregular */
> > +		} else {
> > +			fault_larb = F_MMU_INT_ID_COMM_ID(regval);
> > +			sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval);
> > +		}
> > +	} else {
> > +		fault_larb = F_MMU_INT_ID_LARB_ID(regval);
> > +	}
> >  
> > -	fault_larb = data->plat_data->larbid_remap[fault_larb];
> > +	fault_larb = data->plat_data->larbid_remap[data->m4u_id][fault_larb];
> >  
> >  	if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
> >  			       write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
> >  		dev_err_ratelimited(
> >  			data->dev,
> > -			"fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n",
> > -			int_state, fault_iova, fault_pa, fault_larb, fault_port,
> > +			"fault type=0x%x iova=0x%x pa=0x%x larb=%d sub_comm=%d port=%d regval=0x%x layer=%d %s\n",
> > +			int_state, fault_iova, fault_pa, fault_larb,
> > +			sub_comm, fault_port, regval,
> >  			layer, write ? "write" : "read");
> >  	}
> >  
> > @@ -545,11 +574,12 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
> >  		return ret;
> >  	}
> >  
> > +	regval = readl_relaxed(data->base + REG_MMU_CTRL_REG);
> >  	if (data->plat_data->m4u_plat == M4U_MT8173)
> > -		regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
> > +		regval |= F_MMU_PREFETCH_RT_REPLACE_MOD |
> >  			 F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173;
> >  	else
> > -		regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR;
> > +		regval |= F_MMU_TF_PROT_TO_PROGRAM_ADDR;
> >  	writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
> >  
> >  	regval = F_L2_MULIT_HIT_EN |
> > @@ -589,6 +619,20 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
> >  	if (data->plat_data->reset_axi)
> >  		writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
> >  
> > +	if (data->plat_data->has_wr_len) {
> > +		/* write command throttling mode */
> > +		regval = readl_relaxed(data->base + REG_MMU_WR_LEN);
> > +		regval &= ~F_MMU_WR_THROT_DIS;
> > +		writel_relaxed(regval, data->base + REG_MMU_WR_LEN);
> > +	}
> > +	/* special settings for mmu0 (multimedia iommu) */
> > +	if (data->plat_data->has_misc_ctrl[data->m4u_id]) {
> > +		regval = readl_relaxed(data->base + REG_MMU_MISC_CRTL_MT6779);
> > +		/* non-standard AXI mode */
> > +		regval &= ~REG_MMU_STANDARD_AXI_MODE_MT6779;
> > +		writel_relaxed(regval, data->base + REG_MMU_MISC_CRTL_MT6779);
> > +	}
> 
>      0x48 are REG_MMU_STANDARD_AXI_MODE in both mt8173 and mt8183, while
> it is REG_MMU_MISC_CRTL in mt2712, mt6779 and the latest soc, right? I
> think we can use one defining, like this:
> 
>           #define  REG_MMU_MISC_CTRL 0x48
> 
>          if (!data->plat_data->has_misc_ctrl[data->m4u_id]) {
>                /* Disable standard axi mode while it is
> REG_MMU_STANDARD_AXI_MODE */
> 		writel_relaxed(0, data->base + REG_MMU_MISC_CTRL);
> 	 } else if (data->m4u_id == 0) {
> 		regval = readl_relaxed(data->base + REG_MMU_MISC_CTRL);
> 		regval &= ~REG_MMU_STANDARD_AXI_MODE_MT6779;
> 		writel_relaxed(regval, data->base + REG_MMU_MISC_CTRL);
>          }
> 
> 	 Meanwhile remove the setting for REG_MMU_STANDARD_AXI_MODE above.
> 
> > +
> >  	if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
> >  			     dev_name(data->dev), (void *)data)) {
> >  		writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR);
> > @@ -678,6 +722,9 @@ static int mtk_iommu_probe(struct platform_device *pdev)
> >  		}
> >  		data->larb_imu[id].dev = &plarbdev->dev;
> >  
> > +		if (data->plat_data->m4u1_mask == (1 << id))
> > +			data->m4u_id = 1;
> > +
> >  		component_match_add_release(dev, &match, release_of,
> >  					    compare_of, larbnode);
> >  	}
> > @@ -731,6 +778,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev)
> >  	struct mtk_iommu_suspend_reg *reg = &data->reg;
> >  	void __iomem *base = data->base;
> >  
> > +	reg->wr_len = readl_relaxed(base + REG_MMU_WR_LEN);
> >  	reg->standard_axi_mode = readl_relaxed(base +
> >  					       REG_MMU_STANDARD_AXI_MODE);
> >  	reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS);
> > @@ -756,6 +804,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
> >  		dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret);
> >  		return ret;
> >  	}
> > +	writel_relaxed(reg->wr_len, base + REG_MMU_WR_LEN);
> >  	writel_relaxed(reg->standard_axi_mode,
> >  		       base + REG_MMU_STANDARD_AXI_MODE);
> >  	writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS);
> > @@ -779,7 +828,20 @@ static const struct mtk_iommu_plat_data mt2712_data = {
> >  	.has_4gb_mode = true,
> >  	.has_bclk     = true,
> >  	.has_vld_pa_rng   = true,
> > -	.larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
> > +	.larbid_remap[0] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
> > +	.inv_sel_reg = REG_MMU_INV_SEL,
> > +};
> > +
> > +static const struct mtk_iommu_plat_data mt6779_data = {
> > +	.m4u_plat = M4U_MT6779,
> > +	.larbid_remap[0] = {0, 1, 2, 3, 5, 7, 10, 9},
> > +	/* vp6a, vp6b, mdla/core2, mdla/edmc*/
> > +	.larbid_remap[1] = {2, 0, 3, 1},
> > +	.has_sub_comm = {true, true},
> > +	.has_wr_len = true,
> > +	.has_misc_ctrl = {true, false},
> > +	.inv_sel_reg = REG_MMU_INV_SEL_MT6779,
> > +	.m4u1_mask =  BIT(6),
> >  };
> >  
> >  static const struct mtk_iommu_plat_data mt8173_data = {
> > @@ -787,17 +849,20 @@ static const struct mtk_iommu_plat_data mt8173_data = {
> >  	.has_4gb_mode = true,
> >  	.has_bclk     = true,
> >  	.reset_axi    = true,
> > -	.larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
> > +	.larbid_remap[0] = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
> > +	.inv_sel_reg = REG_MMU_INV_SEL,
> >  };
> >  
> >  static const struct mtk_iommu_plat_data mt8183_data = {
> >  	.m4u_plat     = M4U_MT8183,
> >  	.reset_axi    = true,
> > -	.larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1},
> > +	.larbid_remap[0] = {0, 4, 5, 6, 7, 2, 3, 1},
> > +	.inv_sel_reg = REG_MMU_INV_SEL,
> >  };
> >  
> >  static const struct of_device_id mtk_iommu_of_ids[] = {
> >  	{ .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
> > +	{ .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data},
> >  	{ .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
> >  	{ .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
> >  	{}
> > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> > index ea949a324e33..132dc765a40b 100644
> > --- a/drivers/iommu/mtk_iommu.h
> > +++ b/drivers/iommu/mtk_iommu.h
> > @@ -25,11 +25,13 @@ struct mtk_iommu_suspend_reg {
> >  	u32				int_main_control;
> >  	u32				ivrp_paddr;
> >  	u32				vld_pa_rng;
> > +	u32				wr_len;
> >  };
> >  
> >  enum mtk_iommu_plat {
> >  	M4U_MT2701,
> >  	M4U_MT2712,
> > +	M4U_MT6779,
> >  	M4U_MT8173,
> >  	M4U_MT8183,
> >  };
> > @@ -42,7 +44,12 @@ struct mtk_iommu_plat_data {
> >  	bool                has_bclk;
> >  	bool                has_vld_pa_rng;
> >  	bool                reset_axi;
> > -	unsigned char       larbid_remap[MTK_LARB_NR_MAX];
> > +	bool                has_sub_comm[2];
> > +	bool                has_wr_len;
> > +	bool                has_misc_ctrl[2];
> > +	u32                 inv_sel_reg;
> > +	u32                 m4u1_mask;
> 
> alphabetically for the new ones.
> 
> > +	unsigned char       larbid_remap[2][MTK_LARB_NR_MAX];
> >  };
> >  
> >  struct mtk_iommu_domain;
> > @@ -59,6 +66,7 @@ struct mtk_iommu_data {
> >  	bool                            enable_4GB;
> >  	spinlock_t			tlb_lock; /* lock for tlb range flush */
> >  
> > +	u32				m4u_id;
> >  	struct iommu_device		iommu;
> >  	const struct mtk_iommu_plat_data *plat_data;
> 
> Basically this patch looks ok for me. But please split it to several
> patches:
> 
> 1) Extend larbid_remap to larbid_remap[2].
>    Actually mt2712 also need this. this is the mt2712 definition.
>    larbid_remap[0] = {0, 1, 2, 3},
>    larbid_remap[1] = {4, 5, 7, 8, 9},
> 
> 2) Regarding the 0x48(misc_ctrl register)
> 
> 3) Add m4u1_mask to distinguish the m4u_id.
> 
> 4) Add REG_MMU_WR_LEN if you need.
> 
> 5) Put inv_sel_reg in the plat_data for preparing add 0x2c support in
> mt6779.
> 
> 6) Add new flow to get SUB_COMMON ID and VPU larbid in the translation
> fault.
> 
> 7) Add mt6779 support.
> 
> > 

ok, I will modify them for your above comments in next version, thanks

>  
> 
>
chao hao Dec. 31, 2019, 9:39 a.m. UTC | #4
On Mon, 2019-12-16 at 20:13 +0800, Yong Wu wrote:
> On Mon, 2019-11-04 at 19:52 +0800, Chao Hao wrote:
> > Start with this patch, we will change the SW architecture
> > to support multiple domains. SW architecture will has a big change,
> > so we need to modify a little bit by more than one patch.
> > The new SW overall architecture is as below:
> > 
> > 				iommu0   iommu1
> > 				  |	    |
> > 				  -----------
> > 					|
> > 				mtk_iommu_pgtable
> > 					|
> > 			------------------------------------------
> > 			|		     |			 |
> > 		mtk_iommu_domain1   mtk_iommu_domain2  mtk_iommu_domain3
> > 			|                    |                   |
> > 		iommu_group1         iommu_group2           iommu_group3
> > 			|                    |                   |
> > 		iommu_domain1       iommu_domain2	    iommu_domain3
> > 			|                    |                   |
> > 		iova region1(normal)  iova region2(CCU)    iova region3(VPU)
> > 
> > For current structure, no matter how many iommus there are,
> > they use the same page table to simplify the usage of module.
> > In order to make the software architecture more explicit, this
> > patch will create a global mtk_iommu_pgtable structure to describe
> > page table and all the iommus use it.
> 
> Thanks for the hard work of this file. Actually this patch and the later
> ones confuse me. Why do you make this flow change? 
> for making the code "more explicit" or for adding multi-domain support
> in 13/13.
> 
> IMHO, the change is unnecessary.
> a) For me, this change has no improvement. currently we use a global
> mtk_iommu_get_m4u_data to get the M4U data. I will be very glad if you
> could get rid of it. But in this patchset, You use a another global
> mtk_iommu_pgtable to instead. For me. It has no improvement.

Thanks for you advice!

For current SW arch, all the IOMMU HW use the same page table, we can
use a global mtk_iommu_pgtable to discribe the information of page table
and all the IOMMU attach it, I think that it is more clear and
unambiguous. For beginners, it maybe more easily explicable? 

> 
> b) This patchset break the original flow. device_group give you a
> software chance for initializing, then you move pagetable allocating
> code into it. But it isn't device_group job.
> 

As is shown above diagram, mtk_iommu_pgtable includes iommu_group and
iommu_domain,so we need to allocate mtk_iommu_pgtable and initialize it
in device_group firstly,and then execute the original flow, it only
changes place for creating mtk_iommu_pgtable and don't break original
device_group flow.




> I can not decide if your flow is right. But if you only want to add
> support multi-domain, I guess you could extend the current "m4u_group"
> to a array "m4u_group[N]". It may be more simple. To make mt6779
> progress easily, I suggest you can use this way to support multi-domain
> firstly. Then you could send this new mtk_iommu_pgtable patchset for the
> code "more explicit" if you insist.
> 
> > The diagram is as below:
> > 
> > 	mtk_iommu_data1(MM)       mtk_iommu_data2(APU)
> > 		|			   |
> > 		|			   |
> > 		------mtk_iommu_pgtable-----
> > 
> > We need to create global mtk_iommu_pgtable to include all the iova
> > regions firstly and special iova regions by divided based on it,
> > so the information of pgtable needs to be created in device_group.
> > 
> > Signed-off-by: Chao Hao <chao.hao@mediatek.com>
> > ---
> >  drivers/iommu/mtk_iommu.c | 84 +++++++++++++++++++++++++++++++++++++++
> >  drivers/iommu/mtk_iommu.h |  1 +
> >  2 files changed, 85 insertions(+)
> > 
> > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > index f2847e661137..fcbde6b0f58d 100644
> > --- a/drivers/iommu/mtk_iommu.c
> > +++ b/drivers/iommu/mtk_iommu.c
> > @@ -123,6 +123,12 @@ struct mtk_iommu_domain {
> >  	struct iommu_domain		domain;
> >  };
> >  
> > +struct mtk_iommu_pgtable {
> > +	struct io_pgtable_cfg	cfg;
> > +	struct io_pgtable_ops	*iop;
> > +};
> > +
> > +static struct mtk_iommu_pgtable *share_pgtable;
> >  static const struct iommu_ops mtk_iommu_ops;
> >  
> >  /*
> > @@ -170,6 +176,11 @@ static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void)
> >  	return NULL;
> >  }
> >  
> > +static struct mtk_iommu_pgtable *mtk_iommu_get_pgtable(void)
> > +{
> > +	return share_pgtable;
> > +}
> > +
> >  static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
> >  {
> >  	return container_of(dom, struct mtk_iommu_domain, domain);
> > @@ -322,6 +333,13 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> >  {
> >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> >  
> > +	if (data->pgtable) {
> > +		dom->cfg = data->pgtable->cfg;
> > +		dom->iop = data->pgtable->iop;
> > +		dom->domain.pgsize_bitmap = data->pgtable->cfg.pgsize_bitmap;
> > +		return 0;
> > +	}
> > +
> >  	dom->cfg = (struct io_pgtable_cfg) {
> >  		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> >  			IO_PGTABLE_QUIRK_NO_PERMS |
> > @@ -345,6 +363,61 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> >  	return 0;
> >  }
> >  
> > +static struct mtk_iommu_pgtable *create_pgtable(struct mtk_iommu_data *data)
> > +{
> > +	struct mtk_iommu_pgtable *pgtable;
> > +
> > +	pgtable = kzalloc(sizeof(*pgtable), GFP_KERNEL);
> > +	if (!pgtable)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	pgtable->cfg = (struct io_pgtable_cfg) {
> > +		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> > +			IO_PGTABLE_QUIRK_NO_PERMS |
> > +			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
> > +			IO_PGTABLE_QUIRK_ARM_MTK_EXT,
> > +		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
> > +		.ias = 32,
> > +		.oas = 34,
> > +		.tlb = &mtk_iommu_flush_ops,
> > +		.iommu_dev = data->dev,
> > +	};
> > +
> > +	pgtable->iop = alloc_io_pgtable_ops(ARM_V7S, &pgtable->cfg, data);
> > +	if (!pgtable->iop) {
> > +		dev_err(data->dev, "Failed to alloc io pgtable\n");
> > +		return ERR_PTR(-EINVAL);
> > +	}
> > +
> > +	dev_info(data->dev, "%s create pgtable done\n", __func__);
> > +
> > +	return pgtable;
> > +}
> > +
> > +static int mtk_iommu_attach_pgtable(struct mtk_iommu_data *data,
> > +				    struct device *dev)
> > +{
> > +	struct mtk_iommu_pgtable *pgtable = mtk_iommu_get_pgtable();
> > +
> > +	/* create share pgtable */
> > +	if (!pgtable) {
> > +		pgtable = create_pgtable(data);
> > +		if (IS_ERR(pgtable)) {
> > +			dev_err(data->dev, "Failed to create pgtable\n");
> > +			return -ENOMEM;
> > +		}
> > +
> > +		share_pgtable = pgtable;
> > +	}
> > +
> > +	/* binding to pgtable */
> > +	data->pgtable = pgtable;
> > +
> > +	dev_info(data->dev, "m4u%d attach_pgtable done!\n", data->m4u_id);
> > +
> > +	return 0;
> > +}
> > +
> >  static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
> >  {
> >  	struct mtk_iommu_domain *dom;
> > @@ -508,10 +581,21 @@ static void mtk_iommu_remove_device(struct device *dev)
> >  static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> >  {
> >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > +	struct mtk_iommu_pgtable *pgtable;
> > +	int ret = 0;
> >  
> >  	if (!data)
> >  		return ERR_PTR(-ENODEV);
> >  
> > +	pgtable = data->pgtable;
> > +	if (!pgtable) {
> > +		ret = mtk_iommu_attach_pgtable(data, dev);
> > +		if (ret) {
> > +			dev_err(data->dev, "Failed to device_group\n");
> > +			return NULL;
> > +		}
> > +	}
> > +
> >  	/* All the client devices are in the same m4u iommu-group */
> >  	if (!data->m4u_group) {
> >  		data->m4u_group = iommu_group_alloc();
> > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> > index 132dc765a40b..dd5f19f78b62 100644
> > --- a/drivers/iommu/mtk_iommu.h
> > +++ b/drivers/iommu/mtk_iommu.h
> > @@ -61,6 +61,7 @@ struct mtk_iommu_data {
> >  	struct clk			*bclk;
> >  	phys_addr_t			protect_base; /* protect memory base */
> >  	struct mtk_iommu_suspend_reg	reg;
> > +	struct mtk_iommu_pgtable	*pgtable;
> >  	struct mtk_iommu_domain		*m4u_dom;
> >  	struct iommu_group		*m4u_group;
> >  	bool                            enable_4GB;
> 
>
Yong Wu Feb. 15, 2020, 12:17 p.m. UTC | #5
On Tue, 2019-12-31 at 17:39 +0800, chao hao wrote:
> On Mon, 2019-12-16 at 20:13 +0800, Yong Wu wrote:
> > On Mon, 2019-11-04 at 19:52 +0800, Chao Hao wrote:
> > > Start with this patch, we will change the SW architecture
> > > to support multiple domains. SW architecture will has a big change,
> > > so we need to modify a little bit by more than one patch.
> > > The new SW overall architecture is as below:
> > > 
> > > 				iommu0   iommu1
> > > 				  |	    |
> > > 				  -----------
> > > 					|
> > > 				mtk_iommu_pgtable
> > > 					|
> > > 			------------------------------------------
> > > 			|		     |			 |
> > > 		mtk_iommu_domain1   mtk_iommu_domain2  mtk_iommu_domain3
> > > 			|                    |                   |
> > > 		iommu_group1         iommu_group2           iommu_group3
> > > 			|                    |                   |
> > > 		iommu_domain1       iommu_domain2	    iommu_domain3
> > > 			|                    |                   |
> > > 		iova region1(normal)  iova region2(CCU)    iova region3(VPU)
> > > 
> > > For current structure, no matter how many iommus there are,
> > > they use the same page table to simplify the usage of module.
> > > In order to make the software architecture more explicit, this
> > > patch will create a global mtk_iommu_pgtable structure to describe
> > > page table and all the iommus use it.
> > 
> > Thanks for the hard work of this file. Actually this patch and the later
> > ones confuse me. Why do you make this flow change? 
> > for making the code "more explicit" or for adding multi-domain support
> > in 13/13.
> > 
> > IMHO, the change is unnecessary.
> > a) For me, this change has no improvement. currently we use a global
> > mtk_iommu_get_m4u_data to get the M4U data. I will be very glad if you
> > could get rid of it. But in this patchset, You use a another global
> > mtk_iommu_pgtable to instead. For me. It has no improvement.
> 
> Thanks for you advice!
> 
> For current SW arch, all the IOMMU HW use the same page table, we can
> use a global mtk_iommu_pgtable to discribe the information of page table

What's your plan if the 4GB iova range is not enough for us in future?
Do you plan to add a new global mtk_iommu_pgtable again?

> and all the IOMMU attach it, I think that it is more clear and
> unambiguous. For beginners, it maybe more easily explicable? 

I still don't get the necessity of this change. it is only for making
code clear from your point for view, right?

This code has been reviewed for many years, I don't know why you think
it is ambiguous. it is clear for me at lease. and I will complain that
you add a new global variable in this change.

> > 
> > b) This patchset break the original flow. device_group give you a
> > software chance for initializing, then you move pagetable allocating
> > code into it. But it isn't device_group job.
> > 
> 
> As is shown above diagram, mtk_iommu_pgtable includes iommu_group and
> iommu_domain,so we need to allocate mtk_iommu_pgtable and initialize it
> in device_group firstly,and then execute the original flow, it only
> changes place for creating mtk_iommu_pgtable and don't break original
> device_group flow.

I understand you have to do this change after you adjust the structure.
I mean that it may be not proper since allocating pagetable should not
be done in device_group logically. From here, Could we get this change
looks not good?.

> > I can not decide if your flow is right. But if you only want to add
> > support multi-domain, I guess you could extend the current "m4u_group"
> > to a array "m4u_group[N]". It may be more simple. To make mt6779
> > progress easily, I suggest you can use this way to support multi-domain
> > firstly. Then you could send this new mtk_iommu_pgtable patchset for the
> > code "more explicit" if you insist.

Could you help try this way if it could meet your requirement? Then
let's compare which one is better.


BTW, your patches(including v2) cause hangup as below since
"data->m4u_dom" was uninitialized.


Unable to handle kernel NULL pointer dereference at virtual address
0000000000000010
...
pc : mtk_iommu_tlb_flush_page_nosync+0x38/0xb8
lr : __arm_v7s_unmap+0x174/0x598
...
Call trace:
 mtk_iommu_tlb_flush_page_nosync+0x38/0xb8
 __arm_v7s_unmap+0x174/0x598
 arm_v7s_unmap+0x30/0x48
 mtk_iommu_unmap+0x20/0x28
 __iommu_unmap+0xa4/0xf8
 iommu_unmap+0x44/0x90

> > 
> > > The diagram is as below:
> > > 
> > > 	mtk_iommu_data1(MM)       mtk_iommu_data2(APU)
> > > 		|			   |
> > > 		|			   |
> > > 		------mtk_iommu_pgtable-----
> > > 
> > > We need to create global mtk_iommu_pgtable to include all the iova
> > > regions firstly and special iova regions by divided based on it,
> > > so the information of pgtable needs to be created in device_group.
> > > 
> > > Signed-off-by: Chao Hao <chao.hao@mediatek.com>
> > > ---
> > >  drivers/iommu/mtk_iommu.c | 84 +++++++++++++++++++++++++++++++++++++++
> > >  drivers/iommu/mtk_iommu.h |  1 +
> > >  2 files changed, 85 insertions(+)
> > > 
> > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > > index f2847e661137..fcbde6b0f58d 100644
> > > --- a/drivers/iommu/mtk_iommu.c
> > > +++ b/drivers/iommu/mtk_iommu.c
> > > @@ -123,6 +123,12 @@ struct mtk_iommu_domain {
> > >  	struct iommu_domain		domain;
> > >  };
> > >  
> > > +struct mtk_iommu_pgtable {
> > > +	struct io_pgtable_cfg	cfg;
> > > +	struct io_pgtable_ops	*iop;
> > > +};
> > > +
> > > +static struct mtk_iommu_pgtable *share_pgtable;
> > >  static const struct iommu_ops mtk_iommu_ops;
> > >  
> > >  /*
> > > @@ -170,6 +176,11 @@ static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void)
> > >  	return NULL;
> > >  }
> > >  
> > > +static struct mtk_iommu_pgtable *mtk_iommu_get_pgtable(void)
> > > +{
> > > +	return share_pgtable;
> > > +}
> > > +
> > >  static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
> > >  {
> > >  	return container_of(dom, struct mtk_iommu_domain, domain);
> > > @@ -322,6 +333,13 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> > >  {
> > >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > >  
> > > +	if (data->pgtable) {
> > > +		dom->cfg = data->pgtable->cfg;
> > > +		dom->iop = data->pgtable->iop;
> > > +		dom->domain.pgsize_bitmap = data->pgtable->cfg.pgsize_bitmap;
> > > +		return 0;
> > > +	}
> > > +
> > >  	dom->cfg = (struct io_pgtable_cfg) {
> > >  		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> > >  			IO_PGTABLE_QUIRK_NO_PERMS |
> > > @@ -345,6 +363,61 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> > >  	return 0;
> > >  }
> > >  
> > > +static struct mtk_iommu_pgtable *create_pgtable(struct mtk_iommu_data *data)
> > > +{
> > > +	struct mtk_iommu_pgtable *pgtable;
> > > +
> > > +	pgtable = kzalloc(sizeof(*pgtable), GFP_KERNEL);
> > > +	if (!pgtable)
> > > +		return ERR_PTR(-ENOMEM);
> > > +
> > > +	pgtable->cfg = (struct io_pgtable_cfg) {
> > > +		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> > > +			IO_PGTABLE_QUIRK_NO_PERMS |
> > > +			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
> > > +			IO_PGTABLE_QUIRK_ARM_MTK_EXT,
> > > +		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
> > > +		.ias = 32,
> > > +		.oas = 34,
> > > +		.tlb = &mtk_iommu_flush_ops,
> > > +		.iommu_dev = data->dev,
> > > +	};
> > > +
> > > +	pgtable->iop = alloc_io_pgtable_ops(ARM_V7S, &pgtable->cfg, data);
> > > +	if (!pgtable->iop) {
> > > +		dev_err(data->dev, "Failed to alloc io pgtable\n");
> > > +		return ERR_PTR(-EINVAL);
> > > +	}
> > > +
> > > +	dev_info(data->dev, "%s create pgtable done\n", __func__);
> > > +
> > > +	return pgtable;
> > > +}
> > > +
> > > +static int mtk_iommu_attach_pgtable(struct mtk_iommu_data *data,
> > > +				    struct device *dev)
> > > +{
> > > +	struct mtk_iommu_pgtable *pgtable = mtk_iommu_get_pgtable();
> > > +
> > > +	/* create share pgtable */
> > > +	if (!pgtable) {
> > > +		pgtable = create_pgtable(data);
> > > +		if (IS_ERR(pgtable)) {
> > > +			dev_err(data->dev, "Failed to create pgtable\n");
> > > +			return -ENOMEM;
> > > +		}
> > > +
> > > +		share_pgtable = pgtable;
> > > +	}
> > > +
> > > +	/* binding to pgtable */
> > > +	data->pgtable = pgtable;
> > > +
> > > +	dev_info(data->dev, "m4u%d attach_pgtable done!\n", data->m4u_id);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >  static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
> > >  {
> > >  	struct mtk_iommu_domain *dom;
> > > @@ -508,10 +581,21 @@ static void mtk_iommu_remove_device(struct device *dev)
> > >  static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> > >  {
> > >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > > +	struct mtk_iommu_pgtable *pgtable;
> > > +	int ret = 0;
> > >  
> > >  	if (!data)
> > >  		return ERR_PTR(-ENODEV);
> > >  
> > > +	pgtable = data->pgtable;
> > > +	if (!pgtable) {
> > > +		ret = mtk_iommu_attach_pgtable(data, dev);
> > > +		if (ret) {
> > > +			dev_err(data->dev, "Failed to device_group\n");
> > > +			return NULL;
> > > +		}
> > > +	}
> > > +
> > >  	/* All the client devices are in the same m4u iommu-group */
> > >  	if (!data->m4u_group) {
> > >  		data->m4u_group = iommu_group_alloc();
> > > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> > > index 132dc765a40b..dd5f19f78b62 100644
> > > --- a/drivers/iommu/mtk_iommu.h
> > > +++ b/drivers/iommu/mtk_iommu.h
> > > @@ -61,6 +61,7 @@ struct mtk_iommu_data {
> > >  	struct clk			*bclk;
> > >  	phys_addr_t			protect_base; /* protect memory base */
> > >  	struct mtk_iommu_suspend_reg	reg;
> > > +	struct mtk_iommu_pgtable	*pgtable;
> > >  	struct mtk_iommu_domain		*m4u_dom;
> > >  	struct iommu_group		*m4u_group;
> > >  	bool                            enable_4GB;
> > 
> > 
> 
>
chao hao Feb. 25, 2020, 7:25 a.m. UTC | #6
On Sat, 2020-02-15 at 20:17 +0800, Yong Wu wrote:
> On Tue, 2019-12-31 at 17:39 +0800, chao hao wrote:
> > On Mon, 2019-12-16 at 20:13 +0800, Yong Wu wrote:
> > > On Mon, 2019-11-04 at 19:52 +0800, Chao Hao wrote:
> > > > Start with this patch, we will change the SW architecture
> > > > to support multiple domains. SW architecture will has a big change,
> > > > so we need to modify a little bit by more than one patch.
> > > > The new SW overall architecture is as below:
> > > > 
> > > > 				iommu0   iommu1
> > > > 				  |	    |
> > > > 				  -----------
> > > > 					|
> > > > 				mtk_iommu_pgtable
> > > > 					|
> > > > 			------------------------------------------
> > > > 			|		     |			 |
> > > > 		mtk_iommu_domain1   mtk_iommu_domain2  mtk_iommu_domain3
> > > > 			|                    |                   |
> > > > 		iommu_group1         iommu_group2           iommu_group3
> > > > 			|                    |                   |
> > > > 		iommu_domain1       iommu_domain2	    iommu_domain3
> > > > 			|                    |                   |
> > > > 		iova region1(normal)  iova region2(CCU)    iova region3(VPU)
> > > > 
> > > > For current structure, no matter how many iommus there are,
> > > > they use the same page table to simplify the usage of module.
> > > > In order to make the software architecture more explicit, this
> > > > patch will create a global mtk_iommu_pgtable structure to describe
> > > > page table and all the iommus use it.
> > > 
> > > Thanks for the hard work of this file. Actually this patch and the later
> > > ones confuse me. Why do you make this flow change? 
> > > for making the code "more explicit" or for adding multi-domain support
> > > in 13/13.
> > > 
> > > IMHO, the change is unnecessary.
> > > a) For me, this change has no improvement. currently we use a global
> > > mtk_iommu_get_m4u_data to get the M4U data. I will be very glad if you
> > > could get rid of it. But in this patchset, You use a another global
> > > mtk_iommu_pgtable to instead. For me. It has no improvement.
> > 
> > Thanks for you advice!
> > 
> > For current SW arch, all the IOMMU HW use the same page table, we can
> > use a global mtk_iommu_pgtable to discribe the information of page table
> 
> What's your plan if the 4GB iova range is not enough for us in future?
> Do you plan to add a new global mtk_iommu_pgtable again?
> 
if the 4GB iova range is not enough, we only need to modify
mtk_domain_data structure: min_iova and max_iova, Compare with current
SW arch:
 dom->domain.geometry.aperture_start = 0;
 dom->domain.geometry.aperture_end = DMA_BIT_MASK(32);
 ==>
 dom->domain.geometry.aperture_start = pgtable->dom_region->min_iova;
 dom->domain.geometry.aperture_end = pgtable->dom_region->max_iova;
 
 struct mtk_domain_data {
          dma_addr_t      min_iova;
          dma_addr_t      max_iova;
  };

> > and all the IOMMU attach it, I think that it is more clear and
> > unambiguous. For beginners, it maybe more easily explicable? 
> 
> I still don't get the necessity of this change. it is only for making
> code clear from your point for view, right?
> 
> This code has been reviewed for many years, I don't know why you think
> it is ambiguous. it is clear for me at lease. and I will complain that
> you add a new global variable in this change.
> 
> > > 
> > > b) This patchset break the original flow. device_group give you a
> > > software chance for initializing, then you move pagetable allocating
> > > code into it. But it isn't device_group job.
> > > 
> > 
> > As is shown above diagram, mtk_iommu_pgtable includes iommu_group and
> > iommu_domain,so we need to allocate mtk_iommu_pgtable and initialize it
> > in device_group firstly,and then execute the original flow, it only
> > changes place for creating mtk_iommu_pgtable and don't break original
> > device_group flow.
> 
> I understand you have to do this change after you adjust the structure.
> I mean that it may be not proper since allocating pagetable should not
> be done in device_group logically. From here, Could we get this change
> looks not good?.
> 
 gentle ping ...

> > > I can not decide if your flow is right. But if you only want to add
> > > support multi-domain, I guess you could extend the current "m4u_group"
> > > to a array "m4u_group[N]". It may be more simple. To make mt6779
> > > progress easily, I suggest you can use this way to support multi-domain
> > > firstly. Then you could send this new mtk_iommu_pgtable patchset for the
> > > code "more explicit" if you insist.
> 
> Could you help try this way if it could meet your requirement? Then
> let's compare which one is better.
> 
> 
> BTW, your patches(including v2) cause hangup as below since
> "data->m4u_dom" was uninitialized.
> 
> 
> Unable to handle kernel NULL pointer dereference at virtual address
> 0000000000000010
> ...
> pc : mtk_iommu_tlb_flush_page_nosync+0x38/0xb8
> lr : __arm_v7s_unmap+0x174/0x598
> ...
> Call trace:
>  mtk_iommu_tlb_flush_page_nosync+0x38/0xb8
>  __arm_v7s_unmap+0x174/0x598
>  arm_v7s_unmap+0x30/0x48
>  mtk_iommu_unmap+0x20/0x28
>  __iommu_unmap+0xa4/0xf8
>  iommu_unmap+0x44/0x90
> 
yes, you are right. I will modify it in next version, thanks


> > > 
> > > > The diagram is as below:
> > > > 
> > > > 	mtk_iommu_data1(MM)       mtk_iommu_data2(APU)
> > > > 		|			   |
> > > > 		|			   |
> > > > 		------mtk_iommu_pgtable-----
> > > > 
> > > > We need to create global mtk_iommu_pgtable to include all the iova
> > > > regions firstly and special iova regions by divided based on it,
> > > > so the information of pgtable needs to be created in device_group.
> > > > 
> > > > Signed-off-by: Chao Hao <chao.hao@mediatek.com>
> > > > ---
> > > >  drivers/iommu/mtk_iommu.c | 84 +++++++++++++++++++++++++++++++++++++++
> > > >  drivers/iommu/mtk_iommu.h |  1 +
> > > >  2 files changed, 85 insertions(+)
> > > > 
> > > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > > > index f2847e661137..fcbde6b0f58d 100644
> > > > --- a/drivers/iommu/mtk_iommu.c
> > > > +++ b/drivers/iommu/mtk_iommu.c
> > > > @@ -123,6 +123,12 @@ struct mtk_iommu_domain {
> > > >  	struct iommu_domain		domain;
> > > >  };
> > > >  
> > > > +struct mtk_iommu_pgtable {
> > > > +	struct io_pgtable_cfg	cfg;
> > > > +	struct io_pgtable_ops	*iop;
> > > > +};
> > > > +
> > > > +static struct mtk_iommu_pgtable *share_pgtable;
> > > >  static const struct iommu_ops mtk_iommu_ops;
> > > >  
> > > >  /*
> > > > @@ -170,6 +176,11 @@ static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void)
> > > >  	return NULL;
> > > >  }
> > > >  
> > > > +static struct mtk_iommu_pgtable *mtk_iommu_get_pgtable(void)
> > > > +{
> > > > +	return share_pgtable;
> > > > +}
> > > > +
> > > >  static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
> > > >  {
> > > >  	return container_of(dom, struct mtk_iommu_domain, domain);
> > > > @@ -322,6 +333,13 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> > > >  {
> > > >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > > >  
> > > > +	if (data->pgtable) {
> > > > +		dom->cfg = data->pgtable->cfg;
> > > > +		dom->iop = data->pgtable->iop;
> > > > +		dom->domain.pgsize_bitmap = data->pgtable->cfg.pgsize_bitmap;
> > > > +		return 0;
> > > > +	}
> > > > +
> > > >  	dom->cfg = (struct io_pgtable_cfg) {
> > > >  		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> > > >  			IO_PGTABLE_QUIRK_NO_PERMS |
> > > > @@ -345,6 +363,61 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +static struct mtk_iommu_pgtable *create_pgtable(struct mtk_iommu_data *data)
> > > > +{
> > > > +	struct mtk_iommu_pgtable *pgtable;
> > > > +
> > > > +	pgtable = kzalloc(sizeof(*pgtable), GFP_KERNEL);
> > > > +	if (!pgtable)
> > > > +		return ERR_PTR(-ENOMEM);
> > > > +
> > > > +	pgtable->cfg = (struct io_pgtable_cfg) {
> > > > +		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> > > > +			IO_PGTABLE_QUIRK_NO_PERMS |
> > > > +			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
> > > > +			IO_PGTABLE_QUIRK_ARM_MTK_EXT,
> > > > +		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
> > > > +		.ias = 32,
> > > > +		.oas = 34,
> > > > +		.tlb = &mtk_iommu_flush_ops,
> > > > +		.iommu_dev = data->dev,
> > > > +	};
> > > > +
> > > > +	pgtable->iop = alloc_io_pgtable_ops(ARM_V7S, &pgtable->cfg, data);
> > > > +	if (!pgtable->iop) {
> > > > +		dev_err(data->dev, "Failed to alloc io pgtable\n");
> > > > +		return ERR_PTR(-EINVAL);
> > > > +	}
> > > > +
> > > > +	dev_info(data->dev, "%s create pgtable done\n", __func__);
> > > > +
> > > > +	return pgtable;
> > > > +}
> > > > +
> > > > +static int mtk_iommu_attach_pgtable(struct mtk_iommu_data *data,
> > > > +				    struct device *dev)
> > > > +{
> > > > +	struct mtk_iommu_pgtable *pgtable = mtk_iommu_get_pgtable();
> > > > +
> > > > +	/* create share pgtable */
> > > > +	if (!pgtable) {
> > > > +		pgtable = create_pgtable(data);
> > > > +		if (IS_ERR(pgtable)) {
> > > > +			dev_err(data->dev, "Failed to create pgtable\n");
> > > > +			return -ENOMEM;
> > > > +		}
> > > > +
> > > > +		share_pgtable = pgtable;
> > > > +	}
> > > > +
> > > > +	/* binding to pgtable */
> > > > +	data->pgtable = pgtable;
> > > > +
> > > > +	dev_info(data->dev, "m4u%d attach_pgtable done!\n", data->m4u_id);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > >  static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
> > > >  {
> > > >  	struct mtk_iommu_domain *dom;
> > > > @@ -508,10 +581,21 @@ static void mtk_iommu_remove_device(struct device *dev)
> > > >  static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> > > >  {
> > > >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > > > +	struct mtk_iommu_pgtable *pgtable;
> > > > +	int ret = 0;
> > > >  
> > > >  	if (!data)
> > > >  		return ERR_PTR(-ENODEV);
> > > >  
> > > > +	pgtable = data->pgtable;
> > > > +	if (!pgtable) {
> > > > +		ret = mtk_iommu_attach_pgtable(data, dev);
> > > > +		if (ret) {
> > > > +			dev_err(data->dev, "Failed to device_group\n");
> > > > +			return NULL;
> > > > +		}
> > > > +	}
> > > > +
> > > >  	/* All the client devices are in the same m4u iommu-group */
> > > >  	if (!data->m4u_group) {
> > > >  		data->m4u_group = iommu_group_alloc();
> > > > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> > > > index 132dc765a40b..dd5f19f78b62 100644
> > > > --- a/drivers/iommu/mtk_iommu.h
> > > > +++ b/drivers/iommu/mtk_iommu.h
> > > > @@ -61,6 +61,7 @@ struct mtk_iommu_data {
> > > >  	struct clk			*bclk;
> > > >  	phys_addr_t			protect_base; /* protect memory base */
> > > >  	struct mtk_iommu_suspend_reg	reg;
> > > > +	struct mtk_iommu_pgtable	*pgtable;
> > > >  	struct mtk_iommu_domain		*m4u_dom;
> > > >  	struct iommu_group		*m4u_group;
> > > >  	bool                            enable_4GB;
> > > 
> > > 
> > 
> > 
> 
>
chao hao Feb. 26, 2020, 6:36 a.m. UTC | #7
On Sat, 2020-02-15 at 20:17 +0800, Yong Wu wrote:
> On Tue, 2019-12-31 at 17:39 +0800, chao hao wrote:
> > On Mon, 2019-12-16 at 20:13 +0800, Yong Wu wrote:
> > > On Mon, 2019-11-04 at 19:52 +0800, Chao Hao wrote:
> > > > Start with this patch, we will change the SW architecture
> > > > to support multiple domains. SW architecture will has a big change,
> > > > so we need to modify a little bit by more than one patch.
> > > > The new SW overall architecture is as below:
> > > > 
> > > > 				iommu0   iommu1
> > > > 				  |	    |
> > > > 				  -----------
> > > > 					|
> > > > 				mtk_iommu_pgtable
> > > > 					|
> > > > 			------------------------------------------
> > > > 			|		     |			 |
> > > > 		mtk_iommu_domain1   mtk_iommu_domain2  mtk_iommu_domain3
> > > > 			|                    |                   |
> > > > 		iommu_group1         iommu_group2           iommu_group3
> > > > 			|                    |                   |
> > > > 		iommu_domain1       iommu_domain2	    iommu_domain3
> > > > 			|                    |                   |
> > > > 		iova region1(normal)  iova region2(CCU)    iova region3(VPU)
> > > > 
> > > > For current structure, no matter how many iommus there are,
> > > > they use the same page table to simplify the usage of module.
> > > > In order to make the software architecture more explicit, this
> > > > patch will create a global mtk_iommu_pgtable structure to describe
> > > > page table and all the iommus use it.
> > > 
> > > Thanks for the hard work of this file. Actually this patch and the later
> > > ones confuse me. Why do you make this flow change? 
> > > for making the code "more explicit" or for adding multi-domain support
> > > in 13/13.
> > > 
> > > IMHO, the change is unnecessary.
> > > a) For me, this change has no improvement. currently we use a global
> > > mtk_iommu_get_m4u_data to get the M4U data. I will be very glad if you
> > > could get rid of it. But in this patchset, You use a another global
> > > mtk_iommu_pgtable to instead. For me. It has no improvement.
> > 
> > Thanks for you advice!
> > 
> > For current SW arch, all the IOMMU HW use the same page table, we can
> > use a global mtk_iommu_pgtable to discribe the information of page table
> 
> What's your plan if the 4GB iova range is not enough for us in future?
> Do you plan to add a new global mtk_iommu_pgtable again?
> 
> > and all the IOMMU attach it, I think that it is more clear and
> > unambiguous. For beginners, it maybe more easily explicable? 
> 
> I still don't get the necessity of this change. it is only for making
> code clear from your point for view, right?
> 
> This code has been reviewed for many years, I don't know why you think
> it is ambiguous. it is clear for me at lease. and I will complain that
> you add a new global variable in this change.
> 
> > > 
> > > b) This patchset break the original flow. device_group give you a
> > > software chance for initializing, then you move pagetable allocating
> > > code into it. But it isn't device_group job.
> > > 
> > 
> > As is shown above diagram, mtk_iommu_pgtable includes iommu_group and
> > iommu_domain,so we need to allocate mtk_iommu_pgtable and initialize it
> > in device_group firstly,and then execute the original flow, it only
> > changes place for creating mtk_iommu_pgtable and don't break original
> > device_group flow.
> 
> I understand you have to do this change after you adjust the structure.
> I mean that it may be not proper since allocating pagetable should not
> be done in device_group logically. From here, Could we get this change
> looks not good?.
> 
gentle ping ...

Dear Matthias and Joerg,
From mt6779 platform, mtk_iommu.c needs to support multiple domains for
different iova regions.About the change, there are some disagreements
among our internal. We hope to get your helps and advices:

Based on current SW architecture to support multiple domain, diagram is
as below:
                           iommu0   iommu1
                              |        |
                              ----------
                                   |
              ------------------------------------------
              |                    |                   |
         iommu_group1         iommu_group2        iommu_group3
              |                    |                   |
       mtk_iommu_domain1     mtk_iommu_domain2   mtk_iommu_domain3
              |                    |                   |
       iova region1(normal)  iova region2(CCU)   iova region3(VPU)
 
  PS: the information of page table is included struct mtk_iommu_domain

In my opinion, if all the iommus share the same page table(include all
iova regions) and different iova regions are created based on the page
table, we can put the information of page table to a global
structure(mtk_iommu_pgtable) and all the iommus attach it. It maybe make
the SW architecture look clearly.
New SW architecture diagram is as below(This patchset is based on it):

                           iommu0   iommu1
                              |        |
                              ----------
                                   |
                          mtk_iommu_pgtable
                                   |
              ------------------------------------------
              |                    |                   |
        mtk_iommu_domain1   mtk_iommu_domain2   mtk_iommu_domain3
              |                    |                   |
         iommu_group1         iommu_group2        iommu_group3
              |                    |                   |
         iommu_domain1       iommu_domain2        iommu_domain3
              |                    |                   |
       iova region1(normal)  iova region2(CCU)   iova region3(VPU)

From above new SW architecture, we will create a global page table
firstly(mtk_iommu_pgtable), and all the iommus can use it. The page
table can include 4GB iova space, different iova regions are created
based on it, so the information of pgtable needs to be created in
device_group.

I have two problems to consult you, can you provide some advices? Thanks
for Matthias and Joerg so much!
(1) I don't understand if the pgtable can be created in device_group ?
(2) In addition, can you help to review which SW architecture are more
reasonable or whether it is necessary to change current SW architecture
to support multiple domain ?


Dear Yong,
If I described ambiguously, please help to add extra explanation.
Thanks a lot.


> > > I can not decide if your flow is right. But if you only want to add
> > > support multi-domain, I guess you could extend the current "m4u_group"
> > > to a array "m4u_group[N]". It may be more simple. To make mt6779
> > > progress easily, I suggest you can use this way to support multi-domain
> > > firstly. Then you could send this new mtk_iommu_pgtable patchset for the
> > > code "more explicit" if you insist.
> 
> Could you help try this way if it could meet your requirement? Then
> let's compare which one is better.
> 
> 
> BTW, your patches(including v2) cause hangup as below since
> "data->m4u_dom" was uninitialized.
> 
> 
> Unable to handle kernel NULL pointer dereference at virtual address
> 0000000000000010
> ...
> pc : mtk_iommu_tlb_flush_page_nosync+0x38/0xb8
> lr : __arm_v7s_unmap+0x174/0x598
> ...
> Call trace:
>  mtk_iommu_tlb_flush_page_nosync+0x38/0xb8
>  __arm_v7s_unmap+0x174/0x598
>  arm_v7s_unmap+0x30/0x48
>  mtk_iommu_unmap+0x20/0x28
>  __iommu_unmap+0xa4/0xf8
>  iommu_unmap+0x44/0x90
> 
> > > 
> > > > The diagram is as below:
> > > > 
> > > > 	mtk_iommu_data1(MM)       mtk_iommu_data2(APU)
> > > > 		|			   |
> > > > 		|			   |
> > > > 		------mtk_iommu_pgtable-----
> > > > 
> > > > We need to create global mtk_iommu_pgtable to include all the iova
> > > > regions firstly and special iova regions by divided based on it,
> > > > so the information of pgtable needs to be created in device_group.
> > > > 
> > > > Signed-off-by: Chao Hao <chao.hao@mediatek.com>
> > > > ---
> > > >  drivers/iommu/mtk_iommu.c | 84 +++++++++++++++++++++++++++++++++++++++
> > > >  drivers/iommu/mtk_iommu.h |  1 +
> > > >  2 files changed, 85 insertions(+)
> > > > 
> > > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > > > index f2847e661137..fcbde6b0f58d 100644
> > > > --- a/drivers/iommu/mtk_iommu.c
> > > > +++ b/drivers/iommu/mtk_iommu.c
> > > > @@ -123,6 +123,12 @@ struct mtk_iommu_domain {
> > > >  	struct iommu_domain		domain;
> > > >  };
> > > >  
> > > > +struct mtk_iommu_pgtable {
> > > > +	struct io_pgtable_cfg	cfg;
> > > > +	struct io_pgtable_ops	*iop;
> > > > +};
> > > > +
> > > > +static struct mtk_iommu_pgtable *share_pgtable;
> > > >  static const struct iommu_ops mtk_iommu_ops;
> > > >  
> > > >  /*
> > > > @@ -170,6 +176,11 @@ static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void)
> > > >  	return NULL;
> > > >  }
> > > >  
> > > > +static struct mtk_iommu_pgtable *mtk_iommu_get_pgtable(void)
> > > > +{
> > > > +	return share_pgtable;
> > > > +}
> > > > +
> > > >  static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
> > > >  {
> > > >  	return container_of(dom, struct mtk_iommu_domain, domain);
> > > > @@ -322,6 +333,13 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> > > >  {
> > > >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > > >  
> > > > +	if (data->pgtable) {
> > > > +		dom->cfg = data->pgtable->cfg;
> > > > +		dom->iop = data->pgtable->iop;
> > > > +		dom->domain.pgsize_bitmap = data->pgtable->cfg.pgsize_bitmap;
> > > > +		return 0;
> > > > +	}
> > > > +
> > > >  	dom->cfg = (struct io_pgtable_cfg) {
> > > >  		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> > > >  			IO_PGTABLE_QUIRK_NO_PERMS |
> > > > @@ -345,6 +363,61 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +static struct mtk_iommu_pgtable *create_pgtable(struct mtk_iommu_data *data)
> > > > +{
> > > > +	struct mtk_iommu_pgtable *pgtable;
> > > > +
> > > > +	pgtable = kzalloc(sizeof(*pgtable), GFP_KERNEL);
> > > > +	if (!pgtable)
> > > > +		return ERR_PTR(-ENOMEM);
> > > > +
> > > > +	pgtable->cfg = (struct io_pgtable_cfg) {
> > > > +		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> > > > +			IO_PGTABLE_QUIRK_NO_PERMS |
> > > > +			IO_PGTABLE_QUIRK_TLBI_ON_MAP |
> > > > +			IO_PGTABLE_QUIRK_ARM_MTK_EXT,
> > > > +		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
> > > > +		.ias = 32,
> > > > +		.oas = 34,
> > > > +		.tlb = &mtk_iommu_flush_ops,
> > > > +		.iommu_dev = data->dev,
> > > > +	};
> > > > +
> > > > +	pgtable->iop = alloc_io_pgtable_ops(ARM_V7S, &pgtable->cfg, data);
> > > > +	if (!pgtable->iop) {
> > > > +		dev_err(data->dev, "Failed to alloc io pgtable\n");
> > > > +		return ERR_PTR(-EINVAL);
> > > > +	}
> > > > +
> > > > +	dev_info(data->dev, "%s create pgtable done\n", __func__);
> > > > +
> > > > +	return pgtable;
> > > > +}
> > > > +
> > > > +static int mtk_iommu_attach_pgtable(struct mtk_iommu_data *data,
> > > > +				    struct device *dev)
> > > > +{
> > > > +	struct mtk_iommu_pgtable *pgtable = mtk_iommu_get_pgtable();
> > > > +
> > > > +	/* create share pgtable */
> > > > +	if (!pgtable) {
> > > > +		pgtable = create_pgtable(data);
> > > > +		if (IS_ERR(pgtable)) {
> > > > +			dev_err(data->dev, "Failed to create pgtable\n");
> > > > +			return -ENOMEM;
> > > > +		}
> > > > +
> > > > +		share_pgtable = pgtable;
> > > > +	}
> > > > +
> > > > +	/* binding to pgtable */
> > > > +	data->pgtable = pgtable;
> > > > +
> > > > +	dev_info(data->dev, "m4u%d attach_pgtable done!\n", data->m4u_id);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > >  static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
> > > >  {
> > > >  	struct mtk_iommu_domain *dom;
> > > > @@ -508,10 +581,21 @@ static void mtk_iommu_remove_device(struct device *dev)
> > > >  static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> > > >  {
> > > >  	struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
> > > > +	struct mtk_iommu_pgtable *pgtable;
> > > > +	int ret = 0;
> > > >  
> > > >  	if (!data)
> > > >  		return ERR_PTR(-ENODEV);
> > > >  
> > > > +	pgtable = data->pgtable;
> > > > +	if (!pgtable) {
> > > > +		ret = mtk_iommu_attach_pgtable(data, dev);
> > > > +		if (ret) {
> > > > +			dev_err(data->dev, "Failed to device_group\n");
> > > > +			return NULL;
> > > > +		}
> > > > +	}
> > > > +
> > > >  	/* All the client devices are in the same m4u iommu-group */
> > > >  	if (!data->m4u_group) {
> > > >  		data->m4u_group = iommu_group_alloc();
> > > > diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
> > > > index 132dc765a40b..dd5f19f78b62 100644
> > > > --- a/drivers/iommu/mtk_iommu.h
> > > > +++ b/drivers/iommu/mtk_iommu.h
> > > > @@ -61,6 +61,7 @@ struct mtk_iommu_data {
> > > >  	struct clk			*bclk;
> > > >  	phys_addr_t			protect_base; /* protect memory base */
> > > >  	struct mtk_iommu_suspend_reg	reg;
> > > > +	struct mtk_iommu_pgtable	*pgtable;
> > > >  	struct mtk_iommu_domain		*m4u_dom;
> > > >  	struct iommu_group		*m4u_group;
> > > >  	bool                            enable_4GB;
> > > 
> > > 
> > 
> > 
> 
>