diff mbox series

[v6,6/6] iommu/tegra-smmu: Add pagetable mappings to debugfs

Message ID 20210914013858.31192-7-nicoleotsuka@gmail.com
State Not Applicable
Headers show
Series iommu/tegra-smmu: Add pagetable mappings to debugfs | expand

Commit Message

Nicolin Chen Sept. 14, 2021, 1:38 a.m. UTC
This patch dumps all active mapping entries from the page table into
per-swgroup files under a debugfs directory named "mappings".

Attaching an example:

SWGROUP: hc
as->id: 0
as->attr: R|W|N
as->pd_dma: 0x0000000080c03000
{
        [index: 1023] 0xf0080c3e (count: 2)
        {
                PTE RANGE      | ATTR | PHYS               | IOVA               | SIZE
                [#1022, #1023] | 0x5  | 0x000000010bbf1000 | 0x00000000ffffe000 | 0x2000
        }
}
Total PDE count: 1
Total PTE count: 2

Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com>
---
 drivers/iommu/tegra-smmu.c | 145 +++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

Comments

Dmitry Osipenko Sept. 14, 2021, 1:29 p.m. UTC | #1
14.09.2021 04:38, Nicolin Chen пишет:
> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> +{
> +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> +}

We know that the IOVA is fixed to u32 for this controller. Can we avoid all
these dma_addr_t casts? It should make the code a tad cleaner, IMO.
Nicolin Chen Sept. 14, 2021, 6:49 p.m. UTC | #2
On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
> 14.09.2021 04:38, Nicolin Chen пишет:
> > +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> > +{
> > +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> > +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> > +}
> 
> We know that IOVA is fixed to u32 for this controller. Can we avoid all
> these dma_addr_t castings? It should make code cleaner a tad, IMO.

Tegra210 actually supports 34-bit IOVA...
Dmitry Osipenko Sept. 14, 2021, 7:20 p.m. UTC | #3
14.09.2021 21:49, Nicolin Chen пишет:
> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
>> 14.09.2021 04:38, Nicolin Chen пишет:
>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
>>> +{
>>> +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
>>> +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
>>> +}
>>
>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
> 
> Tegra210 actually supports 34-bit IOVA...
> 

It doesn't. 34-bit is PA, 32-bit is VA.

Quote from T210 TRM:

"The SMMU is a centralized virtual-to-physical translation for MSS. It
maps a 32-bit virtual address to a 34-bit physical address. If the
client address is 40 bits then bits 39:32 are ignored."

Even if it supported more than 32 bits, the returned ulong is 32-bit,
which doesn't make sense.
Nicolin Chen Sept. 15, 2021, 4:38 a.m. UTC | #4
On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
> 14.09.2021 21:49, Nicolin Chen пишет:
> > On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
> >> 14.09.2021 04:38, Nicolin Chen пишет:
> >>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> >>> +{
> >>> +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> >>> +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> >>> +}
> >>
> >> We know that IOVA is fixed to u32 for this controller. Can we avoid all
> >> these dma_addr_t castings? It should make code cleaner a tad, IMO.
> > 
> > Tegra210 actually supports 34-bit IOVA...
> > 
> 
> It doesn't. 34-bit is PA, 32-bit is VA.
> 
> Quote from T210 TRM:
> 
> "The SMMU is a centralized virtual-to-physical translation for MSS. It
> maps a 32-bit virtual address to a 34-bit physical address. If the
> client address is 40 bits then bits 39:32 are ignored."

If you scroll down by a couple of sections, you can see 34-bit
virtual addresses in section 18.6.1.2; and if checking one ASID
register, you can see it mention the extra two bits va[33:32].

However, the driver currently sets its geometry.aperture_end to
32-bit, and we can get only 32-bit IOVAs using just the PDE and PTE,
so I think it should be safe to remove the casts here. I'll
wait for a couple of days to see if there are other comments
for me to address in the next version.

> Even if it supported more than 32bit, then the returned ulong is 32bit,
> which doesn't make sense.

On ARM64 (Tegra210), isn't ulong 64-bit?
Dmitry Osipenko Sept. 15, 2021, 12:09 p.m. UTC | #5
15.09.2021 07:38, Nicolin Chen пишет:
> On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
>> 14.09.2021 21:49, Nicolin Chen пишет:
>>> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
>>>> 14.09.2021 04:38, Nicolin Chen пишет:
>>>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
>>>>> +{
>>>>> +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
>>>>> +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
>>>>> +}
>>>>
>>>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
>>>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
>>>
>>> Tegra210 actually supports 34-bit IOVA...
>>>
>>
>> It doesn't. 34-bit is PA, 32-bit is VA.
>>
>> Quote from T210 TRM:
>>
>> "The SMMU is a centralized virtual-to-physical translation for MSS. It
>> maps a 32-bit virtual address to a 34-bit physical address. If the
>> client address is 40 bits then bits 39:32 are ignored."
> 
> If you scroll down by a couple of sections, you can see 34-bit
> virtual addresses in section 18.6.1.2; and if checking one ASID
> register, you can see it mention the extra two bits va[33:32].

Thanks for the pointer. It says that only certain memory clients can
combine 4 ASIDs to form a 34-bit VA space. In this case the PA space is
split into 4GB areas and there are additional bitfields which configure
the ASID mapping of each 4GB area. Still, each ASID is 32-bit.

This is what TRM says:

"For the GPU and other clients with 34-bit address interfaces, the ASID
registers are extended to point to four ASIDs. The SMMU supports 4GB of
virtual address space per ASID, so mapping addr[33:32] into ASID[1:0]
extends the virtual address space of a client to 16GB."

> However, the driver currently sets its geometry.aperture_end to
> 32-bit, and we can only get 32-bit IOVAs using PDE and PTE only,
> so I think it should be safe to remove the castings here. I'll
> wait for a couple of days and see if there'd be other comments
> for me to address in next version.

You will need to read the special "ASID Assignment Register" which
supports 4 sub-ASIDs to translate the PA address into the actual VA. By
default all clients are limited to a single ASID and the upstream kernel
doesn't support programming of 34-bit VAs. So it isn't worth the effort to
fully translate the VA, IMO.

>> Even if it supported more than 32bit, then the returned ulong is 32bit,
>> which doesn't make sense.
> 
> On ARM64 (Tegra210), isn't ulong 64-bit?

Yes, indeed.
Dmitry Osipenko Sept. 15, 2021, 12:18 p.m. UTC | #6
15.09.2021 15:09, Dmitry Osipenko пишет:
> 15.09.2021 07:38, Nicolin Chen пишет:
>> On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
>>> 14.09.2021 21:49, Nicolin Chen пишет:
>>>> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
>>>>> 14.09.2021 04:38, Nicolin Chen пишет:
>>>>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
>>>>>> +{
>>>>>> +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
>>>>>> +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
>>>>>> +}
>>>>>
>>>>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
>>>>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
>>>>
>>>> Tegra210 actually supports 34-bit IOVA...
>>>>
>>>
>>> It doesn't. 34-bit is PA, 32-bit is VA.
>>>
>>> Quote from T210 TRM:
>>>
>>> "The SMMU is a centralized virtual-to-physical translation for MSS. It
>>> maps a 32-bit virtual address to a 34-bit physical address. If the
>>> client address is 40 bits then bits 39:32 are ignored."
>>
>> If you scroll down by a couple of sections, you can see 34-bit
>> virtual addresses in section 18.6.1.2; and if checking one ASID
>> register, you can see it mention the extra two bits va[33:32].
> 
> Thanks for the pointer. It says that only certain memory clients allow
> to combine 4 ASIDs to form 34bit VA space. In this case the PA space is
> split into 4GB areas and there are additional bitfields which configure
> the ASID mapping of each 4GB area. Still each ASID is 32bit.
> 
> This is what TRM says:
> 
> "For the GPU and other clients with 34-bit address interfaces, the ASID
> registers are extended to point to four ASIDs. The SMMU supports 4GB of
> virtual address space per ASID, so mapping addr[33:32] into ASID[1:0]
> extends the virtual address space of a client to 16GB."
> 
>> However, the driver currently sets its geometry.aperture_end to
>> 32-bit, and we can only get 32-bit IOVAs using PDE and PTE only,
>> so I think it should be safe to remove the castings here. I'll
>> wait for a couple of days and see if there'd be other comments
>> for me to address in next version.
> 
> You will need to read the special "ASID Assignment Register" which
> supports 4 sub-ASIDs to translate the PA address into the actual VA. By

* VA to PA

> default all clients are limited to a single ASID and upstream kernel
> doesn't support programming of 34bit VAs. So doesn't worth the effort to
> fully translate the VA, IMO.
> 
>>> Even if it supported more than 32bit, then the returned ulong is 32bit,
>>> which doesn't make sense.
>>
>> On ARM64 (Tegra210), isn't ulong 64-bit?
> 
> Yes, indeed.
>
Nicolin Chen Sept. 15, 2021, 10:19 p.m. UTC | #7
On Wed, Sep 15, 2021 at 03:09:48PM +0300, Dmitry Osipenko wrote:
> 15.09.2021 07:38, Nicolin Chen пишет:
> > On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
> >> 14.09.2021 21:49, Nicolin Chen пишет:
> >>> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
> >>>> 14.09.2021 04:38, Nicolin Chen пишет:
> >>>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> >>>>> +{
> >>>>> +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> >>>>> +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> >>>>> +}
> >>>>
> >>>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
> >>>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
> >>>
> >>> Tegra210 actually supports 34-bit IOVA...
> >>>
> >>
> >> It doesn't. 34-bit is PA, 32-bit is VA.
> >>
> >> Quote from T210 TRM:
> >>
> >> "The SMMU is a centralized virtual-to-physical translation for MSS. It
> >> maps a 32-bit virtual address to a 34-bit physical address. If the
> >> client address is 40 bits then bits 39:32 are ignored."
> > 
> > If you scroll down by a couple of sections, you can see 34-bit
> > virtual addresses in section 18.6.1.2; and if checking one ASID
> > register, you can see it mention the extra two bits va[33:32].
> 
> Thanks for the pointer. It says that only certain memory clients allow
> to combine 4 ASIDs to form 34bit VA space. In this case the PA space is
> split into 4GB areas and there are additional bitfields which configure
> the ASID mapping of each 4GB area. Still each ASID is 32bit.

True.

> This is what TRM says:
> 
> "For the GPU and other clients with 34-bit address interfaces, the ASID
> registers are extended to point to four ASIDs. The SMMU supports 4GB of
> virtual address space per ASID, so mapping addr[33:32] into ASID[1:0]
> extends the virtual address space of a client to 16GB."
> 
> > However, the driver currently sets its geometry.aperture_end to
> > 32-bit, and we can only get 32-bit IOVAs using PDE and PTE only,
> > so I think it should be safe to remove the castings here. I'll
> > wait for a couple of days and see if there'd be other comments
> > for me to address in next version.
> 
> You will need to read the special "ASID Assignment Register" which
> supports 4 sub-ASIDs to translate the PA address into the actual VA. By
> default all clients are limited to a single ASID and upstream kernel
> doesn't support programming of 34bit VAs. So doesn't worth the effort to
> fully translate the VA, IMO.

Yeah. It'd be easier to just stay with 32-bit. I will remove those
casts in the next version, after waiting for Thierry to take a look
at this v6 first.
Thierry Reding Oct. 7, 2021, 5:13 p.m. UTC | #8
On Mon, Sep 13, 2021 at 06:38:58PM -0700, Nicolin Chen wrote:
> This patch dumps all active mapping entries from pagetable
> to a debugfs directory named "mappings".
> 
> Attaching an example:
> 
> SWGROUP: hc
> as->id: 0
> as->attr: R|W|N
> as->pd_dma: 0x0000000080c03000
> {
>         [index: 1023] 0xf0080c3e (count: 2)
>         {
>                 PTE RANGE      | ATTR | PHYS               | IOVA               | SIZE
>                 [#1022, #1023] | 0x5  | 0x000000010bbf1000 | 0x00000000ffffe000 | 0x2000
>         }
> }
> Total PDE count: 1
> Total PTE count: 2
> 
> Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com>
> ---
>  drivers/iommu/tegra-smmu.c | 145 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 145 insertions(+)
> 
> diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
> index 68c34a4a0ecc..aac977e181f6 100644
> --- a/drivers/iommu/tegra-smmu.c
> +++ b/drivers/iommu/tegra-smmu.c
> @@ -46,6 +46,7 @@ struct tegra_smmu {
>  	struct list_head list;
>  
>  	struct dentry *debugfs;
> +	struct dentry *debugfs_mappings;
>  
>  	struct iommu_device iommu;	/* IOMMU Core code handle */
>  };
> @@ -153,6 +154,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
>  
>  #define SMMU_PDE_ATTR		(SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
>  				 SMMU_PDE_NONSECURE)
> +#define SMMU_PTE_ATTR		(SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
> +				 SMMU_PTE_NONSECURE)
> +#define SMMU_PTE_ATTR_SHIFT	29
>  
>  static unsigned int iova_pd_index(unsigned long iova)
>  {
> @@ -164,6 +168,12 @@ static unsigned int iova_pt_index(unsigned long iova)
>  	return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1);
>  }
>  
> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> +{
> +	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> +	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> +}
> +
>  static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr)
>  {
>  	addr >>= 12;
> @@ -496,6 +506,8 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
>  	mutex_unlock(&smmu->lock);
>  }
>  
> +static const struct file_operations tegra_smmu_debugfs_mappings_fops;

Could the implementation be moved up here to avoid the forward
declaration?

> +
>  static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
>  				 struct tegra_smmu_as *as,
>  				 unsigned int swgroup)
> @@ -517,6 +529,12 @@ static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
>  			dev_warn(smmu->dev,
>  				 "overwriting group->as for swgroup: %s\n", swgrp->name);
>  		group->as = as;
> +
> +		if (smmu->debugfs_mappings)
> +			debugfs_create_file(group->swgrp->name, 0444,
> +					    smmu->debugfs_mappings, group,
> +					    &tegra_smmu_debugfs_mappings_fops);
> +
>  		break;
>  	}
>  
> @@ -541,6 +559,12 @@ static void tegra_smmu_detach_as(struct tegra_smmu *smmu,
>  		if (group->swgrp != swgrp)
>  			continue;
>  		group->as = NULL;
> +
> +		if (smmu->debugfs_mappings) {
> +			d = debugfs_lookup(group->swgrp->name, smmu->debugfs_mappings);
> +			debugfs_remove(d);
> +		}
> +
>  		break;
>  	}
>  
> @@ -1124,6 +1148,125 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data)
>  
>  DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
>  
> +static int tegra_smmu_debugfs_mappings_show(struct seq_file *s, void *data)
> +{
> +	struct tegra_smmu_group *group = s->private;
> +	const struct tegra_smmu_swgroup *swgrp;
> +	struct tegra_smmu_as *as;
> +	struct tegra_smmu *smmu;
> +	unsigned int pd_index;
> +	unsigned int pt_index;
> +	unsigned long flags;
> +	u64 pte_count = 0;
> +	u32 pde_count = 0;
> +	u32 *pd, val;
> +
> +	if (!group || !group->as || !group->swgrp)
> +		return 0;
> +
> +	swgrp = group->swgrp;
> +	smmu = group->smmu;
> +	as = group->as;
> +
> +	mutex_lock(&smmu->lock);
> +
> +	val = smmu_readl(smmu, swgrp->reg) & SMMU_ASID_ENABLE;
> +	if (!val)
> +		goto unlock;
> +
> +	pd = page_address(as->pd);
> +	if (!pd)
> +		goto unlock;
> +
> +	seq_printf(s, "\nSWGROUP: %s\n", swgrp->name);
> +	seq_printf(s, "as->id: %d\nas->attr: %c|%c|%s\nas->pd_dma: %pad\n", as->id,
> +		   as->attr & SMMU_PD_READABLE ? 'R' : '-',
> +		   as->attr & SMMU_PD_WRITABLE ? 'W' : '-',
> +		   as->attr & SMMU_PD_NONSECURE ? "NS" : "S",
> +		   &as->pd_dma);
> +	seq_puts(s, "{\n");

Maybe this can be more compact by putting the name, ID, attributes and
base address onto a single line? Maybe also use "'-' : 'S'" for the
non-secure attribute to keep in line with what you've done for readable
and writable attributes.

Then again, this is going to be very verbose output anyway, so maybe it
isn't worth it.

Thierry
Nicolin Chen Oct. 7, 2021, 8:41 p.m. UTC | #9
On Thu, Oct 07, 2021 at 07:13:25PM +0200, Thierry Reding wrote:
> > @@ -496,6 +506,8 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
> >  	mutex_unlock(&smmu->lock);
> >  }
> >  
> > +static const struct file_operations tegra_smmu_debugfs_mappings_fops;
> 
> Could the implementation be moved up here to avoid the forward
> declaration?

I thought that keeping all debugfs fops together would be preferable.
But yes, I will move it if you prefer not to add a forward declaration.

> > +	seq_printf(s, "\nSWGROUP: %s\n", swgrp->name);
> > +	seq_printf(s, "as->id: %d\nas->attr: %c|%c|%s\nas->pd_dma: %pad\n", as->id,
> > +		   as->attr & SMMU_PD_READABLE ? 'R' : '-',
> > +		   as->attr & SMMU_PD_WRITABLE ? 'W' : '-',
> > +		   as->attr & SMMU_PD_NONSECURE ? "NS" : "S",
> > +		   &as->pd_dma);
> > +	seq_puts(s, "{\n");
> 
> Maybe this can be more compact by putting the name, ID, attributes and
> base address onto a single line? Maybe also use "'-' : 'S'" for the
> non-secure attribute to keep in line with what you've done for readable
> and writable attributes.

Okay. Will change that.

> Then again, this is going to be very verbose output anyway, so maybe it
> isn't worth it.

Do you mean the whole debugfs thing or just the attributes? Either
way, I don't think so: the mappings info would certainly help, judging
from our past experience, while the attributes are just one line...
diff mbox series

Patch

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 68c34a4a0ecc..aac977e181f6 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -46,6 +46,7 @@  struct tegra_smmu {
 	struct list_head list;
 
 	struct dentry *debugfs;
+	struct dentry *debugfs_mappings;
 
 	struct iommu_device iommu;	/* IOMMU Core code handle */
 };
@@ -153,6 +154,9 @@  static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
 
 #define SMMU_PDE_ATTR		(SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
 				 SMMU_PDE_NONSECURE)
+#define SMMU_PTE_ATTR		(SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
+				 SMMU_PTE_NONSECURE)
+#define SMMU_PTE_ATTR_SHIFT	29
 
 static unsigned int iova_pd_index(unsigned long iova)
 {
@@ -164,6 +168,12 @@  static unsigned int iova_pt_index(unsigned long iova)
 	return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1);
 }
 
+static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
+{
+	return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
+	       ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
+}
+
 static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr)
 {
 	addr >>= 12;
@@ -496,6 +506,8 @@  static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
 	mutex_unlock(&smmu->lock);
 }
 
+static const struct file_operations tegra_smmu_debugfs_mappings_fops;
+
 static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
 				 struct tegra_smmu_as *as,
 				 unsigned int swgroup)
@@ -517,6 +529,12 @@  static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
 			dev_warn(smmu->dev,
 				 "overwriting group->as for swgroup: %s\n", swgrp->name);
 		group->as = as;
+
+		if (smmu->debugfs_mappings)
+			debugfs_create_file(group->swgrp->name, 0444,
+					    smmu->debugfs_mappings, group,
+					    &tegra_smmu_debugfs_mappings_fops);
+
 		break;
 	}
 
@@ -541,6 +559,12 @@  static void tegra_smmu_detach_as(struct tegra_smmu *smmu,
 		if (group->swgrp != swgrp)
 			continue;
 		group->as = NULL;
+
+		if (smmu->debugfs_mappings) {
+			d = debugfs_lookup(group->swgrp->name, smmu->debugfs_mappings);
+			debugfs_remove(d);
+		}
+
 		break;
 	}
 
@@ -1124,6 +1148,125 @@  static int tegra_smmu_clients_show(struct seq_file *s, void *data)
 
 DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
 
+static int tegra_smmu_debugfs_mappings_show(struct seq_file *s, void *data)
+{
+	struct tegra_smmu_group *group = s->private;
+	const struct tegra_smmu_swgroup *swgrp;
+	struct tegra_smmu_as *as;
+	struct tegra_smmu *smmu;
+	unsigned int pd_index;
+	unsigned int pt_index;
+	unsigned long flags;
+	u64 pte_count = 0;
+	u32 pde_count = 0;
+	u32 *pd, val;
+
+	if (!group || !group->as || !group->swgrp)
+		return 0;
+
+	swgrp = group->swgrp;
+	smmu = group->smmu;
+	as = group->as;
+
+	mutex_lock(&smmu->lock);
+
+	val = smmu_readl(smmu, swgrp->reg) & SMMU_ASID_ENABLE;
+	if (!val)
+		goto unlock;
+
+	pd = page_address(as->pd);
+	if (!pd)
+		goto unlock;
+
+	seq_printf(s, "\nSWGROUP: %s\n", swgrp->name);
+	seq_printf(s, "as->id: %d\nas->attr: %c|%c|%s\nas->pd_dma: %pad\n", as->id,
+		   as->attr & SMMU_PD_READABLE ? 'R' : '-',
+		   as->attr & SMMU_PD_WRITABLE ? 'W' : '-',
+		   as->attr & SMMU_PD_NONSECURE ? "NS" : "S",
+		   &as->pd_dma);
+	seq_puts(s, "{\n");
+
+	spin_lock_irqsave(&as->lock, flags);
+
+	for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) {
+		struct page *pt_page;
+		unsigned int i;
+		u32 *addr;
+
+		/* An empty PDE should not have a pte use count */
+		WARN_ON_ONCE(!pd[pd_index] ^ !as->count[pd_index]);
+
+		/* Skip this empty PDE */
+		if (!pd[pd_index])
+			continue;
+
+		pde_count++;
+		pte_count += as->count[pd_index];
+		seq_printf(s, "\t[index: %u] 0x%x (count: %d)\n",
+			   pd_index, pd[pd_index], as->count[pd_index]);
+		pt_page = as->pts[pd_index];
+		addr = page_address(pt_page);
+
+		seq_puts(s, "\t{\n");
+		seq_printf(s, "\t\t%-14s | %-4s | %-10s%s | %-10s%s | %-11s\n",
+			   "PTE RANGE", "ATTR",
+			   "PHYS", sizeof(phys_addr_t) > 4 ? "        " : "",
+			   "IOVA", sizeof(dma_addr_t)  > 4 ? "        " : "",
+			   "SIZE");
+		for (pt_index = 0; pt_index < SMMU_NUM_PTE; pt_index += i) {
+			size_t size = SMMU_SIZE_PT;
+			dma_addr_t iova;
+			phys_addr_t pa;
+
+			i = 1;
+
+			if (!addr[pt_index])
+				continue;
+
+			iova = pd_pt_index_iova(pd_index, pt_index);
+			pa = SMMU_PFN_PHYS(addr[pt_index] & ~SMMU_PTE_ATTR);
+
+			/* Check contiguous mappings and increase size */
+			while (pt_index + i < SMMU_NUM_PTE) {
+				dma_addr_t next_iova;
+				phys_addr_t next_pa;
+
+				if (!addr[pt_index + i])
+					break;
+
+				next_iova = pd_pt_index_iova(pd_index, pt_index + i);
+				next_pa = SMMU_PFN_PHYS(addr[pt_index + i] & ~SMMU_PTE_ATTR);
+
+				/* Break at the end of a linear mapping */
+				if ((next_iova - iova != SMMU_SIZE_PT * i) ||
+				    (next_pa - pa != SMMU_SIZE_PT * i))
+					break;
+
+				i++;
+			}
+
+			seq_printf(s, "\t\t[#%-4u, #%-4u] | 0x%-2x | %pa | %pad | 0x%-9zx\n",
+				   pt_index, pt_index + i - 1,
+				   addr[pt_index] >> SMMU_PTE_ATTR_SHIFT,
+				   &pa, &iova, size * i);
+		}
+		seq_puts(s, "\t}\n");
+	}
+
+	spin_unlock_irqrestore(&as->lock, flags);
+
+	seq_puts(s, "}\n");
+	seq_printf(s, "Total PDE count: %u\n", pde_count);
+	seq_printf(s, "Total PTE count: %llu\n", pte_count);
+
+unlock:
+	mutex_unlock(&smmu->lock);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(tegra_smmu_debugfs_mappings);
+
 static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
 {
 	smmu->debugfs = debugfs_create_dir("smmu", NULL);
@@ -1134,6 +1277,8 @@  static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
 			    &tegra_smmu_swgroups_fops);
 	debugfs_create_file("clients", S_IRUGO, smmu->debugfs, smmu,
 			    &tegra_smmu_clients_fops);
+
+	smmu->debugfs_mappings = debugfs_create_dir("mappings", smmu->debugfs);
 }
 
 static void tegra_smmu_debugfs_exit(struct tegra_smmu *smmu)