Patchwork [RFC,V3,06/17] ppc/pnv: allocate pe->iommu_table dynamically

login
register
mail settings
Submitter Wei Yang
Date June 10, 2014, 1:56 a.m.
Message ID <1402365399-5121-7-git-send-email-weiyang@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/357690/
State Superseded
Headers show

Comments

Wei Yang - June 10, 2014, 1:56 a.m.
Current iommu_table of a PE is a static field. This will have a problem when
iommu_free_table is called.

This patch allocate iommu_table dynamically.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/iommu.h          |    3 +++
 arch/powerpc/platforms/powernv/pci-ioda.c |   24 +++++++++++++-----------
 arch/powerpc/platforms/powernv/pci.h      |    2 +-
 3 files changed, 17 insertions(+), 12 deletions(-)
Alexey Kardashevskiy - June 24, 2014, 10:06 a.m.
On 06/10/2014 11:56 AM, Wei Yang wrote:
> Current iommu_table of a PE is a static field. This will have a problem when
> iommu_free_table is called.

What kind of problem? This table is per PE and PE is not going anywhere.


> 
> This patch allocate iommu_table dynamically.
> 
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/iommu.h          |    3 +++
>  arch/powerpc/platforms/powernv/pci-ioda.c |   24 +++++++++++++-----------
>  arch/powerpc/platforms/powernv/pci.h      |    2 +-
>  3 files changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
> index 42632c7..0fedacb 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -78,6 +78,9 @@ struct iommu_table {
>  	struct iommu_group *it_group;
>  #endif
>  	void (*set_bypass)(struct iommu_table *tbl, bool enable);
> +#ifdef CONFIG_PPC_POWERNV
> +	void           *data;
> +#endif
>  };
>  
>  /* Pure 2^n version of get_order */
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 9715351..8ca3926 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -608,6 +608,10 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
>  		return;
>  	}
>  
> +	pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
> +			GFP_KERNEL, hose->node);
> +	pe->tce32_table->data = pe;
> +
>  	/* Associate it with all child devices */
>  	pnv_ioda_setup_same_PE(bus, pe);
>  
> @@ -675,7 +679,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
>  
>  	pe = &phb->ioda.pe_array[pdn->pe_number];
>  	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
> -	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
> +	set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
>  }
>  
>  static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
> @@ -702,7 +706,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
>  	} else {
>  		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
>  		set_dma_ops(&pdev->dev, &dma_iommu_ops);
> -		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
> +		set_iommu_table_base(&pdev->dev, pe->tce32_table);
>  	}
>  	return 0;
>  }
> @@ -712,7 +716,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
>  	struct pci_dev *dev;
>  
>  	list_for_each_entry(dev, &bus->devices, bus_list) {
> -		set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
> +		set_iommu_table_base_and_group(&dev->dev, pe->tce32_table);
>  		if (dev->subordinate)
>  			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
>  	}
> @@ -798,8 +802,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
>  void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
>  				 __be64 *startp, __be64 *endp, bool rm)
>  {
> -	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
> -					      tce32_table);
> +	struct pnv_ioda_pe *pe = tbl->data;
>  	struct pnv_phb *phb = pe->phb;
>  
>  	if (phb->type == PNV_PHB_IODA1)
> @@ -862,7 +865,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
>  	}
>  
>  	/* Setup linux iommu table */
> -	tbl = &pe->tce32_table;
> +	tbl = pe->tce32_table;
>  	pnv_pci_setup_iommu_table(tbl, addr, PNV_TCE32_TAB_SIZE * segs,
>  				  base << PNV_TCE32_SEG_SHIFT);
>  
> @@ -900,8 +903,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
>  
>  static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
>  {
> -	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
> -					      tce32_table);
> +	struct pnv_ioda_pe *pe = tbl->data;
>  	uint16_t window_id = (pe->pe_number << 1 ) + 1;
>  	int64_t rc;
>  
> @@ -942,10 +944,10 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
>  	pe->tce_bypass_base = 1ull << 59;
>  
>  	/* Install set_bypass callback for VFIO */
> -	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
> +	pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
>  
>  	/* Enable bypass by default */
> -	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
> +	pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
>  }
>  
>  static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> @@ -993,7 +995,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
>  	}
>  
>  	/* Setup linux iommu table */
> -	tbl = &pe->tce32_table;
> +	tbl = pe->tce32_table;
>  	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
>  
>  	/* OPAL variant of PHB3 invalidated TCEs */
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 90f6da4..9fbf7c0 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -60,7 +60,7 @@ struct pnv_ioda_pe {
>  	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
>  	int			tce32_seg;
>  	int			tce32_segcount;
> -	struct iommu_table	tce32_table;
> +	struct iommu_table	*tce32_table;
>  	phys_addr_t		tce_inval_reg_phys;
>  
>  	/* 64-bit TCE bypass region */
>
Wei Yang - June 25, 2014, 1:12 a.m.
On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:
>On 06/10/2014 11:56 AM, Wei Yang wrote:
>> Current iommu_table of a PE is a static field. This will have a problem when
>> iommu_free_table is called.
>
>What kind of problem? This table is per PE and PE is not going anywhere.
>

Yes, for Bus PE, they will always sit in the system. When VF PE introduced,
they could be released on the fly. When they are released, so do the iommu
table for the PE.
Alexey Kardashevskiy - June 25, 2014, 4:12 a.m.
On 06/25/2014 11:12 AM, Wei Yang wrote:
> On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:
>> On 06/10/2014 11:56 AM, Wei Yang wrote:
>>> Current iommu_table of a PE is a static field. This will have a problem when
>>> iommu_free_table is called.
>>
>> What kind of problem? This table is per PE and PE is not going anywhere.
>>
> 
> Yes, for Bus PE, they will always sit in the system. When VF PE introduced,
> they could be released on the fly. When they are released, so do the iommu
> table for the PE.

iommu_table is a part of PE struct. When PE is released, iommu_table will
go with it as well. Why to make is a pointer? I would understand it if you
added reference counting there but no - iommu_table's lifetime is equal to
PE lifetime.
Wei Yang - June 25, 2014, 5:27 a.m.
On Wed, Jun 25, 2014 at 02:12:34PM +1000, Alexey Kardashevskiy wrote:
>On 06/25/2014 11:12 AM, Wei Yang wrote:
>> On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:
>>> On 06/10/2014 11:56 AM, Wei Yang wrote:
>>>> Current iommu_table of a PE is a static field. This will have a problem when
>>>> iommu_free_table is called.
>>>
>>> What kind of problem? This table is per PE and PE is not going anywhere.
>>>
>> 
>> Yes, for Bus PE, they will always sit in the system. When VF PE introduced,
>> they could be released on the fly. When they are released, so do the iommu
>> table for the PE.
>
>iommu_table is a part of PE struct. When PE is released, iommu_table will
>go with it as well. Why to make is a pointer? I would understand it if you
>added reference counting there but no - iommu_table's lifetime is equal to
>PE lifetime.
>

Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we
need to release the iommu table. Currently, there is one function to release
the iommu table, iommu_free_table() which takes a pointer of the iommu_table
and release it.

If the iommu table in PE is just a part of PE, it will have some problem to
release it with iommu_free_table(). That's why I make it a pointer in PE
structure.

>
>
>-- 
>Alexey
Alexey Kardashevskiy - June 25, 2014, 7:50 a.m.
On 06/25/2014 03:27 PM, Wei Yang wrote:
> On Wed, Jun 25, 2014 at 02:12:34PM +1000, Alexey Kardashevskiy wrote:
>> On 06/25/2014 11:12 AM, Wei Yang wrote:
>>> On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:
>>>> On 06/10/2014 11:56 AM, Wei Yang wrote:
>>>>> Current iommu_table of a PE is a static field. This will have a problem when
>>>>> iommu_free_table is called.
>>>>
>>>> What kind of problem? This table is per PE and PE is not going anywhere.
>>>>
>>>
>>> Yes, for Bus PE, they will always sit in the system. When VF PE introduced,
>>> they could be released on the fly. When they are released, so do the iommu
>>> table for the PE.
>>
>> iommu_table is a part of PE struct. When PE is released, iommu_table will
>> go with it as well. Why to make is a pointer? I would understand it if you
>> added reference counting there but no - iommu_table's lifetime is equal to
>> PE lifetime.
>>
> 
> Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we
> need to release the iommu table. Currently, there is one function to release
> the iommu table, iommu_free_table() which takes a pointer of the iommu_table
> and release it.
> 
> If the iommu table in PE is just a part of PE, it will have some problem to
> release it with iommu_free_table(). That's why I make it a pointer in PE
> structure.

So you are saying that you want to release PE by one kfree() and release
iommu_table by another kfree (embedded into iommu_free_table()). For me
that means that PE and iommu_table have different lifetime.

And I cannot find the exact place in this patchset where you call
iommu_free_table(), what do I miss?
Benjamin Herrenschmidt - June 25, 2014, 7:56 a.m.
On Wed, 2014-06-25 at 17:50 +1000, Alexey Kardashevskiy wrote:

> > Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we
> > need to release the iommu table. Currently, there is one function to release
> > the iommu table, iommu_free_table() which takes a pointer of the iommu_table
> > and release it.
> > 
> > If the iommu table in PE is just a part of PE, it will have some problem to
> > release it with iommu_free_table(). That's why I make it a pointer in PE
> > structure.
> 
> So you are saying that you want to release PE by one kfree() and release
> iommu_table by another kfree (embedded into iommu_free_table()). For me
> that means that PE and iommu_table have different lifetime.
> 
> And I cannot find the exact place in this patchset where you call
> iommu_free_table(), what do I miss?

He has a point though... iommu_free_table() does a whole bunch of things
in addition to kfree at the end.

This is a discrepancy in the iommu.c code, we don't allocate the table,
it's allocated by our callers, but we do free it in iommu_free_table().

My gut feeling is that we should fix that in the core by moving the
kfree() out of iommu_free_table() and back into vio.c and
pseries/iommu.c, the only two callers, otherwise we can't wrap the table
structure inside another object if we are going to ever free it.

Cheers,
Ben.
Wei Yang - June 25, 2014, 9:13 a.m.
On Wed, Jun 25, 2014 at 05:50:08PM +1000, Alexey Kardashevskiy wrote:
>On 06/25/2014 03:27 PM, Wei Yang wrote:
>> On Wed, Jun 25, 2014 at 02:12:34PM +1000, Alexey Kardashevskiy wrote:
>>> On 06/25/2014 11:12 AM, Wei Yang wrote:
>>>> On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:
>>>>> On 06/10/2014 11:56 AM, Wei Yang wrote:
>>>>>> Current iommu_table of a PE is a static field. This will have a problem when
>>>>>> iommu_free_table is called.
>>>>>
>>>>> What kind of problem? This table is per PE and PE is not going anywhere.
>>>>>
>>>>
>>>> Yes, for Bus PE, they will always sit in the system. When VF PE introduced,
>>>> they could be released on the fly. When they are released, so do the iommu
>>>> table for the PE.
>>>
>>> iommu_table is a part of PE struct. When PE is released, iommu_table will
>>> go with it as well. Why to make is a pointer? I would understand it if you
>>> added reference counting there but no - iommu_table's lifetime is equal to
>>> PE lifetime.
>>>
>> 
>> Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we
>> need to release the iommu table. Currently, there is one function to release
>> the iommu table, iommu_free_table() which takes a pointer of the iommu_table
>> and release it.
>> 
>> If the iommu table in PE is just a part of PE, it will have some problem to
>> release it with iommu_free_table(). That's why I make it a pointer in PE
>> structure.
>
>So you are saying that you want to release PE by one kfree() and release
>iommu_table by another kfree (embedded into iommu_free_table()). For me
>that means that PE and iommu_table have different lifetime.
>

Hmm... it is right, the lifetime of these two may have some difference.

>And I cannot find the exact place in this patchset where you call
>iommu_free_table(), what do I miss?
>

This is called in pnv_pci_release_dev_dma(), which is introduced in the commit
cd740988: powerpc/powernv: allocate VF PE

>
>
>
>-- 
>Alexey
Wei Yang - June 25, 2014, 9:18 a.m.
On Wed, Jun 25, 2014 at 05:56:37PM +1000, Benjamin Herrenschmidt wrote:
>On Wed, 2014-06-25 at 17:50 +1000, Alexey Kardashevskiy wrote:
>
>> > Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we
>> > need to release the iommu table. Currently, there is one function to release
>> > the iommu table, iommu_free_table() which takes a pointer of the iommu_table
>> > and release it.
>> > 
>> > If the iommu table in PE is just a part of PE, it will have some problem to
>> > release it with iommu_free_table(). That's why I make it a pointer in PE
>> > structure.
>> 
>> So you are saying that you want to release PE by one kfree() and release
>> iommu_table by another kfree (embedded into iommu_free_table()). For me
>> that means that PE and iommu_table have different lifetime.
>> 
>> And I cannot find the exact place in this patchset where you call
>> iommu_free_table(), what do I miss?
>
>He has a point though... iommu_free_table() does a whole bunch of things
>in addition to kfree at the end.
>
>This is a discrepancy in the iommu.c code, we don't allocate the table,
>it's allocated by our callers, but we do free it in iommu_free_table().
>
>My gut feeling is that we should fix that in the core by moving the
>kfree() out of iommu_free_table() and back into vio.c and
>pseries/iommu.c, the only two callers, otherwise we can't wrap the table
>structure inside another object if we are going to ever free it.
>

Yes, this is another option. Move the kfree() outside could keep some logic in
current code, like in pnv_pci_ioda_tce_invalidate(). We could get the tbl from
a PE structure directly, instead of adding a field in tbl to point to the PE
structure.

>Cheers,
>Ben.
>
>
>
David Laight - June 25, 2014, 9:20 a.m.
From: Wei Yang

> On Wed, Jun 25, 2014 at 02:12:34PM +1000, Alexey Kardashevskiy wrote:

> >On 06/25/2014 11:12 AM, Wei Yang wrote:

> >> On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:

> >>> On 06/10/2014 11:56 AM, Wei Yang wrote:

> >>>> Current iommu_table of a PE is a static field. This will have a problem when

> >>>> iommu_free_table is called.

> >>>

> >>> What kind of problem? This table is per PE and PE is not going anywhere.

> >>>

> >>

> >> Yes, for Bus PE, they will always sit in the system. When VF PE introduced,

> >> they could be released on the fly. When they are released, so do the iommu

> >> table for the PE.

> >

> >iommu_table is a part of PE struct. When PE is released, iommu_table will

> >go with it as well. Why to make is a pointer? I would understand it if you

> >added reference counting there but no - iommu_table's lifetime is equal to

> >PE lifetime.

> >

> 

> Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we

> need to release the iommu table. Currently, there is one function to release

> the iommu table, iommu_free_table() which takes a pointer of the iommu_table

> and release it.

> 

> If the iommu table in PE is just a part of PE, it will have some problem to

> release it with iommu_free_table(). That's why I make it a pointer in PE

> structure.


What are the sizes of the iommu table and the PE structure?
If the table is a round number of pages then you probably don't want to
embed it inside the PE structure.

	David
Wei Yang - June 25, 2014, 9:31 a.m.
On Wed, Jun 25, 2014 at 09:20:11AM +0000, David Laight wrote:
>From: Wei Yang
>> On Wed, Jun 25, 2014 at 02:12:34PM +1000, Alexey Kardashevskiy wrote:
>> >On 06/25/2014 11:12 AM, Wei Yang wrote:
>> >> On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:
>> >>> On 06/10/2014 11:56 AM, Wei Yang wrote:
>> >>>> Current iommu_table of a PE is a static field. This will have a problem when
>> >>>> iommu_free_table is called.
>> >>>
>> >>> What kind of problem? This table is per PE and PE is not going anywhere.
>> >>>
>> >>
>> >> Yes, for Bus PE, they will always sit in the system. When VF PE introduced,
>> >> they could be released on the fly. When they are released, so do the iommu
>> >> table for the PE.
>> >
>> >iommu_table is a part of PE struct. When PE is released, iommu_table will
>> >go with it as well. Why to make is a pointer? I would understand it if you
>> >added reference counting there but no - iommu_table's lifetime is equal to
>> >PE lifetime.
>> >
>> 
>> Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we
>> need to release the iommu table. Currently, there is one function to release
>> the iommu table, iommu_free_table() which takes a pointer of the iommu_table
>> and release it.
>> 
>> If the iommu table in PE is just a part of PE, it will have some problem to
>> release it with iommu_free_table(). That's why I make it a pointer in PE
>> structure.
>
>What are the sizes of the iommu table and the PE structure?

I calculated it in my mind, the size of iommu_table, defined in
arch/powerpc/include/asm/iommu.h is 256 bytes.

>If the table is a round number of pages then you probably don't want to
>embed it inside the PE structure.

If my understanding is correct, the iommu table structure size is not that
big.

>
>	David
>
Alexey Kardashevskiy - June 25, 2014, 10:30 a.m.
On 06/25/2014 07:20 PM, David Laight wrote:
> From: Wei Yang
>> On Wed, Jun 25, 2014 at 02:12:34PM +1000, Alexey Kardashevskiy wrote:
>>> On 06/25/2014 11:12 AM, Wei Yang wrote:
>>>> On Tue, Jun 24, 2014 at 08:06:32PM +1000, Alexey Kardashevskiy wrote:
>>>>> On 06/10/2014 11:56 AM, Wei Yang wrote:
>>>>>> Current iommu_table of a PE is a static field. This will have a problem when
>>>>>> iommu_free_table is called.
>>>>>
>>>>> What kind of problem? This table is per PE and PE is not going anywhere.
>>>>>
>>>>
>>>> Yes, for Bus PE, they will always sit in the system. When VF PE introduced,
>>>> they could be released on the fly. When they are released, so do the iommu
>>>> table for the PE.
>>>
>>> iommu_table is a part of PE struct. When PE is released, iommu_table will
>>> go with it as well. Why to make is a pointer? I would understand it if you
>>> added reference counting there but no - iommu_table's lifetime is equal to
>>> PE lifetime.
>>>
>>
>> Yes, iommu_talbe's life time equals to PE lifetime, so when releasing a PE we
>> need to release the iommu table. Currently, there is one function to release
>> the iommu table, iommu_free_table() which takes a pointer of the iommu_table
>> and release it.
>>
>> If the iommu table in PE is just a part of PE, it will have some problem to
>> release it with iommu_free_table(). That's why I make it a pointer in PE
>> structure.
> 
> What are the sizes of the iommu table and the PE structure?

This is all about iommu_table struct (which is just a descriptor), not
IOMMU table per se (which may be megabytes) :)


> If the table is a round number of pages then you probably don't want to
> embed it inside the PE structure.
Benjamin Herrenschmidt - July 14, 2014, 3:12 a.m.
On Wed, 2014-06-25 at 09:20 +0000, David Laight wrote:
> What are the sizes of the iommu table and the PE structure? 
> If the table is a round number of pages then you probably don't want
> to embed it inside the PE structure. 

The problem isn't the table itself but the struct iommu_table which
contains the pointer to the actual table and various other bits of
controlling state.

Cheers,
Ben.

Patch

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 42632c7..0fedacb 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -78,6 +78,9 @@  struct iommu_table {
 	struct iommu_group *it_group;
 #endif
 	void (*set_bypass)(struct iommu_table *tbl, bool enable);
+#ifdef CONFIG_PPC_POWERNV
+	void           *data;
+#endif
 };
 
 /* Pure 2^n version of get_order */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9715351..8ca3926 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -608,6 +608,10 @@  static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 		return;
 	}
 
+	pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
+			GFP_KERNEL, hose->node);
+	pe->tce32_table->data = pe;
+
 	/* Associate it with all child devices */
 	pnv_ioda_setup_same_PE(bus, pe);
 
@@ -675,7 +679,7 @@  static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
 	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
-	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
+	set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
 }
 
 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
@@ -702,7 +706,7 @@  static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
 	} else {
 		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
 		set_dma_ops(&pdev->dev, &dma_iommu_ops);
-		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+		set_iommu_table_base(&pdev->dev, pe->tce32_table);
 	}
 	return 0;
 }
@@ -712,7 +716,7 @@  static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
 	struct pci_dev *dev;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
-		set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
+		set_iommu_table_base_and_group(&dev->dev, pe->tce32_table);
 		if (dev->subordinate)
 			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
 	}
@@ -798,8 +802,7 @@  static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
 				 __be64 *startp, __be64 *endp, bool rm)
 {
-	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
-					      tce32_table);
+	struct pnv_ioda_pe *pe = tbl->data;
 	struct pnv_phb *phb = pe->phb;
 
 	if (phb->type == PNV_PHB_IODA1)
@@ -862,7 +865,7 @@  static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 	}
 
 	/* Setup linux iommu table */
-	tbl = &pe->tce32_table;
+	tbl = pe->tce32_table;
 	pnv_pci_setup_iommu_table(tbl, addr, PNV_TCE32_TAB_SIZE * segs,
 				  base << PNV_TCE32_SEG_SHIFT);
 
@@ -900,8 +903,7 @@  static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 
 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
 {
-	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
-					      tce32_table);
+	struct pnv_ioda_pe *pe = tbl->data;
 	uint16_t window_id = (pe->pe_number << 1 ) + 1;
 	int64_t rc;
 
@@ -942,10 +944,10 @@  static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
 	pe->tce_bypass_base = 1ull << 59;
 
 	/* Install set_bypass callback for VFIO */
-	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
+	pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
 
 	/* Enable bypass by default */
-	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
+	pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
 }
 
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
@@ -993,7 +995,7 @@  static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	}
 
 	/* Setup linux iommu table */
-	tbl = &pe->tce32_table;
+	tbl = pe->tce32_table;
 	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
 
 	/* OPAL variant of PHB3 invalidated TCEs */
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 90f6da4..9fbf7c0 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -60,7 +60,7 @@  struct pnv_ioda_pe {
 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
 	int			tce32_seg;
 	int			tce32_segcount;
-	struct iommu_table	tce32_table;
+	struct iommu_table	*tce32_table;
 	phys_addr_t		tce_inval_reg_phys;
 
 	/* 64-bit TCE bypass region */