diff mbox series

[kernel,v2,2/4] powerpc/pseries: Allow not having ibm, hypertas-functions::hcall-multi-tce for DDW

Message ID 20191216041924.42318-3-aik@ozlabs.ru (mailing list archive)
State Accepted
Commit 7559d3d295f3365ea7ac0c0274c05e633fe4f594
Headers show
Series Enable IOMMU support for pseries Secure VMs | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (69b349a06f979f2c93a5d6902e57e3e19dcd0475)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 2 warnings, 2 checks, 109 lines checked

Commit Message

Alexey Kardashevskiy Dec. 16, 2019, 4:19 a.m. UTC
By default a pseries guest supports a H_PUT_TCE hypercall which maps
a single IOMMU page in a DMA window. Additionally the hypervisor may
support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once;
this is advertised via the device tree /rtas/ibm,hypertas-functions
property which Linux converts to FW_FEATURE_MULTITCE.

FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however
the code managing the huge DMA window (DDW) ignores it and calls
H_PUT_TCE_INDIRECT even if it is explicitly disabled via
the "multitce=off" kernel command line parameter.

This adds FW_FEATURE_MULTITCE checking to the DDW code path.

This changes tce_build_pSeriesLP to take liobn and page size as
the huge window does not have iommu_table descriptor which usually
the place to store these numbers.

Fixes: 4e8b0cf46b25 ("powerpc/pseries: Add support for dynamic dma windows")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---

I've put "Fixes" which is from 2011-02-10 but probably should remove it,
or otherwise all these "stable backport branch" scripts will react on
"Fixes" and try pulling this back and we do not really want this as
this patch won't help anyone with anything useful.

---
Changes:
v2
* added "fixes"
---
 arch/powerpc/platforms/pseries/iommu.c | 44 ++++++++++++++++++--------
 1 file changed, 30 insertions(+), 14 deletions(-)

Comments

Thiago Jung Bauermann Dec. 16, 2019, 11:07 p.m. UTC | #1
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> By default a pseries guest supports a H_PUT_TCE hypercall which maps
> a single IOMMU page in a DMA window. Additionally the hypervisor may
> support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once;
> this is advertised via the device tree /rtas/ibm,hypertas-functions
> property which Linux converts to FW_FEATURE_MULTITCE.
>
> FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however
> the code managing the huge DMA window (DDW) ignores it and calls
> H_PUT_TCE_INDIRECT even if it is explicitly disabled via
> the "multitce=off" kernel command line parameter.
>
> This adds FW_FEATURE_MULTITCE checking to the DDW code path.
>
> This changes tce_build_pSeriesLP to take liobn and page size as
> the huge window does not have iommu_table descriptor which usually
> the place to store these numbers.
>
> Fixes: 4e8b0cf46b25 ("powerpc/pseries: Add support for dynamic dma windows")
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Tested-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>

Some minor nits below. Feel free to ignore.

> @@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  	int ret = 0;
>  	long tcenum_start = tcenum, npages_start = npages;
>
> -	rpn = __pa(uaddr) >> TCE_SHIFT;
> +	rpn = __pa(uaddr) >> tceshift;
>  	proto_tce = TCE_PCI_READ;
>  	if (direction != DMA_TO_DEVICE)
>  		proto_tce |= TCE_PCI_WRITE;
>
>  	while (npages--) {
> -		tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
> -		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
> +		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);

Is it necessary to cast to u64 here? plpar_tce_put() takes unsigned long
for both arguments.

> @@ -261,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  	return ret;
>  }
>
> -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
> +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
>  {
>  	u64 rc;
>
>  	while (npages--) {
> -		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);

Same comment regarding cast to u64.

> @@ -400,6 +402,20 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
>  	u64 rc = 0;
>  	long l, limit;
>
> +	if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
> +		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
> +		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
> +				be64_to_cpu(maprange->dma_base);
> +		unsigned long tcenum = dmastart >> tceshift;
> +		unsigned long npages = num_pfn << PAGE_SHIFT >>
> +				be32_to_cpu(maprange->tce_shift);

Could use the tceshift variable here.

> +		void *uaddr = __va(start_pfn << PAGE_SHIFT);
> +
> +		return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
> +				tcenum, tceshift, npages, (unsigned long) uaddr,
> +				DMA_BIDIRECTIONAL, 0);
> +	}
> +
>  	local_irq_disable();	/* to protect tcep and the page behind it */
>  	tcep = __this_cpu_read(tce_page);


--
Thiago Jung Bauermann
IBM Linux Technology Center
Alexey Kardashevskiy Dec. 17, 2019, 12:09 a.m. UTC | #2
On 17/12/2019 10:07, Thiago Jung Bauermann wrote:
> 
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> 
>> By default a pseries guest supports a H_PUT_TCE hypercall which maps
>> a single IOMMU page in a DMA window. Additionally the hypervisor may
>> support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once;
>> this is advertised via the device tree /rtas/ibm,hypertas-functions
>> property which Linux converts to FW_FEATURE_MULTITCE.
>>
>> FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however
>> the code managing the huge DMA window (DDW) ignores it and calls
>> H_PUT_TCE_INDIRECT even if it is explicitly disabled via
>> the "multitce=off" kernel command line parameter.
>>
>> This adds FW_FEATURE_MULTITCE checking to the DDW code path.
>>
>> This changes tce_build_pSeriesLP to take liobn and page size as
>> the huge window does not have iommu_table descriptor which usually
>> the place to store these numbers.
>>
>> Fixes: 4e8b0cf46b25 ("powerpc/pseries: Add support for dynamic dma windows")
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> Tested-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> 
> Some minor nits below. Feel free to ignore.
> 
>> @@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
>>  	int ret = 0;
>>  	long tcenum_start = tcenum, npages_start = npages;
>>
>> -	rpn = __pa(uaddr) >> TCE_SHIFT;
>> +	rpn = __pa(uaddr) >> tceshift;
>>  	proto_tce = TCE_PCI_READ;
>>  	if (direction != DMA_TO_DEVICE)
>>  		proto_tce |= TCE_PCI_WRITE;
>>
>>  	while (npages--) {
>> -		tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
>> -		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
>> +		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
>> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
> 
> Is it necessary to cast to u64 here? plpar_tce_put() takes unsigned long
> for both arguments.


Looked as an unrelated change. Small but still unrelated.


> 
>> @@ -261,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>>  	return ret;
>>  }
>>
>> -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
>> +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
>>  {
>>  	u64 rc;
>>
>>  	while (npages--) {
>> -		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
>> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
> 
> Same comment regarding cast to u64.
> 
>> @@ -400,6 +402,20 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
>>  	u64 rc = 0;
>>  	long l, limit;
>>
>> +	if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
>> +		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
>> +		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
>> +				be64_to_cpu(maprange->dma_base);
>> +		unsigned long tcenum = dmastart >> tceshift;
>> +		unsigned long npages = num_pfn << PAGE_SHIFT >>
>> +				be32_to_cpu(maprange->tce_shift);
> 
> Could use the tceshift variable here.


True, overlooked.
Thanks for the reviews!


> 
>> +		void *uaddr = __va(start_pfn << PAGE_SHIFT);
>> +
>> +		return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
>> +				tcenum, tceshift, npages, (unsigned long) uaddr,
>> +				DMA_BIDIRECTIONAL, 0);
>> +	}
>> +
>>  	local_irq_disable();	/* to protect tcep and the page behind it */
>>  	tcep = __this_cpu_read(tce_page);
> 
> 
> --
> Thiago Jung Bauermann
> IBM Linux Technology Center
>
Thiago Jung Bauermann Dec. 17, 2019, 2:06 a.m. UTC | #3
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> On 17/12/2019 10:07, Thiago Jung Bauermann wrote:
>>
>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>
>>> By default a pseries guest supports a H_PUT_TCE hypercall which maps
>>> a single IOMMU page in a DMA window. Additionally the hypervisor may
>>> support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once;
>>> this is advertised via the device tree /rtas/ibm,hypertas-functions
>>> property which Linux converts to FW_FEATURE_MULTITCE.
>>>
>>> FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however
>>> the code managing the huge DMA window (DDW) ignores it and calls
>>> H_PUT_TCE_INDIRECT even if it is explicitly disabled via
>>> the "multitce=off" kernel command line parameter.
>>>
>>> This adds FW_FEATURE_MULTITCE checking to the DDW code path.
>>>
>>> This changes tce_build_pSeriesLP to take liobn and page size as
>>> the huge window does not have iommu_table descriptor which usually
>>> the place to store these numbers.
>>>
>>> Fixes: 4e8b0cf46b25 ("powerpc/pseries: Add support for dynamic dma windows")
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>> Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
>> Tested-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
>>
>> Some minor nits below. Feel free to ignore.
>>
>>> @@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
>>>  	int ret = 0;
>>>  	long tcenum_start = tcenum, npages_start = npages;
>>>
>>> -	rpn = __pa(uaddr) >> TCE_SHIFT;
>>> +	rpn = __pa(uaddr) >> tceshift;
>>>  	proto_tce = TCE_PCI_READ;
>>>  	if (direction != DMA_TO_DEVICE)
>>>  		proto_tce |= TCE_PCI_WRITE;
>>>
>>>  	while (npages--) {
>>> -		tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
>>> -		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
>>> +		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
>>> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
>>
>> Is it necessary to cast to u64 here? plpar_tce_put() takes unsigned long
>> for both arguments.
>
> Looked as an unrelated change. Small but still unrelated.

Ah, I hadn't noticed that the cast was already in the original code.

>>> @@ -400,6 +402,20 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
>>>  	u64 rc = 0;
>>>  	long l, limit;
>>>
>>> +	if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
>>> +		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
>>> +		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
>>> +				be64_to_cpu(maprange->dma_base);
>>> +		unsigned long tcenum = dmastart >> tceshift;
>>> +		unsigned long npages = num_pfn << PAGE_SHIFT >>
>>> +				be32_to_cpu(maprange->tce_shift);
>>
>> Could use the tceshift variable here.
>
>
> True, overlooked.
> Thanks for the reviews!

Thank you for the patches!

--
Thiago Jung Bauermann
IBM Linux Technology Center
Ram Pai Jan. 2, 2020, 10:02 p.m. UTC | #4
On Mon, Dec 16, 2019 at 03:19:22PM +1100, Alexey Kardashevskiy wrote:
> By default a pseries guest supports a H_PUT_TCE hypercall which maps
> a single IOMMU page in a DMA window. Additionally the hypervisor may
> support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once;
> this is advertised via the device tree /rtas/ibm,hypertas-functions
> property which Linux converts to FW_FEATURE_MULTITCE.
> 
> FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however
> the code managing the huge DMA window (DDW) ignores it and calls
> H_PUT_TCE_INDIRECT even if it is explicitly disabled via
> the "multitce=off" kernel command line parameter.
> 
> This adds FW_FEATURE_MULTITCE checking to the DDW code path.
> 
> This changes tce_build_pSeriesLP to take liobn and page size as
> the huge window does not have iommu_table descriptor which usually
> the place to store these numbers.
> 
> Fixes: 4e8b0cf46b25 ("powerpc/pseries: Add support for dynamic dma windows")
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> 
> I've put "Fixes" which is from 2011-02-10 but probably should remove it,
> or otherwise all these "stable backport branch" scripts will react on
> "Fixes" and try pulling this back and we do not really want this as
> this patch won't help anyone with anything useful.
> 
> ---
> Changes:
> v2
> * added "fixes"
> ---
>  arch/powerpc/platforms/pseries/iommu.c | 44 ++++++++++++++++++--------
>  1 file changed, 30 insertions(+), 14 deletions(-)

some minor comments below.

> 
> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> index df7db33ca93b..f6e9b87c82fc 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -132,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
>  	return be64_to_cpu(*tcep);
>  }
> 
> -static void tce_free_pSeriesLP(struct iommu_table*, long, long);
> +static void tce_free_pSeriesLP(unsigned long liobn, long, long);
>  static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
> 
> -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
> +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
>  				long npages, unsigned long uaddr,
>  				enum dma_data_direction direction,
>  				unsigned long attrs)
> @@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  	int ret = 0;
>  	long tcenum_start = tcenum, npages_start = npages;
> 
> -	rpn = __pa(uaddr) >> TCE_SHIFT;
> +	rpn = __pa(uaddr) >> tceshift;
>  	proto_tce = TCE_PCI_READ;
>  	if (direction != DMA_TO_DEVICE)
>  		proto_tce |= TCE_PCI_WRITE;
> 
>  	while (npages--) {
> -		tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
> -		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
> +		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
> 
>  		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
>  			ret = (int)rc;
> -			tce_free_pSeriesLP(tbl, tcenum_start,
> +			tce_free_pSeriesLP(liobn, tcenum_start,
>  			                   (npages_start - (npages + 1)));
>  			break;
>  		}
> 
>  		if (rc && printk_ratelimit()) {
>  			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
> -			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
> +			printk("\tindex   = 0x%llx\n", (u64)liobn);
>  			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
>  			printk("\ttce val = 0x%llx\n", tce );
>  			dump_stack();
> @@ -193,7 +193,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  	unsigned long flags;
> 
>  	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
> -		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
> +		return tce_build_pSeriesLP(tbl->it_index, tcenum,
> +					   tbl->it_page_shift, npages, uaddr,
>  		                           direction, attrs);
>  	}
> 
> @@ -209,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  		/* If allocation fails, fall back to the loop implementation */
>  		if (!tcep) {
>  			local_irq_restore(flags);
> -			return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
> -					    direction, attrs);
> +			return tce_build_pSeriesLP(tbl->it_index, tcenum,
> +					tbl->it_page_shift,
> +					npages, uaddr, direction, attrs);
>  		}
>  		__this_cpu_write(tce_page, tcep);
>  	}
> @@ -261,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  	return ret;
>  }
> 
> -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
> +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
>  {
>  	u64 rc;
> 
>  	while (npages--) {
> -		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
> +		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);

minor nitpick.  The magic number '12' can be replaced by TCE_SHIFT

> 
>  		if (rc && printk_ratelimit()) {
>  			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
> -			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
> +			printk("\tindex   = 0x%llx\n", (u64)liobn);
>  			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
>  			dump_stack();
>  		}
> @@ -285,7 +287,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
>  	u64 rc;
> 
>  	if (!firmware_has_feature(FW_FEATURE_MULTITCE))
> -		return tce_free_pSeriesLP(tbl, tcenum, npages);
> +		return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
> 
>  	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);

Same here.

> 
> @@ -400,6 +402,20 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
>  	u64 rc = 0;
>  	long l, limit;
> 
> +	if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
> +		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
> +		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
> +				be64_to_cpu(maprange->dma_base);
> +		unsigned long tcenum = dmastart >> tceshift;
> +		unsigned long npages = num_pfn << PAGE_SHIFT >>
> +				be32_to_cpu(maprange->tce_shift);
				^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      this can be 'tceshift'. Saves some cpu-cycles.



> +		void *uaddr = __va(start_pfn << PAGE_SHIFT);
> +
> +		return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
> +				tcenum, tceshift, npages, (unsigned long) uaddr,
> +				DMA_BIDIRECTIONAL, 0);
> +	}
> +

The code under the if statement, go into a static inline function of its own?


>  	local_irq_disable();	/* to protect tcep and the page behind it */
>  	tcep = __this_cpu_read(tce_page);
> 


Reviewed-by: Ram Pai <linuxram@us.ibm.com>
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index df7db33ca93b..f6e9b87c82fc 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -132,10 +132,10 @@  static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
 	return be64_to_cpu(*tcep);
 }
 
-static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_free_pSeriesLP(unsigned long liobn, long, long);
 static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
 
-static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
 				long npages, unsigned long uaddr,
 				enum dma_data_direction direction,
 				unsigned long attrs)
@@ -146,25 +146,25 @@  static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	int ret = 0;
 	long tcenum_start = tcenum, npages_start = npages;
 
-	rpn = __pa(uaddr) >> TCE_SHIFT;
+	rpn = __pa(uaddr) >> tceshift;
 	proto_tce = TCE_PCI_READ;
 	if (direction != DMA_TO_DEVICE)
 		proto_tce |= TCE_PCI_WRITE;
 
 	while (npages--) {
-		tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
-		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
+		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
+		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
 
 		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
 			ret = (int)rc;
-			tce_free_pSeriesLP(tbl, tcenum_start,
+			tce_free_pSeriesLP(liobn, tcenum_start,
 			                   (npages_start - (npages + 1)));
 			break;
 		}
 
 		if (rc && printk_ratelimit()) {
 			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
-			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
+			printk("\tindex   = 0x%llx\n", (u64)liobn);
 			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
 			printk("\ttce val = 0x%llx\n", tce );
 			dump_stack();
@@ -193,7 +193,8 @@  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	unsigned long flags;
 
 	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
-		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+		return tce_build_pSeriesLP(tbl->it_index, tcenum,
+					   tbl->it_page_shift, npages, uaddr,
 		                           direction, attrs);
 	}
 
@@ -209,8 +210,9 @@  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		/* If allocation fails, fall back to the loop implementation */
 		if (!tcep) {
 			local_irq_restore(flags);
-			return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
-					    direction, attrs);
+			return tce_build_pSeriesLP(tbl->it_index, tcenum,
+					tbl->it_page_shift,
+					npages, uaddr, direction, attrs);
 		}
 		__this_cpu_write(tce_page, tcep);
 	}
@@ -261,16 +263,16 @@  static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	return ret;
 }
 
-static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
 {
 	u64 rc;
 
 	while (npages--) {
-		rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
+		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
 
 		if (rc && printk_ratelimit()) {
 			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
-			printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
+			printk("\tindex   = 0x%llx\n", (u64)liobn);
 			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
 			dump_stack();
 		}
@@ -285,7 +287,7 @@  static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
 	u64 rc;
 
 	if (!firmware_has_feature(FW_FEATURE_MULTITCE))
-		return tce_free_pSeriesLP(tbl, tcenum, npages);
+		return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
 
 	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
 
@@ -400,6 +402,20 @@  static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
 	u64 rc = 0;
 	long l, limit;
 
+	if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
+		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
+		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
+				be64_to_cpu(maprange->dma_base);
+		unsigned long tcenum = dmastart >> tceshift;
+		unsigned long npages = num_pfn << PAGE_SHIFT >>
+				be32_to_cpu(maprange->tce_shift);
+		void *uaddr = __va(start_pfn << PAGE_SHIFT);
+
+		return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
+				tcenum, tceshift, npages, (unsigned long) uaddr,
+				DMA_BIDIRECTIONAL, 0);
+	}
+
 	local_irq_disable();	/* to protect tcep and the page behind it */
 	tcep = __this_cpu_read(tce_page);