diff mbox series

[kernel,v2,1/2] dma: Allow mixing bypass and normal IOMMU operation

Message ID 20201027101841.96056-2-aik@ozlabs.ru
State Superseded
Headers show
Series DMA, powerpc/dma: Fallback to dma_ops when persistent memory present | expand

Checks

Context Check Description
snowpatch_ozlabs/needsstable success Patch has no Fixes tags
snowpatch_ozlabs/checkpatch success
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (8cb17737940b156329cb5210669b9c9b23f4dd56)

Commit Message

Alexey Kardashevskiy Oct. 27, 2020, 10:18 a.m. UTC
At the moment we allow bypassing DMA ops only when we can do this for
the entire RAM. However there are configs with mixed type memory
where we could still allow bypassing IOMMU in most cases;
POWERPC with persistent memory is one example.

This adds another check for the bus limit to determine where bypass
can still work and we invoke direct DMA API; when DMA handle is outside
that limit, we fall back to DMA ops.

This adds a CONFIG_DMA_OPS_BYPASS_BUS_LIMIT config option which is off
by default and will be enable for PPC_PSERIES in the following patch.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 kernel/dma/mapping.c | 61 ++++++++++++++++++++++++++++++++++++++++++--
 kernel/dma/Kconfig   |  4 +++
 2 files changed, 63 insertions(+), 2 deletions(-)

Comments

Christoph Hellwig Oct. 27, 2020, 4:48 p.m. UTC | #1
> +static inline bool dma_handle_direct(struct device *dev, dma_addr_t dma_handle)
> +{
> +       return dma_handle >= dev->archdata.dma_offset;
> +}

This won't compile except for powerpc, and directly accesing arch members
in common code is a bad idea.  Maybe both your helpers need to be
supplied by arch code to better abstract this out.

>  	if (dma_map_direct(dev, ops))
>  		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	else if (dev->bus_dma_limit &&
> +		 can_map_direct(dev, (phys_addr_t) page_to_phys(page) + offset + size))
> +		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
> +#endif

I don't think page_to_phys needs a phys_addr_t on the return value.
I'd also much prefer if we make this a little more beautiful, here
are a few suggestions:

 - hide the bus_dma_limit check inside can_map_direct, and provide a
   stub so that we can avoid the ifdef
 - use a better name for can_map_direct, and maybe also a better calling
   convention by passing the page (the sg code also has the page), and
   maybe even hide the dma_map_direct inside it.

	if (dma_map_direct(dev, ops) ||
	    arch_dma_map_page_direct(dev, page, offset, size))
		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);

>  	BUG_ON(!valid_dma_direction(dir));
>  	if (dma_map_direct(dev, ops))
>  		dma_direct_unmap_page(dev, addr, size, dir, attrs);
> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	else if (dev->bus_dma_limit && dma_handle_direct(dev, addr + size))
> +		dma_direct_unmap_page(dev, addr, size, dir, attrs);
> +#endif

Same here.

>  	if (dma_map_direct(dev, ops))
>  		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	else if (dev->bus_dma_limit) {
> +		struct scatterlist *s;
> +		bool direct = true;
> +		int i;
> +
> +		for_each_sg(sg, s, nents, i) {
> +			direct = can_map_direct(dev, sg_phys(s) + s->offset + s->length);
> +			if (!direct)
> +				break;
> +		}
> +		if (direct)
> +			ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
> +		else
> +			ents = ops->map_sg(dev, sg, nents, dir, attrs);
> +	}
> +#endif

This needs to go into a helper as well.  I think the same style as
above would work pretty nicely as well:

 	if (dma_map_direct(dev, ops) ||
	    arch_dma_map_sg_direct(dev, sg, nents))
 		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
 	else
 		ents = ops->map_sg(dev, sg, nents, dir, attrs);

> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	if (dev->bus_dma_limit) {
> +		struct scatterlist *s;
> +		bool direct = true;
> +		int i;
> +
> +		for_each_sg(sg, s, nents, i) {
> +			direct = dma_handle_direct(dev, s->dma_address + s->length);
> +			if (!direct)
> +				break;
> +		}
> +		if (direct) {
> +			dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
> +			return;
> +		}
> +	}
> +#endif

One more time here..
Alexey Kardashevskiy Oct. 28, 2020, 6:55 a.m. UTC | #2
On 28/10/2020 03:48, Christoph Hellwig wrote:
>> +static inline bool dma_handle_direct(struct device *dev, dma_addr_t dma_handle)
>> +{
>> +       return dma_handle >= dev->archdata.dma_offset;
>> +}
> 
> This won't compile except for powerpc, and directly accesing arch members
> in common code is a bad idea.  Maybe both your helpers need to be
> supplied by arch code to better abstract this out.


rats, overlooked it :( bus_dma_limit is generic but dma_offset is in 
archdata :-/


> 
>>   	if (dma_map_direct(dev, ops))
>>   		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
>> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
>> +	else if (dev->bus_dma_limit &&
>> +		 can_map_direct(dev, (phys_addr_t) page_to_phys(page) + offset + size))
>> +		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
>> +#endif
> 
> I don't think page_to_phys needs a phys_addr_t on the return value.
> I'd also much prefer if we make this a little more beautiful, here
> are a few suggestions:
> 
>   - hide the bus_dma_limit check inside can_map_direct, and provide a
>     stub so that we can avoid the ifdef
>   - use a better name for can_map_direct, and maybe also a better calling
>     convention by passing the page (the sg code also has the page), 

It is passing an address of the end of the mapped area so passing a page 
struct means passing page and offset which is an extra parameter and we 
do not want to do anything with the page in those hooks anyway so I'd 
keep it as is.


> and
>     maybe even hide the dma_map_direct inside it.

Call dma_map_direct() from arch_dma_map_page_direct() if 
arch_dma_map_page_direct() is defined? Seems suboptimal as it is going 
to be bypass=true in most cases and we save one call by avoiding calling 
arch_dma_map_page_direct(). Unless I missed something?


> 
> 	if (dma_map_direct(dev, ops) ||
> 	    arch_dma_map_page_direct(dev, page, offset, size))
> 		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
> 
>>   	BUG_ON(!valid_dma_direction(dir));
>>   	if (dma_map_direct(dev, ops))
>>   		dma_direct_unmap_page(dev, addr, size, dir, attrs);
>> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
>> +	else if (dev->bus_dma_limit && dma_handle_direct(dev, addr + size))
>> +		dma_direct_unmap_page(dev, addr, size, dir, attrs);
>> +#endif
> 
> Same here.
> 
>>   	if (dma_map_direct(dev, ops))
>>   		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
>> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
>> +	else if (dev->bus_dma_limit) {
>> +		struct scatterlist *s;
>> +		bool direct = true;
>> +		int i;
>> +
>> +		for_each_sg(sg, s, nents, i) {
>> +			direct = can_map_direct(dev, sg_phys(s) + s->offset + s->length);
>> +			if (!direct)
>> +				break;
>> +		}
>> +		if (direct)
>> +			ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
>> +		else
>> +			ents = ops->map_sg(dev, sg, nents, dir, attrs);
>> +	}
>> +#endif
> 
> This needs to go into a helper as well.  I think the same style as
> above would work pretty nicely as well:

Yup. I'll repost v3 soon with this change. Thanks for the review.


> 
>   	if (dma_map_direct(dev, ops) ||
> 	    arch_dma_map_sg_direct(dev, sg, nents))
>   		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
>   	else
>   		ents = ops->map_sg(dev, sg, nents, dir, attrs);
> 
>> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
>> +	if (dev->bus_dma_limit) {
>> +		struct scatterlist *s;
>> +		bool direct = true;
>> +		int i;
>> +
>> +		for_each_sg(sg, s, nents, i) {
>> +			direct = dma_handle_direct(dev, s->dma_address + s->length);
>> +			if (!direct)
>> +				break;
>> +		}
>> +		if (direct) {
>> +			dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
>> +			return;
>> +		}
>> +	}
>> +#endif
> 
> One more time here..
>
Christoph Hellwig Oct. 28, 2020, 5:21 p.m. UTC | #3
On Wed, Oct 28, 2020 at 05:55:23PM +1100, Alexey Kardashevskiy wrote:
>
> It is passing an address of the end of the mapped area so passing a page 
> struct means passing page and offset which is an extra parameter and we do 
> not want to do anything with the page in those hooks anyway so I'd keep it 
> as is.
>
>
>> and
>>     maybe even hide the dma_map_direct inside it.
>
> Call dma_map_direct() from arch_dma_map_page_direct() if 
> arch_dma_map_page_direct() is defined? Seems suboptimal as it is going to 
> be bypass=true in most cases and we save one call by avoiding calling 
> arch_dma_map_page_direct(). Unless I missed something?

C does not even evaluate the right hand side of a || expression if the
left hand evaluates to true.
Alexey Kardashevskiy Oct. 28, 2020, 11:11 p.m. UTC | #4
On 29/10/2020 04:21, Christoph Hellwig wrote:
> On Wed, Oct 28, 2020 at 05:55:23PM +1100, Alexey Kardashevskiy wrote:
>>
>> It is passing an address of the end of the mapped area so passing a page
>> struct means passing page and offset which is an extra parameter and we do
>> not want to do anything with the page in those hooks anyway so I'd keep it
>> as is.
>>
>>
>>> and
>>>      maybe even hide the dma_map_direct inside it.
>>
>> Call dma_map_direct() from arch_dma_map_page_direct() if
>> arch_dma_map_page_direct() is defined? Seems suboptimal as it is going to
>> be bypass=true in most cases and we save one call by avoiding calling
>> arch_dma_map_page_direct(). Unless I missed something?
> 
> C does not even evaluate the right hand side of a || expression if the
> left hand evaluates to true.

Right, this is what I meant. dma_map_direct() is inline and fast so I 
did not want it inside the arch hook which is not inline.
diff mbox series

Patch

diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 51bb8fa8eb89..0f4f998e6c72 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -137,6 +137,18 @@  static inline bool dma_map_direct(struct device *dev,
 	return dma_go_direct(dev, *dev->dma_mask, ops);
 }
 
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+static inline bool can_map_direct(struct device *dev, phys_addr_t addr)
+{
+	return dev->bus_dma_limit >= phys_to_dma(dev, addr);
+}
+
+static inline bool dma_handle_direct(struct device *dev, dma_addr_t dma_handle)
+{
+	return dma_handle >= dev->archdata.dma_offset;
+}
+#endif
+
 dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
 		size_t offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)
@@ -151,6 +163,11 @@  dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
 
 	if (dma_map_direct(dev, ops))
 		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	else if (dev->bus_dma_limit &&
+		 can_map_direct(dev, (phys_addr_t) page_to_phys(page) + offset + size))
+		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+#endif
 	else
 		addr = ops->map_page(dev, page, offset, size, dir, attrs);
 	debug_dma_map_page(dev, page, offset, size, dir, addr);
@@ -167,6 +184,10 @@  void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
 	BUG_ON(!valid_dma_direction(dir));
 	if (dma_map_direct(dev, ops))
 		dma_direct_unmap_page(dev, addr, size, dir, attrs);
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	else if (dev->bus_dma_limit && dma_handle_direct(dev, addr + size))
+		dma_direct_unmap_page(dev, addr, size, dir, attrs);
+#endif
 	else if (ops->unmap_page)
 		ops->unmap_page(dev, addr, size, dir, attrs);
 	debug_dma_unmap_page(dev, addr, size, dir);
@@ -190,6 +211,23 @@  int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
 
 	if (dma_map_direct(dev, ops))
 		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	else if (dev->bus_dma_limit) {
+		struct scatterlist *s;
+		bool direct = true;
+		int i;
+
+		for_each_sg(sg, s, nents, i) {
+			direct = can_map_direct(dev, sg_phys(s) + s->offset + s->length);
+			if (!direct)
+				break;
+		}
+		if (direct)
+			ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
+		else
+			ents = ops->map_sg(dev, sg, nents, dir, attrs);
+	}
+#endif
 	else
 		ents = ops->map_sg(dev, sg, nents, dir, attrs);
 	BUG_ON(ents < 0);
@@ -207,9 +245,28 @@  void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
 
 	BUG_ON(!valid_dma_direction(dir));
 	debug_dma_unmap_sg(dev, sg, nents, dir);
-	if (dma_map_direct(dev, ops))
+	if (dma_map_direct(dev, ops)) {
 		dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
-	else if (ops->unmap_sg)
+		return;
+	}
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	if (dev->bus_dma_limit) {
+		struct scatterlist *s;
+		bool direct = true;
+		int i;
+
+		for_each_sg(sg, s, nents, i) {
+			direct = dma_handle_direct(dev, s->dma_address + s->length);
+			if (!direct)
+				break;
+		}
+		if (direct) {
+			dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
+			return;
+		}
+	}
+#endif
+	if (ops->unmap_sg)
 		ops->unmap_sg(dev, sg, nents, dir, attrs);
 }
 EXPORT_SYMBOL(dma_unmap_sg_attrs);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index c99de4a21458..02fa174fbdec 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -20,6 +20,10 @@  config DMA_OPS
 config DMA_OPS_BYPASS
 	bool
 
+# IOMMU driver limited by a DMA window size may switch between bypass and window
+config DMA_OPS_BYPASS_BUS_LIMIT
+	bool
+
 config NEED_SG_DMA_LENGTH
 	bool