diff mbox series

[v2,2/4] ARC: allow to use IOC and non-IOC DMA devices simultaneously

Message ID 20180730162636.3556-3-Eugeniy.Paltsev@synopsys.com
State New
Headers show
Series ARC: allow to use IOC and non-IOC DMA devices simultaneously | expand

Commit Message

Eugeniy Paltsev July 30, 2018, 4:26 p.m. UTC
The ARC HS processor provides an IOC port (I/O coherency bus
interface) that allows external devices such as DMA devices
to access memory through the cache hierarchy, providing
coherency between I/O transactions and the complete memory
hierarchy.

Some recent SoCs with ARC HS (like the HSDK) allow selecting the bus
port (IOC or non-IOC port) used to connect DMA devices at runtime.

With this patch we can use both HW-coherent and regular DMA
peripherals simultaneously.

For example, we can connect USB and SDIO controllers through the IOC
port (so we don't need to maintain cache coherency for these devices
manually; all cache sync ops will be nops), and we can connect Ethernet
directly to the RAM port (so we have to maintain cache coherency
manually; cache sync ops will be real flush/invalidate operations).

Cache ops are set per device and depend on the "dma-coherent" device
tree property:
 * "dma_noncoherent_ops" are used if no "dma-coherent" property is
   present (or IOC is disabled)
 * "dma_direct_ops" are used if the "dma-coherent" property is present.

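The @coherent flag consumed by the new arch_setup_dma_ops() hook is
derived from that property by the generic OF code; a condensed sketch
of that path (drivers/of/device.c of that era, signature simplified,
not part of this patch):

int of_dma_configure(struct device *dev, struct device_node *np)
{
	u64 dma_addr = 0, size = 0;
	const struct iommu_ops *iommu = NULL;
	bool coherent;

	/* ... "dma-ranges" parsing elided ... */

	/* true iff "dma-coherent" is present on @np or a parent bus node */
	coherent = of_dma_is_coherent(np);

	/* lands in the per-arch hook added below */
	arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent);
	return 0;
}
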
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
---
Changes v1->v2 (Thanks to Christoph):
 * Don't select DMA_DIRECT_OPS explicitly as it is already selected by
   DMA_NONCOHERENT_OPS

 arch/arc/include/asm/dma-mapping.h | 13 +++++++++++++
 arch/arc/mm/cache.c                | 17 ++---------------
 arch/arc/mm/dma.c                  | 34 +++++++++++++++++++---------------
 3 files changed, 34 insertions(+), 30 deletions(-)
 create mode 100644 arch/arc/include/asm/dma-mapping.h

Comments

Vineet Gupta Aug. 13, 2018, 4:24 p.m. UTC | #1
On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
> @@ -1263,11 +1254,7 @@ void __init arc_cache_init_master(void)
>  	if (is_isa_arcv2() && ioc_enable)
>  		arc_ioc_setup();
>  
> -	if (is_isa_arcv2() && ioc_enable) {
> -		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
> -		__dma_cache_inv = __dma_cache_inv_ioc;
> -		__dma_cache_wback = __dma_cache_wback_ioc;
> -	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
> +	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
>  		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
>  		__dma_cache_inv = __dma_cache_inv_slc;
>  		__dma_cache_wback = __dma_cache_wback_slc;
> diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
> index cefb776a99ff..4d1466905e48 100644
> --- a/arch/arc/mm/dma.c
> +++ b/arch/arc/mm/dma.c
> @@ -33,19 +33,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>  	if (!page)
>  		return NULL;
>  
> -	/*
> -	 * IOC relies on all data (even coherent DMA data) being in cache
> -	 * Thus allocate normal cached memory
> -	 *
> -	 * The gains with IOC are two pronged:
> -	 *   -For streaming data, elides need for cache maintenance, saving
> -	 *    cycles in flush code, and bus bandwidth as all the lines of a
> -	 *    buffer need to be flushed out to memory
> -	 *   -For coherent data, Read/Write to buffers terminate early in cache
> -	 *   (vs. always going to memory - thus are faster)
> -	 */
> -	if ((is_isa_arcv2() && ioc_enable) ||
> -	    (attrs & DMA_ATTR_NON_CONSISTENT))
> +	if (attrs & DMA_ATTR_NON_CONSISTENT)
>  		need_coh = 0;
>  
>  	/*
> @@ -95,8 +83,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
>  	struct page *page = virt_to_page(paddr);
>  	int is_non_coh = 1;
>  
> -	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
> -			(is_isa_arcv2() && ioc_enable);
> +	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
>  
>  	if (PageHighMem(page) || !is_non_coh)
>  		iounmap((void __force __iomem *)vaddr);
> @@ -182,3 +169,20 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
>  		break;
>  	}
>  }

I think we have some shenanigans with @ioc_enable now.
Do note that it was more of a debug hack, used when the hw feature was introduced,
to be able to run the same kernel on various FPGA bitfiles by just flicking a global
variable via the debugger.

So per the code below, if @ioc_enable is NOT set, we still use software-assisted cache
maintenance, but dma_{alloc,free} don't use that variable. Have you tried
testing the combination where @ioc_enable is set to 0 before boot? And does that work?

> +
> +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> +			const struct iommu_ops *iommu, bool coherent)
> +{
> +	/*
> +	 * IOC hardware snoops all DMA traffic keeping the caches consistent
> +	 * with memory - eliding need for any explicit cache maintenance of
> +	 * DMA buffers - so we can use dma_direct cache ops.
> +	 */
> +	if (is_isa_arcv2() && ioc_enable && coherent) {
> +		set_dma_ops(dev, &dma_direct_ops);
> +		dev_info(dev, "use dma_direct_ops cache ops\n");
> +	} else {
> +		set_dma_ops(dev, &dma_noncoherent_ops);
> +		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
> +	}
> +}
Eugeniy Paltsev Aug. 13, 2018, 5:08 p.m. UTC | #2
On Mon, 2018-08-13 at 16:24 +0000, Vineet Gupta wrote:
> On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
> > @@ -1263,11 +1254,7 @@ void __init arc_cache_init_master(void)
> >  	if (is_isa_arcv2() && ioc_enable)
> >  		arc_ioc_setup();
> >  
> > -	if (is_isa_arcv2() && ioc_enable) {
> > -		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
> > -		__dma_cache_inv = __dma_cache_inv_ioc;
> > -		__dma_cache_wback = __dma_cache_wback_ioc;
> > -	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
> > +	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
> >  		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
> >  		__dma_cache_inv = __dma_cache_inv_slc;
> >  		__dma_cache_wback = __dma_cache_wback_slc;
> > diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
> > index cefb776a99ff..4d1466905e48 100644
> > --- a/arch/arc/mm/dma.c
> > +++ b/arch/arc/mm/dma.c
> > @@ -33,19 +33,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> >  	if (!page)
> >  		return NULL;
> >  
> > -	/*
> > -	 * IOC relies on all data (even coherent DMA data) being in cache
> > -	 * Thus allocate normal cached memory
> > -	 *
> > -	 * The gains with IOC are two pronged:
> > -	 *   -For streaming data, elides need for cache maintenance, saving
> > -	 *    cycles in flush code, and bus bandwidth as all the lines of a
> > -	 *    buffer need to be flushed out to memory
> > -	 *   -For coherent data, Read/Write to buffers terminate early in cache
> > -	 *   (vs. always going to memory - thus are faster)
> > -	 */
> > -	if ((is_isa_arcv2() && ioc_enable) ||
> > -	    (attrs & DMA_ATTR_NON_CONSISTENT))
> > +	if (attrs & DMA_ATTR_NON_CONSISTENT)
> >  		need_coh = 0;
> >  
> >  	/*
> > @@ -95,8 +83,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> >  	struct page *page = virt_to_page(paddr);
> >  	int is_non_coh = 1;
> >  
> > -	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
> > -			(is_isa_arcv2() && ioc_enable);
> > +	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
> >  
> >  	if (PageHighMem(page) || !is_non_coh)
> >  		iounmap((void __force __iomem *)vaddr);
> > @@ -182,3 +169,20 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
> >  		break;
> >  	}
> >  }
> 
> I think we have some shenanigans with @ioc_enable now.
> Do note that it was more of a debug hack, used when the hw feature was introduced,
> to be able to run the same kernel on various FPGA bitfiles by just flicking a global
> variable via the debugger.
> 
> So per the code below, if @ioc_enable is NOT set, we still use software-assisted cache
> maintenance, but dma_{alloc,free} don't use that variable. Have you tried
> testing the combination where @ioc_enable is set to 0 before boot? And does that work?

Yep, I tested that.
And it works fine with both @ioc_enable == 0 and @ioc_enable == 1.
Note that we check this variable in the arch_setup_dma_ops() function now.

So arch_dma_{alloc,free} are now used ONLY in the case of software-assisted cache maintenance.
That's why we have to do the MMU mapping to enforce non-cacheability regardless of @ioc_enable.


Previously [before this patch] we used these ops for both HW- and SW-assisted cache maintenance,
which is why we checked @ioc_enable in arch_dma_{alloc,free}. (In the case of HW-assisted cache
maintenance we only allocate memory, while in the case of SW-assisted cache maintenance we
allocate memory and do the MMU mapping to enforce non-cacheability.)
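
The MMU-mapping part being referred to looks roughly like this in
arch/arc/mm/dma.c (context not visible in the hunks above; condensed,
with error paths shortened):

	/*
	 * A coherent buffer needs a kernel mapping that enforces
	 * non-cacheability; a highmem page needs a mapping regardless
	 */
	if (PageHighMem(page) || need_coh)
		need_kvaddr = 1;

	if (need_kvaddr) {
		/* uncached alias via the MMU */
		kvaddr = ioremap_nocache(paddr, size);
		if (kvaddr == NULL) {
			__free_pages(page, order);
			return NULL;
		}
	} else {
		/* plain (cached) linear address */
		kvaddr = (void *)(u32)paddr;
	}

	/* evict any stale cached lines for the backing page */
	if (need_coh)
		dma_cache_wback_inv(paddr, size);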

> > +
> > +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> > +			const struct iommu_ops *iommu, bool coherent)
> > +{
> > +	/*
> > +	 * IOC hardware snoops all DMA traffic keeping the caches consistent
> > +	 * with memory - eliding need for any explicit cache maintenance of
> > +	 * DMA buffers - so we can use dma_direct cache ops.
> > +	 */
> > +	if (is_isa_arcv2() && ioc_enable && coherent) {
> > +		set_dma_ops(dev, &dma_direct_ops);
> > +		dev_info(dev, "use dma_direct_ops cache ops\n");
> > +	} else {
> > +		set_dma_ops(dev, &dma_noncoherent_ops);
> > +		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
> > +	}
> > +}
> 
>
Vineet Gupta Aug. 20, 2018, 10:34 p.m. UTC | #3
On 08/13/2018 10:08 AM, Eugeniy Paltsev wrote:
> On Mon, 2018-08-13 at 16:24 +0000, Vineet Gupta wrote:
>> On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
>>> @@ -1263,11 +1254,7 @@ void __init arc_cache_init_master(void)
>>>  	if (is_isa_arcv2() && ioc_enable)
>>>  		arc_ioc_setup();
>>>  
>>> -	if (is_isa_arcv2() && ioc_enable) {
>>> -		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
>>> -		__dma_cache_inv = __dma_cache_inv_ioc;
>>> -		__dma_cache_wback = __dma_cache_wback_ioc;
>>> -	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {

For the casual reader I'd add a comment why this was deleted.

>>> +	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
>>>  		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
>>>  		__dma_cache_inv = __dma_cache_inv_slc;
>>>  		__dma_cache_wback = __dma_cache_wback_slc;

[snip]

>>>  
>>> -	/*
>>> -	 * IOC relies on all data (even coherent DMA data) being in cache
>>> -	 * Thus allocate normal cached memory
>>> -	 *
>>> -	 * The gains with IOC are two pronged:
>>> -	 *   -For streaming data, elides need for cache maintenance, saving
>>> -	 *    cycles in flush code, and bus bandwidth as all the lines of a
>>> -	 *    buffer need to be flushed out to memory
>>> -	 *   -For coherent data, Read/Write to buffers terminate early in cache
>>> -	 *   (vs. always going to memory - thus are faster)
>>> -	 */
>>> -	if ((is_isa_arcv2() && ioc_enable) ||
>>> -	    (attrs & DMA_ATTR_NON_CONSISTENT))
>>> +	if (attrs & DMA_ATTR_NON_CONSISTENT)
>>>  		need_coh = 0;
>>>  

[snip]

> Yep, I tested that.
> And it works fine with both @ioc_enable == 0 and @ioc_enable == 1.
> Note that we check this variable in the arch_setup_dma_ops() function now.
> 
> So arch_dma_{alloc,free} are now used ONLY in the case of software-assisted cache maintenance.
> That's why we have to do the MMU mapping to enforce non-cacheability regardless of @ioc_enable.

Reading kernel/dma/* I see what you mean. We check @ioc_enable at the time of
registering the dma ops for the coherent vs. non-coherent case, so there are common vs.
ARC versions of alloc/free for coherent vs. non-coherent. But then I'm curious why
we bother to check the following in the new arch_dma_(alloc|free) at all.

	if (attrs & DMA_ATTR_NON_CONSISTENT)

Isn't it supposed to be NON_CONSISTENT always, given the way the new code works?
Eugeniy Paltsev Aug. 22, 2018, 6:40 p.m. UTC | #4
Hi Vineet,

On Mon, 2018-08-20 at 15:34 -0700, Vineet Gupta wrote:
> On 08/13/2018 10:08 AM, Eugeniy Paltsev wrote:
> > On Mon, 2018-08-13 at 16:24 +0000, Vineet Gupta wrote:
> > > On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
> > > > @@ -1263,11 +1254,7 @@ void __init arc_cache_init_master(void)
> > > >  	if (is_isa_arcv2() && ioc_enable)
> > > >  		arc_ioc_setup();
> > > >  
> > > > -	if (is_isa_arcv2() && ioc_enable) {
> > > > -		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
> > > > -		__dma_cache_inv = __dma_cache_inv_ioc;
> > > > -		__dma_cache_wback = __dma_cache_wback_ioc;
> > > > -	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
> 
> For the casual reader I'd add a comment why this was deleted.
Do you mean a comment in the commit description, or a comment right in the sources?

> 
> > > > +	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
> > > >  		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
> > > >  		__dma_cache_inv = __dma_cache_inv_slc;
> > > >  		__dma_cache_wback = __dma_cache_wback_slc;
> 
> [snip]
> 
> > > >  
> > > > -	/*
> > > > -	 * IOC relies on all data (even coherent DMA data) being in cache
> > > > -	 * Thus allocate normal cached memory
> > > > -	 *
> > > > -	 * The gains with IOC are two pronged:
> > > > -	 *   -For streaming data, elides need for cache maintenance, saving
> > > > -	 *    cycles in flush code, and bus bandwidth as all the lines of a
> > > > -	 *    buffer need to be flushed out to memory
> > > > -	 *   -For coherent data, Read/Write to buffers terminate early in cache
> > > > -	 *   (vs. always going to memory - thus are faster)
> > > > -	 */
> > > > -	if ((is_isa_arcv2() && ioc_enable) ||
> > > > -	    (attrs & DMA_ATTR_NON_CONSISTENT))
> > > > +	if (attrs & DMA_ATTR_NON_CONSISTENT)
> > > >  		need_coh = 0;
> > > >  
> 
> [snip]
> 
> > Yep, I tested that.
> > And it works fine with both @ioc_enable == 0 and @ioc_enable == 1.
> > Note that we check this variable in the arch_setup_dma_ops() function now.
> > 
> > So arch_dma_{alloc,free} are now used ONLY in the case of software-assisted cache maintenance.
> > That's why we have to do the MMU mapping to enforce non-cacheability regardless of @ioc_enable.
> 
> Reading kernel/dma/* I see what you mean. We check @ioc_enable at the time of
> registering the dma ops for the coherent vs. non-coherent case, so there are common vs.
> ARC versions of alloc/free for coherent vs. non-coherent.

Just to be sure that we both understand each other and the source code correctly:
- In the coherent case we use the dma_direct_* ops, which don't use the ARC alloc functions (arch_dma_{alloc|free})
- In the non-coherent case we use the dma_noncoherent_* ops, which do use the ARC alloc functions (arch_dma_{alloc|free})
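
Seen in the ops tables (condensed from kernel/dma/direct.c and
kernel/dma/noncoherent.c of that era; most callbacks elided):

/* kernel/dma/noncoherent.c: routes into the arch_* helpers */
const struct dma_map_ops dma_noncoherent_ops = {
	.alloc	= arch_dma_alloc,
	.free	= arch_dma_free,
	/* map/sync callbacks call arch_sync_dma_for_{device,cpu}() */
};

/* kernel/dma/direct.c: plain cached memory, no arch cache hooks */
const struct dma_map_ops dma_direct_ops = {
	.alloc	= dma_direct_alloc,
	.free	= dma_direct_free,
	/* map callbacks do no cache maintenance */
};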

> But then I'm curious why
> we bother to check the following in the new arch_dma_(alloc|free) at all.
> 
> 	if (attrs & DMA_ATTR_NON_CONSISTENT)
> 
> Isn't it supposed to be NON_CONSISTENT always, given the way the new code works?

The DMA_ATTR_NON_CONSISTENT flag is not related to the IOC topic.
It is a flag which we can pass to the dma_{alloc|free}_attrs functions from the driver side.

According to documentation:
  DMA_ATTR_NON_CONSISTENT: Lets the platform to choose to return either
  consistent or non-consistent memory as it sees fit.

We check this flag in arch_dma_alloc (which is used in the non-coherent case) to
skip the MMU mapping if we are told that consistency is not required.
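
(For illustration, the driver side would opt in roughly like this; dev,
len, buf and dma_handle are placeholders, and syncing around accesses
becomes the driver's responsibility for such memory:)

	/* ask for memory that may be non-consistent */
	buf = dma_alloc_attrs(dev, len, &dma_handle, GFP_KERNEL,
			      DMA_ATTR_NON_CONSISTENT);

	/* ... CPU/device accesses, bracketed by dma_cache_sync() ... */

	dma_free_attrs(dev, len, buf, dma_handle, DMA_ATTR_NON_CONSISTENT);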

So, actually, we could get rid of this flag checking in arch_dma_alloc and
simply always do the MMU mapping to enforce non-cacheability, returning
non-cacheable memory even if DMA_ATTR_NON_CONSISTENT is passed.
But I'm not sure we want to do that.

BTW: dma_alloc_coherent is simply dma_alloc_attrs with attrs == 0.
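
(The wrapper in include/linux/dma-mapping.h is essentially:)

static inline void *dma_alloc_coherent(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t flag)
{
	return dma_alloc_attrs(dev, size, dma_handle, flag, 0);
}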
Christoph Hellwig Aug. 23, 2018, 2:05 p.m. UTC | #5
Btw, given that I assume this is 4.20 material now, any chance we
could merge it through the dma-mapping tree?  I have some major changes
pending that would clash if done in a different tree, so I'd rather
get it all together.

> We check this flag in arch_dma_alloc (which is used in the non-coherent case) to
> skip the MMU mapping if we are told that consistency is not required.
> 
> So, actually, we could get rid of this flag checking in arch_dma_alloc and
> simply always do the MMU mapping to enforce non-cacheability, returning
> non-cacheable memory even if DMA_ATTR_NON_CONSISTENT is passed.
> But I'm not sure we want to do that.

I plan to kill this flag (for 4.20 at the latest) in favor
of a better interface.  But your implementation looks ok, so I'm
fine with keeping it for now.
Vineet Gupta Sept. 4, 2018, 6:13 p.m. UTC | #6
Hi,

On 08/22/2018 11:40 AM, Eugeniy Paltsev wrote:
>
>> Reading kernel/dma/* I see what you mean. We check @ioc_enable at the time of
>> registering the dma ops for the coherent vs. non-coherent case, so there are common vs.
>> ARC versions of alloc/free for coherent vs. non-coherent.
> Just to be sure that we both understand each other and the source code correctly:
> - In the coherent case we use the dma_direct_* ops, which don't use the ARC alloc functions (arch_dma_{alloc|free})
> - In the non-coherent case we use the dma_noncoherent_* ops, which do use the ARC alloc functions (arch_dma_{alloc|free})

Right, I see that.

>> But then I'm curious why
>> we bother to check the following in the new arch_dma_(alloc|free) at all.
>>
>> 	if (attrs & DMA_ATTR_NON_CONSISTENT)
>>
>> Isn't it supposed to be NON_CONSISTENT always, given the way the new code works?
> The DMA_ATTR_NON_CONSISTENT flag is not related to the IOC topic.
> It is a flag which we can pass to the dma_{alloc|free}_attrs functions from the driver side.
>
> According to documentation:
>   DMA_ATTR_NON_CONSISTENT: Lets the platform to choose to return either
>   consistent or non-consistent memory as it sees fit.

Right, I had them mixed up. But then in the case of the direct dma ops, the attr is simply
ignored in dma_alloc_attrs() -> dma_direct_alloc(): the user always gets coherent memory.
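
A condensed view of that path (kernel/dma/direct.c of that era; CMA,
GFP-mask and error handling elided):

void *dma_direct_alloc(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	void *ret;

	/* note: @attrs is never consulted for cacheability here */
	page = alloc_pages_node(dev_to_node(dev), gfp, get_order(size));
	if (!page)
		return NULL;

	ret = page_address(page);	/* normal cached mapping */
	memset(ret, 0, size);
	*dma_handle = phys_to_dma(dev, page_to_phys(page));
	return ret;
}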

>
> We check this flag in arch_dma_alloc (which is used in the non-coherent case) to
> skip the MMU mapping if we are told that consistency is not required.
>
> So, actually, we could get rid of this flag checking in arch_dma_alloc and
> simply always do the MMU mapping to enforce non-cacheability, returning
> non-cacheable memory even if DMA_ATTR_NON_CONSISTENT is passed.
> But I'm not sure we want to do that.
>
> BTW: dma_alloc_coherent is simply dma_alloc_attrs with attrs == 0.
>
diff mbox series

Patch

diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..c946c0a83e76
--- /dev/null
+++ b/arch/arc/include/asm/dma-mapping.h
@@ -0,0 +1,13 @@ 
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2018 Synopsys, Inc. (www.synopsys.com)
+
+#ifndef ASM_ARC_DMA_MAPPING_H
+#define ASM_ARC_DMA_MAPPING_H
+
+#include <asm-generic/dma-mapping.h>
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent);
+#define arch_setup_dma_ops arch_setup_dma_ops
+
+#endif
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index b95365e1253a..dac12afbb93a 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -65,7 +65,7 @@  char *arc_cache_mumbojumbo(int c, char *buf, int len)
 
 	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
 		       perip_base,
-		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency "));
+		       IS_AVAIL3(ioc_exists, ioc_enable, ", per device IO-Coherency "));
 
 	return buf;
 }
@@ -896,15 +896,6 @@  static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz)
 	slc_op(start, sz, OP_FLUSH);
 }
 
-/*
- * DMA ops for systems with IOC
- * IOC hardware snoops all DMA traffic keeping the caches consistent with
- * memory - eliding need for any explicit cache maintenance of DMA buffers
- */
-static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {}
-
 /*
  * Exported DMA API
  */
@@ -1263,11 +1254,7 @@  void __init arc_cache_init_master(void)
 	if (is_isa_arcv2() && ioc_enable)
 		arc_ioc_setup();
 
-	if (is_isa_arcv2() && ioc_enable) {
-		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
-		__dma_cache_inv = __dma_cache_inv_ioc;
-		__dma_cache_wback = __dma_cache_wback_ioc;
-	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
+	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
 		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
 		__dma_cache_inv = __dma_cache_inv_slc;
 		__dma_cache_wback = __dma_cache_wback_slc;
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index cefb776a99ff..4d1466905e48 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -33,19 +33,7 @@  void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (!page)
 		return NULL;
 
-	/*
-	 * IOC relies on all data (even coherent DMA data) being in cache
-	 * Thus allocate normal cached memory
-	 *
-	 * The gains with IOC are two pronged:
-	 *   -For streaming data, elides need for cache maintenance, saving
-	 *    cycles in flush code, and bus bandwidth as all the lines of a
-	 *    buffer need to be flushed out to memory
-	 *   -For coherent data, Read/Write to buffers terminate early in cache
-	 *   (vs. always going to memory - thus are faster)
-	 */
-	if ((is_isa_arcv2() && ioc_enable) ||
-	    (attrs & DMA_ATTR_NON_CONSISTENT))
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
 		need_coh = 0;
 
 	/*
@@ -95,8 +83,7 @@  void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	struct page *page = virt_to_page(paddr);
 	int is_non_coh = 1;
 
-	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
-			(is_isa_arcv2() && ioc_enable);
+	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
 
 	if (PageHighMem(page) || !is_non_coh)
 		iounmap((void __force __iomem *)vaddr);
@@ -182,3 +169,20 @@  void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 		break;
 	}
 }
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	/*
+	 * IOC hardware snoops all DMA traffic keeping the caches consistent
+	 * with memory - eliding need for any explicit cache maintenance of
+	 * DMA buffers - so we can use dma_direct cache ops.
+	 */
+	if (is_isa_arcv2() && ioc_enable && coherent) {
+		set_dma_ops(dev, &dma_direct_ops);
+		dev_info(dev, "use dma_direct_ops cache ops\n");
+	} else {
+		set_dma_ops(dev, &dma_noncoherent_ops);
+		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
+	}
+}