Message ID: 20180730162636.3556-3-Eugeniy.Paltsev@synopsys.com
State: New
Series: ARC: allow to use IOC and non-IOC DMA devices simultaneously
On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
> @@ -1263,11 +1254,7 @@ void __init arc_cache_init_master(void)
>  	if (is_isa_arcv2() && ioc_enable)
>  		arc_ioc_setup();
>
> -	if (is_isa_arcv2() && ioc_enable) {
> -		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
> -		__dma_cache_inv = __dma_cache_inv_ioc;
> -		__dma_cache_wback = __dma_cache_wback_ioc;
> -	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
> +	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
>  		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
>  		__dma_cache_inv = __dma_cache_inv_slc;
>  		__dma_cache_wback = __dma_cache_wback_slc;
> diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
> index cefb776a99ff..4d1466905e48 100644
> --- a/arch/arc/mm/dma.c
> +++ b/arch/arc/mm/dma.c
> @@ -33,19 +33,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
>  	if (!page)
>  		return NULL;
>
> -	/*
> -	 * IOC relies on all data (even coherent DMA data) being in cache
> -	 * Thus allocate normal cached memory
> -	 *
> -	 * The gains with IOC are two pronged:
> -	 *   -For streaming data, elides need for cache maintenance, saving
> -	 *    cycles in flush code, and bus bandwidth as all the lines of a
> -	 *    buffer need to be flushed out to memory
> -	 *   -For coherent data, Read/Write to buffers terminate early in cache
> -	 *    (vs. always going to memory - thus are faster)
> -	 */
> -	if ((is_isa_arcv2() && ioc_enable) ||
> -	    (attrs & DMA_ATTR_NON_CONSISTENT))
> +	if (attrs & DMA_ATTR_NON_CONSISTENT)
>  		need_coh = 0;
>
>  	/*
> @@ -95,8 +83,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
>  	struct page *page = virt_to_page(paddr);
>  	int is_non_coh = 1;
>
> -	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
> -			(is_isa_arcv2() && ioc_enable);
> +	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
>
>  	if (PageHighMem(page) || !is_non_coh)
>  		iounmap((void __force __iomem *)vaddr);
> @@ -182,3 +169,20 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
>  		break;
>  	}
>  }

I think we have some shenanigans with @ioc_enable now.
Do note that it was more of a debug hack, used when the hw feature was introduced,
to be able to run the same kernel on various FPGA bitfiles by just flicking a global
variable via the debugger.

So per the code below, if @ioc_enable is NOT set, we still use software assisted cache
maintenance, but dma_{alloc,free} no longer use that variable. Have you tried testing
the combination where @ioc_enable is set to 0 before boot? And does that work?

> +
> +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> +			const struct iommu_ops *iommu, bool coherent)
> +{
> +	/*
> +	 * IOC hardware snoops all DMA traffic keeping the caches consistent
> +	 * with memory - eliding need for any explicit cache maintenance of
> +	 * DMA buffers - so we can use dma_direct cache ops.
> +	 */
> +	if (is_isa_arcv2() && ioc_enable && coherent) {
> +		set_dma_ops(dev, &dma_direct_ops);
> +		dev_info(dev, "use dma_direct_ops cache ops\n");
> +	} else {
> +		set_dma_ops(dev, &dma_noncoherent_ops);
> +		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
> +	}
> +}
On Mon, 2018-08-13 at 16:24 +0000, Vineet Gupta wrote:
> On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
>
> [snip]
>
> I think we have some shenanigans with @ioc_enable now.
> Do note that it was more of a debug hack, used when the hw feature was introduced,
> to be able to run the same kernel on various FPGA bitfiles by just flicking a global
> variable via the debugger.
>
> So per the code below, if @ioc_enable is NOT set, we still use software assisted cache
> maintenance, but dma_{alloc,free} no longer use that variable. Have you tried testing
> the combination where @ioc_enable is set to 0 before boot? And does that work?

Yep, I tested that.
And it works fine with both @ioc_enable == 0 and @ioc_enable == 1.
Note that we check this variable in the arch_setup_dma_ops() function now.

So these arch_dma_{alloc,free} are used ONLY in the case of software assisted cache
maintenance. That's why we had to do the MMU mapping to enforce non-cachability
regardless of @ioc_enable.

Previously [before this patch] we used these ops for both HW and SW assisted cache
maintenance, which is why we checked @ioc_enable in arch_dma_{alloc,free}:
in the case of HW assisted cache maintenance we only allocate memory, while in the
case of SW assisted cache maintenance we allocate memory and also do the MMU mapping
to enforce non-cachability.

> [snip]
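To make the SW assisted path concrete, here is a stripped-down sketch of what the
non-coherent allocation boils down to (simplified from arch/arc/mm/dma.c; error
paths, highmem and the DMA_ATTR_NON_CONSISTENT case are elided, so treat it as an
illustration rather than the exact code):

void *arch_dma_alloc_sketch(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t gfp)
{
	unsigned int order = get_order(size);
	struct page *page = alloc_pages(gfp, order);
	phys_addr_t paddr;
	void *kvaddr;

	if (!page)
		return NULL;

	paddr = page_to_phys(page);
	*dma_handle = paddr;	/* on ARC the DMA address is the phys address here */

	/* SW assisted cache maintenance: remap the buffer uncached for the CPU */
	kvaddr = ioremap_nocache(paddr, size);
	if (!kvaddr) {
		__free_pages(page, order);
		return NULL;
	}

	/* evict any stale lines left behind by the cached kernel mapping */
	dma_cache_wback_inv(paddr, size);

	return kvaddr;
}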
On 08/13/2018 10:08 AM, Eugeniy Paltsev wrote:
> On Mon, 2018-08-13 at 16:24 +0000, Vineet Gupta wrote:
>> On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
>>> @@ -1263,11 +1254,7 @@ void __init arc_cache_init_master(void)
>>>  	if (is_isa_arcv2() && ioc_enable)
>>>  		arc_ioc_setup();
>>>
>>> -	if (is_isa_arcv2() && ioc_enable) {
>>> -		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
>>> -		__dma_cache_inv = __dma_cache_inv_ioc;
>>> -		__dma_cache_wback = __dma_cache_wback_ioc;
>>> -	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {

For the casual reader I'd add a comment why this was deleted.

>>> +	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
>>>  		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
>>>  		__dma_cache_inv = __dma_cache_inv_slc;
>>>  		__dma_cache_wback = __dma_cache_wback_slc;

[snip]

>>> -	if ((is_isa_arcv2() && ioc_enable) ||
>>> -	    (attrs & DMA_ATTR_NON_CONSISTENT))
>>> +	if (attrs & DMA_ATTR_NON_CONSISTENT)
>>>  		need_coh = 0;

[snip]

> Yep, I tested that.
> And it works fine with both @ioc_enable == 0 and @ioc_enable == 1.
> Note that we check this variable in the arch_setup_dma_ops() function now.
>
> So these arch_dma_{alloc,free} are used ONLY in the case of software assisted cache
> maintenance. That's why we had to do the MMU mapping to enforce non-cachability
> regardless of @ioc_enable.

Reading kernel/dma/* I see what you mean. We check @ioc_enable at the time of
registering the dma op for the coherent vs. non coherent case, so there are common vs.
ARC versions of alloc/free for coherent vs. noncoherent.

But then I'm curious why we bother to check the following in the new
arch_dma_{alloc|free} at all.

	if (attrs & DMA_ATTR_NON_CONSISTENT)

Isn't it supposed to be NON_CONSISTENT always, given the way the new code works?
Hi Vineet,

On Mon, 2018-08-20 at 15:34 -0700, Vineet Gupta wrote:
> On 08/13/2018 10:08 AM, Eugeniy Paltsev wrote:
> > On Mon, 2018-08-13 at 16:24 +0000, Vineet Gupta wrote:
> > > On 07/30/2018 09:26 AM, Eugeniy Paltsev wrote:
> > > >
> > > > [snip]
> > > >
> > > > -	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
>
> For the casual reader I'd add a comment why this was deleted.

Do you mean a comment in the commit description or a comment right in the sources?

> [snip]
>
> Reading kernel/dma/* I see what you mean. We check @ioc_enable at the time of
> registering the dma op for the coherent vs. non coherent case, so there are common vs.
> ARC versions of alloc/free for coherent vs. noncoherent.

Just to be sure that we both understand each other and the source code correctly:
 - In the coherent case we use the dma_direct_* ops, which don't use the ARC alloc
   functions (arch_dma_{alloc|free})
 - In the non-coherent case we use the dma_noncoherent_* ops, which do use the ARC
   alloc functions (arch_dma_{alloc|free})

> But then I'm curious why we bother to check the following in the new
> arch_dma_{alloc|free} at all.
>
> 	if (attrs & DMA_ATTR_NON_CONSISTENT)
>
> Isn't it supposed to be NON_CONSISTENT always, given the way the new code works?

The DMA_ATTR_NON_CONSISTENT flag is not related to the IOC topic.
It is a flag which we can pass to the dma_{alloc|free}_attrs functions from the
driver side.

According to the documentation:
DMA_ATTR_NON_CONSISTENT: Lets the platform to choose to return either
consistent or non-consistent memory as it sees fit.

We check this flag in arch_dma_alloc (which is used in the non-coherent case) to
skip the MMU mapping if we are advertised that consistency is not required.

So, actually, we could get rid of this flag checking in arch_dma_alloc and
simply always do the MMU mapping to enforce non-cachability and return
non-cacheable memory even if DMA_ATTR_NON_CONSISTENT is passed.
But I'm not sure we want to do that.

BTW: dma_alloc_coherent is simply dma_alloc_attrs with attrs == 0.
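To illustrate the driver-side view of this flag, here is a hypothetical snippet
(not from this series; the device, buffer names and sizes are placeholders — only
the dma_alloc_attrs()/dma_alloc_coherent() API and DMA_ATTR_NON_CONSISTENT are real):

#include <linux/dma-mapping.h>

static int example_alloc_buffers(struct device *dev, size_t sz,
				 void **coh_buf, dma_addr_t *coh_dma,
				 void **fast_buf, dma_addr_t *fast_dma)
{
	/* consistent memory: dma_alloc_coherent() == dma_alloc_attrs(..., 0) */
	*coh_buf = dma_alloc_coherent(dev, sz, coh_dma, GFP_KERNEL);
	if (!*coh_buf)
		return -ENOMEM;

	/*
	 * DMA_ATTR_NON_CONSISTENT: the platform may hand back cacheable memory;
	 * on ARC's non-coherent path this is what lets arch_dma_alloc() skip the
	 * uncached MMU mapping, and the driver then owns explicit maintenance
	 * (dma_cache_sync()).
	 */
	*fast_buf = dma_alloc_attrs(dev, sz, fast_dma, GFP_KERNEL,
				    DMA_ATTR_NON_CONSISTENT);
	if (!*fast_buf) {
		dma_free_coherent(dev, sz, *coh_buf, *coh_dma);
		return -ENOMEM;
	}

	return 0;
}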
Btw, given that I assume this is 4.20 material now, any chance we could merge it
through the dma-mapping tree?  I have some major changes pending that would clash
if done in a different tree, so I'd rather get it all together.

> We check this flag in arch_dma_alloc (which is used in the non-coherent case) to
> skip the MMU mapping if we are advertised that consistency is not required.
>
> So, actually, we could get rid of this flag checking in arch_dma_alloc and
> simply always do the MMU mapping to enforce non-cachability and return
> non-cacheable memory even if DMA_ATTR_NON_CONSISTENT is passed.
> But I'm not sure we want to do that.

I plan to kill this flag for 4.20 (or 4.21 at the latest) in favor of a better
interface.  But your implementation looks ok, so I'm fine with keeping it for now.
Hi,

On 08/22/2018 11:40 AM, Eugeniy Paltsev wrote:
>
>> Reading kernel/dma/* I see what you mean. We check @ioc_enable at the time of
>> registering the dma op for the coherent vs. non coherent case, so there are common vs.
>> ARC versions of alloc/free for coherent vs. noncoherent.
>
> Just to be sure that we both understand each other and the source code correctly:
>  - In the coherent case we use the dma_direct_* ops, which don't use the ARC alloc
>    functions (arch_dma_{alloc|free})
>  - In the non-coherent case we use the dma_noncoherent_* ops, which do use the ARC
>    alloc functions (arch_dma_{alloc|free})

Right, I see that.

>> But then I'm curious why we bother to check the following in the new
>> arch_dma_{alloc|free} at all.
>>
>> 	if (attrs & DMA_ATTR_NON_CONSISTENT)
>>
>> Isn't it supposed to be NON_CONSISTENT always, given the way the new code works?
>
> The DMA_ATTR_NON_CONSISTENT flag is not related to the IOC topic.
> It is a flag which we can pass to the dma_{alloc|free}_attrs functions from the
> driver side.
>
> According to the documentation:
> DMA_ATTR_NON_CONSISTENT: Lets the platform to choose to return either
> consistent or non-consistent memory as it sees fit.

Right, I had them mixed up. But then, in the case of the direct dma ops, the attr is
simply ignored in dma_alloc_attrs() -> dma_direct_alloc(). The user always gets
coherent memory.

> We check this flag in arch_dma_alloc (which is used in the non-coherent case) to
> skip the MMU mapping if we are advertised that consistency is not required.
>
> So, actually, we could get rid of this flag checking in arch_dma_alloc and
> simply always do the MMU mapping to enforce non-cachability and return
> non-cacheable memory even if DMA_ATTR_NON_CONSISTENT is passed.
> But I'm not sure we want to do that.
>
> BTW: dma_alloc_coherent is simply dma_alloc_attrs with attrs == 0.
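A grossly simplified sketch of why the attr is a no-op on the direct path, modelled
on kernel/dma/ of that era (zone, mask and error handling elided; the sketch_ names
are made up, only get_dma_ops(), the dma_map_ops ->alloc hook and phys_to_dma() are
real kernel API):

#include <linux/dma-mapping.h>
#include <linux/dma-direct.h>
#include <linux/gfp.h>

/* dispatch: dma_alloc_attrs() just forwards to the registered ops */
static void *dispatch_alloc_sketch(struct device *dev, size_t size,
				   dma_addr_t *dma_handle, gfp_t gfp,
				   unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	/* dma_direct_ops or dma_noncoherent_ops, as picked in arch_setup_dma_ops() */
	return ops->alloc(dev, size, dma_handle, gfp, attrs);
}

/* direct path: plain cacheable pages, @attrs (incl. NON_CONSISTENT) is never looked at */
static void *direct_alloc_sketch(struct device *dev, size_t size,
				 dma_addr_t *dma_handle, gfp_t gfp,
				 unsigned long attrs)
{
	struct page *page = alloc_pages(gfp, get_order(size));

	if (!page)
		return NULL;

	*dma_handle = phys_to_dma(dev, page_to_phys(page));
	return page_address(page);
}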
diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..c946c0a83e76
--- /dev/null
+++ b/arch/arc/include/asm/dma-mapping.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+// (C) 2018 Synopsys, Inc. (www.synopsys.com)
+
+#ifndef ASM_ARC_DMA_MAPPING_H
+#define ASM_ARC_DMA_MAPPING_H
+
+#include <asm-generic/dma-mapping.h>
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent);
+#define arch_setup_dma_ops arch_setup_dma_ops
+
+#endif
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index b95365e1253a..dac12afbb93a 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -65,7 +65,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 
 	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
 		       perip_base,
-		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency "));
+		       IS_AVAIL3(ioc_exists, ioc_enable, ", per device IO-Coherency "));
 
 	return buf;
 }
@@ -896,15 +896,6 @@ static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz)
 	slc_op(start, sz, OP_FLUSH);
 }
 
-/*
- * DMA ops for systems with IOC
- * IOC hardware snoops all DMA traffic keeping the caches consistent with
- * memory - eliding need for any explicit cache maintenance of DMA buffers
- */
-static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {}
-
 /*
  * Exported DMA API
  */
@@ -1263,11 +1254,7 @@ void __init arc_cache_init_master(void)
 	if (is_isa_arcv2() && ioc_enable)
 		arc_ioc_setup();
 
-	if (is_isa_arcv2() && ioc_enable) {
-		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
-		__dma_cache_inv = __dma_cache_inv_ioc;
-		__dma_cache_wback = __dma_cache_wback_ioc;
-	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
+	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
 		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
 		__dma_cache_inv = __dma_cache_inv_slc;
 		__dma_cache_wback = __dma_cache_wback_slc;
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index cefb776a99ff..4d1466905e48 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -33,19 +33,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (!page)
 		return NULL;
 
-	/*
-	 * IOC relies on all data (even coherent DMA data) being in cache
-	 * Thus allocate normal cached memory
-	 *
-	 * The gains with IOC are two pronged:
-	 *   -For streaming data, elides need for cache maintenance, saving
-	 *    cycles in flush code, and bus bandwidth as all the lines of a
-	 *    buffer need to be flushed out to memory
-	 *   -For coherent data, Read/Write to buffers terminate early in cache
-	 *    (vs. always going to memory - thus are faster)
-	 */
-	if ((is_isa_arcv2() && ioc_enable) ||
-	    (attrs & DMA_ATTR_NON_CONSISTENT))
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
 		need_coh = 0;
 
 	/*
@@ -95,8 +83,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	struct page *page = virt_to_page(paddr);
 	int is_non_coh = 1;
 
-	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
-			(is_isa_arcv2() && ioc_enable);
+	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
 
 	if (PageHighMem(page) || !is_non_coh)
 		iounmap((void __force __iomem *)vaddr);
@@ -182,3 +169,20 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 		break;
 	}
 }
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	/*
+	 * IOC hardware snoops all DMA traffic keeping the caches consistent
+	 * with memory - eliding need for any explicit cache maintenance of
+	 * DMA buffers - so we can use dma_direct cache ops.
+	 */
+	if (is_isa_arcv2() && ioc_enable && coherent) {
+		set_dma_ops(dev, &dma_direct_ops);
+		dev_info(dev, "use dma_direct_ops cache ops\n");
+	} else {
+		set_dma_ops(dev, &dma_noncoherent_ops);
+		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
+	}
+}
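For completeness, the @coherent argument the new arch_setup_dma_ops() receives
ultimately comes from the generic OF code, which derives it from the per-device
"dma-coherent" DT property. A grossly simplified sketch of that caller follows
(the function name here is made up; the real drivers/of/device.c also handles
dma-ranges, DMA masks and IOMMU setup):

#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/dma-mapping.h>

static void of_dma_configure_sketch(struct device *dev, struct device_node *np)
{
	/* true iff @np (or an ancestor bus node) carries "dma-coherent" */
	bool coherent = of_dma_is_coherent(np);

	/*
	 * On ARC this lands in the arch_setup_dma_ops() added above:
	 *   ioc_enable && coherent -> dma_direct_ops      (IOC snoops the DMA traffic)
	 *   otherwise              -> dma_noncoherent_ops (SW assisted cache maintenance)
	 */
	arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
}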