
[13/31] nds32: DMA mapping API

Message ID 763e6a252de1ad8ccd28344fd0676ae2f92f796d.1510118606.git.green.hu@gmail.com
State Not Applicable, archived
Delegated to: David Miller
Series Andes(nds32) Linux Kernel Port

Commit Message

Greentime Hu Nov. 8, 2017, 5:55 a.m. UTC
From: Greentime Hu <greentime@andestech.com>

Signed-off-by: Vincent Chen <vincentc@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/include/asm/dma-mapping.h |   27 ++
 arch/nds32/kernel/dma.c              |  478 ++++++++++++++++++++++++++++++++++
 2 files changed, 505 insertions(+)
 create mode 100644 arch/nds32/include/asm/dma-mapping.h
 create mode 100644 arch/nds32/kernel/dma.c

Comments

Arnd Bergmann Nov. 8, 2017, 9:09 a.m. UTC | #1
On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu <green.hu@gmail.com> wrote:

> +static void consistent_sync(void *vaddr, size_t size, int direction)
> +{
> +       unsigned long start = (unsigned long)vaddr;
> +       unsigned long end = start + size;
> +
> +       switch (direction) {
> +       case DMA_FROM_DEVICE:   /* invalidate only */
> +               cpu_dma_inval_range(start, end);
> +               break;
> +       case DMA_TO_DEVICE:     /* writeback only */
> +               cpu_dma_wb_range(start, end);
> +               break;
> +       case DMA_BIDIRECTIONAL: /* writeback and invalidate */
> +               cpu_dma_wbinval_range(start, end);
> +               break;
> +       default:
> +               BUG();
> +       }
> +}

> +
> +static void
> +nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
> +                             size_t size, enum dma_data_direction dir)
> +{
> +       consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
> +}
> +
> +static void
> +nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
> +                                size_t size, enum dma_data_direction dir)
> +{
> +       consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
> +}

You do the same cache operations for _to_cpu and _to_device, which
usually works,
but is more expensive than you need. It's better to take the ownership into
account and only do what you need.

     Arnd
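
[For illustration only, and not part of this patch: one way to "take the
ownership into account" is to pass it down next to the DMA direction. The
consistent_sync_for() helper and the sync_owner enum below are assumed
names; a sketch of the helper itself follows Arnd's more detailed
explanation further down.]

enum sync_owner { FOR_CPU, FOR_DEVICE };

static void
nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
                              size_t size, enum dma_data_direction dir)
{
        /* the CPU is about to own the buffer again */
        consistent_sync_for((void *)dma_to_virt(dev, handle), size, dir,
                            FOR_CPU);
}

static void
nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
                                 size_t size, enum dma_data_direction dir)
{
        /* the device is about to own the buffer */
        consistent_sync_for((void *)dma_to_virt(dev, handle), size, dir,
                            FOR_DEVICE);
}
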
Greentime Hu Nov. 9, 2017, 7:12 a.m. UTC | #2
2017-11-08 17:09 GMT+08:00 Arnd Bergmann <arnd@arndb.de>:
> On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu <green.hu@gmail.com> wrote:
>
>> +static void consistent_sync(void *vaddr, size_t size, int direction)
>> +{
>> +       unsigned long start = (unsigned long)vaddr;
>> +       unsigned long end = start + size;
>> +
>> +       switch (direction) {
>> +       case DMA_FROM_DEVICE:   /* invalidate only */
>> +               cpu_dma_inval_range(start, end);
>> +               break;
>> +       case DMA_TO_DEVICE:     /* writeback only */
>> +               cpu_dma_wb_range(start, end);
>> +               break;
>> +       case DMA_BIDIRECTIONAL: /* writeback and invalidate */
>> +               cpu_dma_wbinval_range(start, end);
>> +               break;
>> +       default:
>> +               BUG();
>> +       }
>> +}
>
>> +
>> +static void
>> +nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
>> +                             size_t size, enum dma_data_direction dir)
>> +{
>> +       consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
>> +}
>> +
>> +static void
>> +nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
>> +                                size_t size, enum dma_data_direction dir)
>> +{
>> +       consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
>> +}
>
> You do the same cache operations for _to_cpu and _to_device, which
> usually works,
> but is more expensive than you need. It's better to take the ownership into
> account and only do what you need.
>

Thanks.

Like this?

static void
nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
                              size_t size, enum dma_data_direction dir)
{
        consistent_sync((void *)dma_to_virt(dev, handle), size,
                        DMA_FROM_DEVICE);
}

static void
nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
                                 size_t size, enum dma_data_direction dir)
{
        consistent_sync((void *)dma_to_virt(dev, handle), size,
                        DMA_TO_DEVICE);
}
Arnd Bergmann Nov. 9, 2017, 10:14 a.m. UTC | #3
On Thu, Nov 9, 2017 at 8:12 AM, Greentime Hu <green.hu@gmail.com> wrote:
> 2017-11-08 17:09 GMT+08:00 Arnd Bergmann <arnd@arndb.de>:
>> On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu <green.hu@gmail.com> wrote:
>>

>> You do the same cache operations for _to_cpu and _to_device, which
>> usually works,
>> but is more expensive than you need. It's better to take the ownership into
>> account and only do what you need.
>>
> Like this?
>
> static void
> nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
>                               size_t size, enum dma_data_direction dir)
> {
>         consistent_sync((void *)dma_to_virt(dev, handle), size,
>                         DMA_FROM_DEVICE);
> }
>
> static void
> nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
>                                  size_t size, enum dma_data_direction dir)
> {
>         consistent_sync((void *)dma_to_virt(dev, handle), size,
>                         DMA_TO_DEVICE);
> }

No, it's more complicated than that. You need to pass both the direction of the
DMA transaction and the ownership to consistent_sync(), and then do the
correct cache maintenance operation for each of the six combinations.

Which operation that is depends on the microarchitecture to some degree,
e.g. on machines that can load arbitrary cache lines during speculative
execution, you have to invalidate the caches during both
_for_device/FROM_DEVICE and _for_cpu/FROM_DEVICE, while machines
without speculative execution can skip the second invalidation; they
only need to get rid of dirty cache lines before the DMA from the device.

Usually you don't have to do a writeback during _for_cpu, since there
are no dirty cache lines after the _for_device operation.

It's not entirely clear what the correct behavior is for buffers that
are not cache-line aligned: some architectures use wbinval instead
of inval on any partial cache line for the _for_device/FROM_DEVICE
operation, but you wouldn't want to do that for the
_for_cpu/FROM_DEVICE operation.

       Arnd
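
[Again for illustration only, and not part of this patch: a rough sketch of
the consistent_sync_for() helper used in the earlier sketch, covering the
direction/ownership combinations described above. It uses the cpu_dma_*
primitives from the patch; on a core that does not speculatively fetch
lines while the device owns the buffer, the FOR_CPU/DMA_FROM_DEVICE
invalidate could be dropped, as noted in the comment.]

static void consistent_sync_for(void *vaddr, size_t size,
                                enum dma_data_direction dir,
                                enum sync_owner owner)
{
        unsigned long start = (unsigned long)vaddr;
        unsigned long end = start + size;

        if (owner == FOR_DEVICE) {
                switch (dir) {
                case DMA_FROM_DEVICE:
                        /*
                         * Device is going to overwrite the buffer: drop any
                         * stale lines (wbinval would be the safer choice for
                         * buffers that are not cache-line aligned).
                         */
                        cpu_dma_inval_range(start, end);
                        break;
                case DMA_TO_DEVICE:
                        /* device will read: push dirty lines to memory */
                        cpu_dma_wb_range(start, end);
                        break;
                case DMA_BIDIRECTIONAL:
                        cpu_dma_wbinval_range(start, end);
                        break;
                default:
                        BUG();
                }
        } else {                /* FOR_CPU */
                switch (dir) {
                case DMA_FROM_DEVICE:
                case DMA_BIDIRECTIONAL:
                        /*
                         * Only needed if lines may have been fetched
                         * speculatively while the device owned the buffer;
                         * otherwise this can be a no-op.
                         */
                        cpu_dma_inval_range(start, end);
                        break;
                case DMA_TO_DEVICE:
                        /* no dirty lines can exist after _for_device */
                        break;
                default:
                        BUG();
                }
        }
}
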
Greentime Hu Nov. 10, 2017, 8:13 a.m. UTC | #4
2017-11-09 18:14 GMT+08:00 Arnd Bergmann <arnd@arndb.de>:
> On Thu, Nov 9, 2017 at 8:12 AM, Greentime Hu <green.hu@gmail.com> wrote:
>> 2017-11-08 17:09 GMT+08:00 Arnd Bergmann <arnd@arndb.de>:
>>> On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu <green.hu@gmail.com> wrote:
>>>
>
>>> You do the same cache operations for _to_cpu and _to_device, which
>>> usually works,
>>> but is more expensive than you need. It's better to take the ownership into
>>> account and only do what you need.
>>>
>> Like this?
>>
>> static void
>> nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
>>                               size_t size, enum dma_data_direction dir)
>> {
>>         consistent_sync((void *)dma_to_virt(dev, handle), size,
>> DMA_FROM_DEVICE);
>> }
>>
>> static void
>> nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
>>                                  size_t size, enum dma_data_direction dir)
>> {
>>         consistent_sync((void *)dma_to_virt(dev, handle), size,
>> DMA_TO_DEVICE);
>> }
>
> No, it's more complicated than that. You need to pass both the direction of the
> DMA transaction and the ownership to consistent_sync(), and then do the
> correct cache maintenance operation for each of the six combinations.
>
> Which operation that is depends on the microarchitecture to some degree,
> e.g. on machines that can load arbitrary cache lines during speculative
> execution, you have to invalidate the caches during both
> _for_device/FROM_DEVICE and _for_cpu/FROM_DEVICE, while machines
> without speculative execution can skip the second invalidation; they
> only need to get rid of dirty cache lines before the DMA from the device.
>
> Usually you don't have to do a writeback during _for_cpu, since there
> are no dirty cache lines after the _for_device operation.
>
> It's not entirely clear what the correct behavior is for buffers that
> are not cache-line aligned: some architectures use wbinval instead
> of inval on any partial cache line for the _for_device/FROM_DEVICE
> operation, but you wouldn't want to do that for the
> _for_cpu/FROM_DEVICE operation.


I get your point. I prefer to keep it this way for now because handling
every combination would make the code a bit more complex.
I will keep studying the code to see what I can improve in the next version of the patch.

Patch

diff --git a/arch/nds32/include/asm/dma-mapping.h b/arch/nds32/include/asm/dma-mapping.h
new file mode 100644
index 0000000..63dbeb7
--- /dev/null
+++ b/arch/nds32/include/asm/dma-mapping.h
@@ -0,0 +1,27 @@ 
+/*
+ * Copyright (C) 2005-2017 Andes Technology Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ASMNDS32_DMA_MAPPING_H
+#define ASMNDS32_DMA_MAPPING_H
+
+extern struct dma_map_ops nds32_dma_ops;
+
+static inline struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+{
+	return &nds32_dma_ops;
+}
+
+#endif
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
new file mode 100644
index 0000000..939702e
--- /dev/null
+++ b/arch/nds32/kernel/dma.c
@@ -0,0 +1,478 @@ 
+/*
+ * Copyright (C) 2005-2017 Andes Technology Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/cache.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/dma-mapping.h>
+#include <asm/proc-fns.h>
+
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_RAW_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct vm_region	region;
+ *    unsigned long	flags;
+ *    struct page	**pages;
+ *    unsigned int	nr_pages;
+ *    unsigned long	phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call vm_region_alloc with an appropriate
+ * struct vm_region head (eg):
+ *
+ *  struct vm_region vmalloc_head = {
+ *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *	.vm_start	= VMALLOC_START,
+ *	.vm_end		= VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling vm_region_alloc().
+ */
+struct arch_vm_region {
+	struct list_head vm_list;
+	unsigned long vm_start;
+	unsigned long vm_end;
+	struct page *vm_pages;
+};
+
+static struct arch_vm_region consistent_head = {
+	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
+	.vm_start = CONSISTENT_BASE,
+	.vm_end = CONSISTENT_END,
+};
+
+static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
+					      size_t size, gfp_t gfp)
+{
+	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	unsigned long flags;
+	struct arch_vm_region *c, *new;
+
+	new = kmalloc(sizeof(struct arch_vm_region), gfp);
+	if (!new)
+		goto out;
+
+	raw_spin_lock_irqsave(&consistent_lock, flags);
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if ((addr + size) < addr)
+			goto nospc;
+		if ((addr + size) <= c->vm_start)
+			goto found;
+		addr = c->vm_end;
+		if (addr > end)
+			goto nospc;
+	}
+
+found:
+	/*
+	 * Insert this entry _before_ the one we found.
+	 */
+	list_add_tail(&new->vm_list, &c->vm_list);
+	new->vm_start = addr;
+	new->vm_end = addr + size;
+
+	raw_spin_unlock_irqrestore(&consistent_lock, flags);
+	return new;
+
+nospc:
+	raw_spin_unlock_irqrestore(&consistent_lock, flags);
+	kfree(new);
+out:
+	return NULL;
+}
+
+static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
+					     unsigned long addr)
+{
+	struct arch_vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if (c->vm_start == addr)
+			goto out;
+	}
+	c = NULL;
+out:
+	return c;
+}
+
+/* FIXME: attrs is not used. */
+static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t * handle, gfp_t gfp,
+				      unsigned long attrs)
+{
+	struct page *page;
+	struct arch_vm_region *c;
+	unsigned long order;
+	u64 mask = ISA_DMA_THRESHOLD, limit;
+	pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
+
+	if (!consistent_pte) {
+		pr_err("%s: not initialized\n", __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	if (dev) {
+		mask = dev->coherent_dma_mask;
+
+		/*
+		 * Sanity check the DMA mask - it must be non-zero, and
+		 * must be able to be satisfied by a DMA allocation.
+		 */
+		if (mask == 0) {
+			dev_warn(dev, "coherent DMA mask is unset\n");
+			goto no_page;
+		}
+
+		if ((~mask) & ISA_DMA_THRESHOLD) {
+			dev_warn(dev, "coherent DMA mask %#llx is smaller "
+				 "than system GFP_DMA mask %#llx\n",
+				 mask, (unsigned long long)ISA_DMA_THRESHOLD);
+			goto no_page;
+		}
+	}
+
+	/*
+	 * Sanity check the allocation size.
+	 */
+	size = PAGE_ALIGN(size);
+	limit = (mask + 1) & ~mask;
+	if ((limit && size >= limit) ||
+	    size >= (CONSISTENT_END - CONSISTENT_BASE)) {
+		pr_warn("coherent allocation too big (requested %#zx mask %#llx)\n",
+			size, mask);
+		goto no_page;
+	}
+
+	order = get_order(size);
+
+	if (mask != 0xffffffff)
+		gfp |= GFP_DMA;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		goto no_page;
+
+	/*
+	 * Invalidate any data that might be lurking in the
+	 * kernel direct-mapped region for device DMA.
+	 */
+	{
+		unsigned long kaddr = (unsigned long)page_address(page);
+		memset(page_address(page), 0, size);
+		cpu_dma_wbinval_range(kaddr, kaddr + size);
+	}
+
+	/*
+	 * Allocate a virtual address in the consistent mapping region.
+	 */
+	c = vm_region_alloc(&consistent_head, size,
+			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (c) {
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+		struct page *end = page + (1 << order);
+
+		c->vm_pages = page;
+
+		/*
+		 * Set the "dma handle"
+		 */
+		*handle = page_to_dma(dev, page);
+
+		do {
+			BUG_ON(!pte_none(*pte));
+
+			/*
+			 * x86 does not mark the pages reserved...
+			 */
+			SetPageReserved(page);
+			set_pte(pte, mk_pte(page, prot));
+			page++;
+			pte++;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			__free_page(page);
+			page++;
+		}
+
+		return (void *)c->vm_start;
+	}
+
+	if (page)
+		__free_pages(page, order);
+no_page:
+	*handle = ~0;
+	return NULL;
+}
+
+static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
+			   dma_addr_t handle, unsigned long attrs)
+{
+	struct arch_vm_region *c;
+	unsigned long flags, addr;
+	pte_t *ptep;
+
+	size = PAGE_ALIGN(size);
+
+	raw_spin_lock_irqsave(&consistent_lock, flags);
+
+	c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
+	if (!c)
+		goto no_area;
+
+	if ((c->vm_end - c->vm_start) != size) {
+		pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+	addr = c->vm_start;
+	do {
+		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+		unsigned long pfn;
+
+		ptep++;
+		addr += PAGE_SIZE;
+
+		if (!pte_none(pte) && pte_present(pte)) {
+			pfn = pte_pfn(pte);
+
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+
+				/*
+				 * x86 does not mark the pages reserved...
+				 */
+				ClearPageReserved(page);
+
+				__free_page(page);
+				continue;
+			}
+		}
+
+		pr_crit("%s: bad page in kernel page table\n", __func__);
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	raw_spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+no_area:
+	raw_spin_unlock_irqrestore(&consistent_lock, flags);
+	pr_err("%s: trying to free invalid coherent area: %p\n",
+	       __func__, cpu_addr);
+	dump_stack();
+}
+
+/*
+ * Initialise the consistent memory allocation.
+ */
+static int __init consistent_init(void)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	int ret = 0;
+
+	do {
+		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
+		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
+		if (!pmd) {
+			pr_err("%s: no pmd tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+		/* The first-level mapping may already have been created
+		 * elsewhere, so there is no need to warn here. */
+		/* WARN_ON(!pmd_none(*pmd)); */
+
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
+		if (!pte) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		consistent_pte = pte;
+	} while (0);
+
+	return ret;
+}
+
+core_initcall(consistent_init);
+static void consistent_sync(void *vaddr, size_t size, int direction);
+static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir,
+				     unsigned long attrs)
+{
+	consistent_sync((void *)(page_address(page) + offset), size, dir);
+	return page_to_phys(page) + offset;
+}
+
+static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
+				 size_t size, enum dma_data_direction dir,
+				 unsigned long attrs)
+{
+	consistent_sync(phys_to_virt(handle), size, dir);
+}
+
+/*
+ * Make an area consistent for devices.
+ */
+static void consistent_sync(void *vaddr, size_t size, int direction)
+{
+	unsigned long start = (unsigned long)vaddr;
+	unsigned long end = start + size;
+
+	switch (direction) {
+	case DMA_FROM_DEVICE:	/* invalidate only */
+		cpu_dma_inval_range(start, end);
+		break;
+	case DMA_TO_DEVICE:	/* writeback only */
+		cpu_dma_wb_range(start, end);
+		break;
+	case DMA_BIDIRECTIONAL:	/* writeback and invalidate */
+		cpu_dma_wbinval_range(start, end);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			    int nents, enum dma_data_direction dir,
+			    unsigned long attrs)
+{
+	int i;
+
+	for (i = 0; i < nents; i++, sg++) {
+		void *virt;
+		unsigned long pfn;
+		struct page *page = sg_page(sg);
+
+		sg->dma_address = page_to_dma(dev, page) + sg->offset;
+		pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE;
+		page = pfn_to_page(pfn);
+		if (PageHighMem(page)) {
+			virt = kmap_atomic(page);
+			consistent_sync(virt, sg->length, dir);
+			kunmap_atomic(virt);
+		} else {
+			if (sg->offset > PAGE_SIZE)
+				panic("sg->offset:%08x > PAGE_SIZE\n",
+				      sg->offset);
+			virt = page_address(page) + sg->offset;
+			consistent_sync(virt, sg->length, dir);
+		}
+	}
+	return nents;
+}
+
+static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			       int nhwentries, enum dma_data_direction dir,
+			       unsigned long attrs)
+{
+}
+
+static void
+nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
+			      size_t size, enum dma_data_direction dir)
+{
+	consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
+}
+
+static void
+nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
+				 size_t size, enum dma_data_direction dir)
+{
+	consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
+}
+
+static void
+nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
+			  enum dma_data_direction dir)
+{
+	int i;
+
+	for (i = 0; i < nents; i++, sg++) {
+		char *virt = page_address(sg_page(sg)) + sg->offset;
+		consistent_sync(virt, sg->length, dir);
+	}
+}
+
+static void
+nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			     int nents, enum dma_data_direction dir)
+{
+	int i;
+
+	for (i = 0; i < nents; i++, sg++) {
+		char *virt = page_address(sg_page(sg)) + sg->offset;
+		consistent_sync(virt, sg->length, dir);
+	}
+}
+
+struct dma_map_ops nds32_dma_ops = {
+	.alloc = nds32_dma_alloc_coherent,
+	.free = nds32_dma_free,
+	.map_page = nds32_dma_map_page,
+	.unmap_page = nds32_dma_unmap_page,
+	.map_sg = nds32_dma_map_sg,
+	.unmap_sg = nds32_dma_unmap_sg,
+	.sync_single_for_device = nds32_dma_sync_single_for_device,
+	.sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
+	.sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
+	.sync_sg_for_device = nds32_dma_sync_sg_for_device,
+};
+
+EXPORT_SYMBOL(nds32_dma_ops);