[22/22] parisc: use generic dma_noncoherent_ops

Message ID 20180420080313.18796-23-hch@lst.de
State New
Headers show
Series
  • [01/22] dma-debug: move initialization to common code
Related show

Commit Message

Christoph Hellwig April 20, 2018, 8:03 a.m.
Switch to the generic noncoherent direct mapping implementation.

Parisc previously had two different non-coherent dma ops implementations
that differed only in the way coherent allocations were handled or not
handled.  The different behavior is now selected at runtime in the
arch_dma_alloc and arch_dma_free routines.  The non-coherent allocation
in the pcx cases now uses the dma_direct helpers that are a little more
sophisticated and used by a lot of other architectures.

Fix sync_single_for_cpu to skip the cache flush when the transfer
is to the device, to match the more tested unmap_single path which should
have the same cache coherency implications.

This also now consistently uses flush_kernel_dcache_range for cache
flushing, while previously some of the SG based operations used
flush_kernel_vmap_range instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/parisc/Kconfig                   |   4 +
 arch/parisc/include/asm/dma-mapping.h |   5 -
 arch/parisc/kernel/pci-dma.c          | 181 ++++----------------------
 arch/parisc/kernel/setup.c            |   8 +-
 arch/parisc/mm/init.c                 |  11 +-
 5 files changed, 35 insertions(+), 174 deletions(-)

Comments

Helge Deller April 21, 2018, 5:43 p.m. | #1
On 20.04.2018 10:03, Christoph Hellwig wrote:
> Switch to the generic noncoherent direct mapping implementation.
> 
> Parisc previously had two different non-coherent dma ops implementation
> that just different in the way coherent allocations were handled or not
> handled.  The different behavior is not selected at runtime in the
> arch_dma_alloc and arch_dma_free routines.  The non-coherent allocation
> in the pcx cases now uses the dma_direct helpers that are a little more
> sophisticated and used by a lot of other architectures.
> 
> Fix sync_single_for_cpu to do skip the cache flush unless the transfer
> is to the device to match the more tested unmap_single path which should
> have the same cache coherency implications.
> 
> This also now consistenly uses flush_kernel_dcache_range for cache
> flushing while previously some of the SG based operations used
> flush_kernel_vmap_range instead.


This patch breaks a 32bit kernel on a B160L machine (PA7300LC CPU, "pcxl2").
After applying this patch series the lasi_82596 network driver works unreliably.
The NIC gets an IP address, but ping doesn't work.
See drivers/net/ethernet/i825xx/lasi_82596.c, which uses the dma*sync() functions.

Helge

 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  arch/parisc/Kconfig                   |   4 +
>  arch/parisc/include/asm/dma-mapping.h |   5 -
>  arch/parisc/kernel/pci-dma.c          | 181 ++++----------------------
>  arch/parisc/kernel/setup.c            |   8 +-
>  arch/parisc/mm/init.c                 |  11 +-
>  5 files changed, 35 insertions(+), 174 deletions(-)
> 
> diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
> index 47047f0cbe35..80166a1cbcb7 100644
> --- a/arch/parisc/Kconfig
> +++ b/arch/parisc/Kconfig
> @@ -188,6 +188,10 @@ config PA20
>  config PA11
>  	def_bool y
>  	depends on PA7000 || PA7100LC || PA7200 || PA7300LC
> +	select ARCH_HAS_SYNC_DMA_FOR_CPU
> +	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
> +	select DMA_NONCOHERENT_OPS
> +	select DMA_NONCOHERENT_CACHE_SYNC
>  
>  config PREFETCH
>  	def_bool y
> diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
> index 01e1fc057c83..44a9f97194aa 100644
> --- a/arch/parisc/include/asm/dma-mapping.h
> +++ b/arch/parisc/include/asm/dma-mapping.h
> @@ -21,11 +21,6 @@
>  ** flush/purge and allocate "regular" cacheable pages for everything.
>  */
>  
> -#ifdef CONFIG_PA11
> -extern const struct dma_map_ops pcxl_dma_ops;
> -extern const struct dma_map_ops pcx_dma_ops;
> -#endif
> -
>  extern const struct dma_map_ops *hppa_dma_ops;
>  
>  static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
> diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
> index 91bc0cac03a1..235e2e53959e 100644
> --- a/arch/parisc/kernel/pci-dma.c
> +++ b/arch/parisc/kernel/pci-dma.c
> @@ -21,13 +21,12 @@
>  #include <linux/init.h>
>  #include <linux/gfp.h>
>  #include <linux/mm.h>
> -#include <linux/pci.h>
>  #include <linux/proc_fs.h>
>  #include <linux/seq_file.h>
>  #include <linux/string.h>
>  #include <linux/types.h>
> -#include <linux/scatterlist.h>
> -#include <linux/export.h>
> +#include <linux/dma-direct.h>
> +#include <linux/dma-noncoherent.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/dma.h>    /* for DMA_CHUNK_SIZE */
> @@ -447,178 +446,48 @@ static void pa11_dma_free(struct device *dev, size_t size, void *vaddr,
>  	free_pages((unsigned long)__va(dma_handle), order);
>  }
>  
> -static dma_addr_t pa11_dma_map_page(struct device *dev, struct page *page,
> -		unsigned long offset, size_t size,
> -		enum dma_data_direction direction, unsigned long attrs)
> +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
> +		size_t size, enum dma_data_direction dir)
>  {
> -	void *addr = page_address(page) + offset;
> -	BUG_ON(direction == DMA_NONE);
> -
> -	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
> -		flush_kernel_dcache_range((unsigned long) addr, size);
> -
> -	return virt_to_phys(addr);
> +	flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
>  }
>  
> -static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
> -		size_t size, enum dma_data_direction direction,
> -		unsigned long attrs)
> +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
> +		size_t size, enum dma_data_direction dir)
>  {
> -	BUG_ON(direction == DMA_NONE);
> -
> -	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
> -		return;
> -
> -	if (direction == DMA_TO_DEVICE)
> +	if (dir == DMA_TO_DEVICE)
>  		return;
>  
>  	/*
> -	 * For PCI_DMA_FROMDEVICE this flush is not necessary for the
> +	 * For DMA_FROM_DEVICE this flush is not necessary for the
>  	 * simple map/unmap case. However, it IS necessary if if
> -	 * pci_dma_sync_single_* has been called and the buffer reused.
> +	 * dma_sync_single_* has been called and the buffer reused.
>  	 */
>  
> -	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle), size);
> -}
> -
> -static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist,
> -		int nents, enum dma_data_direction direction,
> -		unsigned long attrs)
> -{
> -	int i;
> -	struct scatterlist *sg;
> -
> -	BUG_ON(direction == DMA_NONE);
> -
> -	for_each_sg(sglist, sg, nents, i) {
> -		unsigned long vaddr = (unsigned long)sg_virt(sg);
> -
> -		sg_dma_address(sg) = (dma_addr_t) virt_to_phys(vaddr);
> -		sg_dma_len(sg) = sg->length;
> -
> -		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
> -			continue;
> -
> -		flush_kernel_dcache_range(vaddr, sg->length);
> -	}
> -	return nents;
> +	flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
>  }
>  
> -static void pa11_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
> -		int nents, enum dma_data_direction direction,
> -		unsigned long attrs)
> -{
> -	int i;
> -	struct scatterlist *sg;
> -
> -	BUG_ON(direction == DMA_NONE);
> -
> -	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
> -		return;
> -
> -	if (direction == DMA_TO_DEVICE)
> -		return;
> -
> -	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
> -
> -	for_each_sg(sglist, sg, nents, i)
> -		flush_kernel_vmap_range(sg_virt(sg), sg->length);
> -}
> -
> -static void pa11_dma_sync_single_for_cpu(struct device *dev,
> -		dma_addr_t dma_handle, size_t size,
> -		enum dma_data_direction direction)
> -{
> -	BUG_ON(direction == DMA_NONE);
> -
> -	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle),
> -			size);
> -}
> -
> -static void pa11_dma_sync_single_for_device(struct device *dev,
> -		dma_addr_t dma_handle, size_t size,
> -		enum dma_data_direction direction)
> -{
> -	BUG_ON(direction == DMA_NONE);
> -
> -	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle),
> -			size);
> -}
> -
> -static void pa11_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
> -{
> -	int i;
> -	struct scatterlist *sg;
> -
> -	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
> -
> -	for_each_sg(sglist, sg, nents, i)
> -		flush_kernel_vmap_range(sg_virt(sg), sg->length);
> -}
> -
> -static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
> -{
> -	int i;
> -	struct scatterlist *sg;
> -
> -	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
> -
> -	for_each_sg(sglist, sg, nents, i)
> -		flush_kernel_vmap_range(sg_virt(sg), sg->length);
> -}
> -
> -static void pa11_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
> +void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
>  	       enum dma_data_direction direction)
>  {
>  	flush_kernel_dcache_range((unsigned long)vaddr, size);
>  }
>  
> -const struct dma_map_ops pcxl_dma_ops = {
> -	.alloc =		pa11_dma_alloc,
> -	.free =			pa11_dma_free,
> -	.map_page =		pa11_dma_map_page,
> -	.unmap_page =		pa11_dma_unmap_page,
> -	.map_sg =		pa11_dma_map_sg,
> -	.unmap_sg =		pa11_dma_unmap_sg,
> -	.sync_single_for_cpu =	pa11_dma_sync_single_for_cpu,
> -	.sync_single_for_device = pa11_dma_sync_single_for_device,
> -	.sync_sg_for_cpu =	pa11_dma_sync_sg_for_cpu,
> -	.sync_sg_for_device =	pa11_dma_sync_sg_for_device,
> -	.cache_sync =		pa11_dma_cache_sync,
> -};
> -
> -static void *pcx_dma_alloc(struct device *dev, size_t size,
> -		dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
> +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> +		gfp_t gfp, unsigned long attrs)
>  {
> -	void *addr;
> -
> -	if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
> -		return NULL;
> -
> -	addr = (void *)__get_free_pages(flag, get_order(size));
> -	if (addr)
> -		*dma_handle = (dma_addr_t)virt_to_phys(addr);
> -
> -	return addr;
> +	if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
> +		return pa11_dma_alloc(dev, size, dma_handle, gfp, attrs);
> +	if (attrs & DMA_ATTR_NON_CONSISTENT)
> +		return dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
> +	return NULL;
>  }
>  
> -static void pcx_dma_free(struct device *dev, size_t size, void *vaddr,
> -		dma_addr_t iova, unsigned long attrs)
> +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
> +		dma_addr_t dma_addr, unsigned long attrs)
>  {
> -	free_pages((unsigned long)vaddr, get_order(size));
> -	return;
> +	if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
> +		pa11_dma_free(dev, size, cpu_addr, dma_addr, attrs);
> +	else
> +		dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
>  }
> -
> -const struct dma_map_ops pcx_dma_ops = {
> -	.alloc =		pcx_dma_alloc,
> -	.free =			pcx_dma_free,
> -	.map_page =		pa11_dma_map_page,
> -	.unmap_page =		pa11_dma_unmap_page,
> -	.map_sg =		pa11_dma_map_sg,
> -	.unmap_sg =		pa11_dma_unmap_sg,
> -	.sync_single_for_cpu =	pa11_dma_sync_single_for_cpu,
> -	.sync_single_for_device = pa11_dma_sync_single_for_device,
> -	.sync_sg_for_cpu =	pa11_dma_sync_sg_for_cpu,
> -	.sync_sg_for_device =	pa11_dma_sync_sg_for_device,
> -	.cache_sync =		pa11_dma_cache_sync,
> -};
> diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
> index 8d3a7b80ac42..4e87c35c22b7 100644
> --- a/arch/parisc/kernel/setup.c
> +++ b/arch/parisc/kernel/setup.c
> @@ -97,14 +97,12 @@ void __init dma_ops_init(void)
>  		panic(	"PA-RISC Linux currently only supports machines that conform to\n"
>  			"the PA-RISC 1.1 or 2.0 architecture specification.\n");
>  
> -	case pcxs:
> -	case pcxt:
> -		hppa_dma_ops = &pcx_dma_ops;
> -		break;
>  	case pcxl2:
>  		pa7300lc_init();
>  	case pcxl: /* falls through */
> -		hppa_dma_ops = &pcxl_dma_ops;
> +	case pcxs:
> +	case pcxt:
> +		hppa_dma_ops = &dma_noncoherent_ops;
>  		break;
>  	default:
>  		break;
> diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
> index cab32ee824d2..4ad91c28ecbe 100644
> --- a/arch/parisc/mm/init.c
> +++ b/arch/parisc/mm/init.c
> @@ -19,7 +19,6 @@
>  #include <linux/gfp.h>
>  #include <linux/delay.h>
>  #include <linux/init.h>
> -#include <linux/pci.h>		/* for hppa_dma_ops and pcxl_dma_ops */
>  #include <linux/initrd.h>
>  #include <linux/swap.h>
>  #include <linux/unistd.h>
> @@ -616,17 +615,13 @@ void __init mem_init(void)
>  	free_all_bootmem();
>  
>  #ifdef CONFIG_PA11
> -	if (hppa_dma_ops == &pcxl_dma_ops) {
> +	if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) {
>  		pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START);
>  		parisc_vmalloc_start = SET_MAP_OFFSET(pcxl_dma_start
>  						+ PCXL_DMA_MAP_SIZE);
> -	} else {
> -		pcxl_dma_start = 0;
> -		parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
> -	}
> -#else
> -	parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
> +	} else
>  #endif
> +		parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
>  
>  	mem_init_print_info(NULL);
>  
>
James Bottomley April 21, 2018, 9:42 p.m. | #2
On Fri, 2018-04-20 at 10:03 +0200, Christoph Hellwig wrote:
> diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
> index 8d3a7b80ac42..4e87c35c22b7 100644
> --- a/arch/parisc/kernel/setup.c
> +++ b/arch/parisc/kernel/setup.c
> @@ -97,14 +97,12 @@ void __init dma_ops_init(void)
>  		panic(	"PA-RISC Linux currently only supports
> machines that conform to\n"
>  			"the PA-RISC 1.1 or 2.0 architecture
> specification.\n");
>  
> -	case pcxs:
> -	case pcxt:
> -		hppa_dma_ops = &pcx_dma_ops;
> -		break;
>  	case pcxl2:
>  		pa7300lc_init();
>  	case pcxl: /* falls through */
> -		hppa_dma_ops = &pcxl_dma_ops;
> +	case pcxs:
> +	case pcxt:
> +		hppa_dma_ops = &dma_noncoherent_ops;
>  		break;
>  	default:
>  		break;

Well, this is wrong: you just made every 32 bit parisc system
unnecessarily use non-coherent.  We actually only have a small
set of non-coherent systems.  The pcxs and pcxt systems (which are
about 99% of the user base) can use coherent dma ops.  The problem
seems to be that in your new world you only have one dma_noncoherent_ops
pointer ... we definitely need two on parisc, so whether
arch_dma_cache_sync is present or not needs to be dynamic, not config
defined.

James
James Bottomley April 21, 2018, 9:52 p.m. | #3
On Sat, 2018-04-21 at 19:43 +0200, Helge Deller wrote:
> On 20.04.2018 10:03, Christoph Hellwig wrote:
> > Switch to the generic noncoherent direct mapping implementation.
> > 
> > Parisc previously had two different non-coherent dma ops
> > implementation that just different in the way coherent allocations
> > were handled or not handled.  The different behavior is not
> > selected at runtime in the arch_dma_alloc and arch_dma_free
> > routines.  The non-coherent allocation in the pcx cases now uses
> > the dma_direct helpers that are a little more sophisticated and
> > used by a lot of other architectures.
> > 
> > Fix sync_single_for_cpu to do skip the cache flush unless the
> > transfer is to the device to match the more tested unmap_single
> > path which should have the same cache coherency implications.
> > 
> > This also now consistenly uses flush_kernel_dcache_range for cache
> > flushing while previously some of the SG based operations used
> > flush_kernel_vmap_range instead.
> 
> 
> This patch breaks a 32bit kernel on a B160L machine (PA7300LC CPU,
> "pcxl2"). After applying this patch series the lasi82956 network
> driver works unreliable.  NIC gets IP, but ping doesn't work.
> See drivers/net/ethernet/i825xx/lasi_82596.c, it uses dma*sync()
> functions.

That's actually a weird result.  The 32 bit machines have two cases:
those that can make uncached memory by setting the U bit (and thus
don't need the sync operations in the lasi and D700 drivers) and those
that can't.  The latter is basically only the old 700 series.  The B180
is in the class of can set pages to uncached, so it sounds like
something in our uncached memory allocation for dma areas is failing
after this patch set.

I still have an old 700 in my box of curiosities, so I can try to dust
it off and plug it back in when I get home to see what it makes of the
series when it gets fixed.

James

James
Christoph Hellwig April 24, 2018, 8:20 a.m. | #4
On Sat, Apr 21, 2018 at 10:42:47PM +0100, James Bottomley wrote:
> Well, this is wrong: you just made every 32 bit parisc system
> unnecessarily use non-coherent.  We actually only have a small small
> set of non-coherent systems.  The pxcs and pcxt systems (which are
> about 99% of the user base) can use coherent dma ops.  The problem
> seems to be in your new world you only have one dma_noncoherent_ops
> pointer ... we definitely need two on parisc, so whether
> arch_dma_cache_sync is present or not needs to be dynamic not config
> defined.

The changelog explicitly mentions merging the two noncoherent
implementations; they only differ in the alloc and free callbacks,
and we now runtime switch between them.  Before, the pcxs and pcxt
cases used pcx_dma_ops, and pcxl and pcxl2 used pcxl_dma_ops; now
all four use dma_noncoherent_ops, and arch_dma_alloc/arch_dma_free
branch out to different behavior.

Both pcx_dma_ops and pcxl_dma_ops do define the cache_sync
method in the existing code, so that isn't the issue.

I'll take a deeper look at what sort of behavior change might have
been introduced.

> 
> James
---end quoted text---
Christoph Hellwig April 25, 2018, 7:21 a.m. | #5
On Sat, Apr 21, 2018 at 07:43:46PM +0200, Helge Deller wrote:
> This patch breaks a 32bit kernel on a B160L machine (PA7300LC CPU, "pcxl2").
> After applying this patch series the lasi82956 network driver works unreliable. 
> NIC gets IP, but ping doesn't work.
> See drivers/net/ethernet/i825xx/lasi_82596.c, it uses dma*sync() functions.

Just to confirm:  Without the series it is known to actually work?
Helge Deller April 25, 2018, 9:07 p.m. | #6
On 25.04.2018 09:21, Christoph Hellwig wrote:
> On Sat, Apr 21, 2018 at 07:43:46PM +0200, Helge Deller wrote:
>> This patch breaks a 32bit kernel on a B160L machine (PA7300LC CPU, "pcxl2").
>> After applying this patch series the lasi82956 network driver works unreliable. 
>> NIC gets IP, but ping doesn't work.
>> See drivers/net/ethernet/i825xx/lasi_82596.c, it uses dma*sync() functions.
> 
> Just to confirm:  Without the series it is known to actually work?

Yes.
I reverted this series from my tree, rebuilt, and then the issue was gone.
I won't be able to test again earlier than next week.

Helge

Patch

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 47047f0cbe35..80166a1cbcb7 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -188,6 +188,10 @@  config PA20
 config PA11
 	def_bool y
 	depends on PA7000 || PA7100LC || PA7200 || PA7300LC
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select DMA_NONCOHERENT_OPS
+	select DMA_NONCOHERENT_CACHE_SYNC
 
 config PREFETCH
 	def_bool y
diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
index 01e1fc057c83..44a9f97194aa 100644
--- a/arch/parisc/include/asm/dma-mapping.h
+++ b/arch/parisc/include/asm/dma-mapping.h
@@ -21,11 +21,6 @@ 
 ** flush/purge and allocate "regular" cacheable pages for everything.
 */
 
-#ifdef CONFIG_PA11
-extern const struct dma_map_ops pcxl_dma_ops;
-extern const struct dma_map_ops pcx_dma_ops;
-#endif
-
 extern const struct dma_map_ops *hppa_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 91bc0cac03a1..235e2e53959e 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -21,13 +21,12 @@ 
 #include <linux/init.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
-#include <linux/pci.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/scatterlist.h>
-#include <linux/export.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 
 #include <asm/cacheflush.h>
 #include <asm/dma.h>    /* for DMA_CHUNK_SIZE */
@@ -447,178 +446,48 @@  static void pa11_dma_free(struct device *dev, size_t size, void *vaddr,
 	free_pages((unsigned long)__va(dma_handle), order);
 }
 
-static dma_addr_t pa11_dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size,
-		enum dma_data_direction direction, unsigned long attrs)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	void *addr = page_address(page) + offset;
-	BUG_ON(direction == DMA_NONE);
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		flush_kernel_dcache_range((unsigned long) addr, size);
-
-	return virt_to_phys(addr);
+	flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
 }
 
-static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
-		size_t size, enum dma_data_direction direction,
-		unsigned long attrs)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	BUG_ON(direction == DMA_NONE);
-
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	if (direction == DMA_TO_DEVICE)
+	if (dir == DMA_TO_DEVICE)
 		return;
 
 	/*
-	 * For PCI_DMA_FROMDEVICE this flush is not necessary for the
+	 * For DMA_FROM_DEVICE this flush is not necessary for the
 	 * simple map/unmap case. However, it IS necessary if if
-	 * pci_dma_sync_single_* has been called and the buffer reused.
+	 * dma_sync_single_* has been called and the buffer reused.
 	 */
 
-	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle), size);
-}
-
-static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist,
-		int nents, enum dma_data_direction direction,
-		unsigned long attrs)
-{
-	int i;
-	struct scatterlist *sg;
-
-	BUG_ON(direction == DMA_NONE);
-
-	for_each_sg(sglist, sg, nents, i) {
-		unsigned long vaddr = (unsigned long)sg_virt(sg);
-
-		sg_dma_address(sg) = (dma_addr_t) virt_to_phys(vaddr);
-		sg_dma_len(sg) = sg->length;
-
-		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-			continue;
-
-		flush_kernel_dcache_range(vaddr, sg->length);
-	}
-	return nents;
+	flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
 }
 
-static void pa11_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
-		int nents, enum dma_data_direction direction,
-		unsigned long attrs)
-{
-	int i;
-	struct scatterlist *sg;
-
-	BUG_ON(direction == DMA_NONE);
-
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	if (direction == DMA_TO_DEVICE)
-		return;
-
-	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
-
-	for_each_sg(sglist, sg, nents, i)
-		flush_kernel_vmap_range(sg_virt(sg), sg->length);
-}
-
-static void pa11_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-
-	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle),
-			size);
-}
-
-static void pa11_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-
-	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle),
-			size);
-}
-
-static void pa11_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
-{
-	int i;
-	struct scatterlist *sg;
-
-	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
-
-	for_each_sg(sglist, sg, nents, i)
-		flush_kernel_vmap_range(sg_virt(sg), sg->length);
-}
-
-static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
-{
-	int i;
-	struct scatterlist *sg;
-
-	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
-
-	for_each_sg(sglist, sg, nents, i)
-		flush_kernel_vmap_range(sg_virt(sg), sg->length);
-}
-
-static void pa11_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 	       enum dma_data_direction direction)
 {
 	flush_kernel_dcache_range((unsigned long)vaddr, size);
 }
 
-const struct dma_map_ops pcxl_dma_ops = {
-	.alloc =		pa11_dma_alloc,
-	.free =			pa11_dma_free,
-	.map_page =		pa11_dma_map_page,
-	.unmap_page =		pa11_dma_unmap_page,
-	.map_sg =		pa11_dma_map_sg,
-	.unmap_sg =		pa11_dma_unmap_sg,
-	.sync_single_for_cpu =	pa11_dma_sync_single_for_cpu,
-	.sync_single_for_device = pa11_dma_sync_single_for_device,
-	.sync_sg_for_cpu =	pa11_dma_sync_sg_for_cpu,
-	.sync_sg_for_device =	pa11_dma_sync_sg_for_device,
-	.cache_sync =		pa11_dma_cache_sync,
-};
-
-static void *pcx_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
-	void *addr;
-
-	if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
-		return NULL;
-
-	addr = (void *)__get_free_pages(flag, get_order(size));
-	if (addr)
-		*dma_handle = (dma_addr_t)virt_to_phys(addr);
-
-	return addr;
+	if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
+		return pa11_dma_alloc(dev, size, dma_handle, gfp, attrs);
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
+		return dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
+	return NULL;
 }
 
-static void pcx_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t iova, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t dma_addr, unsigned long attrs)
 {
-	free_pages((unsigned long)vaddr, get_order(size));
-	return;
+	if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
+		pa11_dma_free(dev, size, cpu_addr, dma_addr, attrs);
+	else
+		dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
 }
-
-const struct dma_map_ops pcx_dma_ops = {
-	.alloc =		pcx_dma_alloc,
-	.free =			pcx_dma_free,
-	.map_page =		pa11_dma_map_page,
-	.unmap_page =		pa11_dma_unmap_page,
-	.map_sg =		pa11_dma_map_sg,
-	.unmap_sg =		pa11_dma_unmap_sg,
-	.sync_single_for_cpu =	pa11_dma_sync_single_for_cpu,
-	.sync_single_for_device = pa11_dma_sync_single_for_device,
-	.sync_sg_for_cpu =	pa11_dma_sync_sg_for_cpu,
-	.sync_sg_for_device =	pa11_dma_sync_sg_for_device,
-	.cache_sync =		pa11_dma_cache_sync,
-};
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 8d3a7b80ac42..4e87c35c22b7 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -97,14 +97,12 @@  void __init dma_ops_init(void)
 		panic(	"PA-RISC Linux currently only supports machines that conform to\n"
 			"the PA-RISC 1.1 or 2.0 architecture specification.\n");
 
-	case pcxs:
-	case pcxt:
-		hppa_dma_ops = &pcx_dma_ops;
-		break;
 	case pcxl2:
 		pa7300lc_init();
 	case pcxl: /* falls through */
-		hppa_dma_ops = &pcxl_dma_ops;
+	case pcxs:
+	case pcxt:
+		hppa_dma_ops = &dma_noncoherent_ops;
 		break;
 	default:
 		break;
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index cab32ee824d2..4ad91c28ecbe 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -19,7 +19,6 @@ 
 #include <linux/gfp.h>
 #include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/pci.h>		/* for hppa_dma_ops and pcxl_dma_ops */
 #include <linux/initrd.h>
 #include <linux/swap.h>
 #include <linux/unistd.h>
@@ -616,17 +615,13 @@  void __init mem_init(void)
 	free_all_bootmem();
 
 #ifdef CONFIG_PA11
-	if (hppa_dma_ops == &pcxl_dma_ops) {
+	if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) {
 		pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START);
 		parisc_vmalloc_start = SET_MAP_OFFSET(pcxl_dma_start
 						+ PCXL_DMA_MAP_SIZE);
-	} else {
-		pcxl_dma_start = 0;
-		parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
-	}
-#else
-	parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
+	} else
 #endif
+		parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
 
 	mem_init_print_info(NULL);