[2/3] powerpc/dma: Support 32-bit coherent mask with 64-bit dma_mask

Message ID: 1424421330.27448.42.camel@kernel.crashing.org
State: Changes Requested
Delegated to: Michael Ellerman

Commit Message

Benjamin Herrenschmidt Feb. 20, 2015, 8:35 a.m. UTC
Some drivers try to use a 64-bit dma_mask together with a smaller
(typically 32-bit) coherent DMA mask.
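
As an illustration (a hypothetical driver, not part of this patch), such
a combination is set up with two standard DMA API calls:

#include <linux/pci.h>
#include <linux/dma-mapping.h>

/* Hypothetical probe path showing the mask split described above:
 * streaming DMA may address the full 64 bits, while coherent
 * allocations (e.g. a descriptor ring whose base register is only
 * 32 bits wide) must stay below 4GB. */
static int foo_probe(struct pci_dev *pdev)
{
	int err;

	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (err)
		return err;

	return dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
}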

We don't currently support that on platforms that do direct DMA
because:

 - We use the generic dma_set_coherent_mask()

 - It will use dma_supported() with the provided mask

 - Our implementation of the latter, dma_direct_dma_supported(), will
   fail if the mask passed isn't big enough to cover all of memory
   (+ the DMA offset) -- see the sketch of that chain just below
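
For reference, the generic fallback in question looked roughly like this
at the time (a sketch of include/linux/dma-mapping.h, not verbatim):

/* Generic dma_set_coherent_mask(): it delegates the mask check to
 * dma_supported(), which for direct DMA on powerpc ends up in
 * dma_direct_dma_supported(). */
static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
{
	if (!dma_supported(dev, mask))
		return -EIO;
	dev->coherent_dma_mask = mask;
	return 0;
}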

This fixes it by making dma_direct_dma_supported() use the ZONE_DMA32
limit when it's enabled, and removing the ifdef CONFIG_FSL_SOC around
the code in dma_direct_alloc_coherent() that supports allocating
from the 32-bit zone. (Nowadays, most drivers have been fixed).

This carries an additional change: the test against
memblock_end_of_DRAM() + "dma_offset" is generalized to 32-bit
platforms, instead of just unconditionally returning 1 for these. This
should work since the rest of the code in the direct DMA ops assumes
this is true anyway.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/pgtable.h |    1 +
 arch/powerpc/kernel/dma.c          |   21 ++++++---------------
 arch/powerpc/mm/mem.c              |    5 +++++
 3 files changed, 12 insertions(+), 15 deletions(-)

Comments

Scott Wood Feb. 24, 2015, 8:34 p.m. UTC | #1
On Fri, 2015-02-20 at 19:35 +1100, Benjamin Herrenschmidt wrote:
> @@ -149,14 +141,13 @@ static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
>  
>  static int dma_direct_dma_supported(struct device *dev, u64 mask)
>  {
> -#ifdef CONFIG_PPC64
> -	/* Could be improved so platforms can set the limit in case
> -	 * they have limited DMA windows
> -	 */
> -	return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
> -#else
> -	return 1;
> +	u64 offset = get_dma_offset(dev);
> +	u64 limit = offset + memblock_end_of_DRAM() - 1;
> +
> +#if defined(CONFIG_ZONE_DMA32)
> +	limit = offset + dma_get_zone_limit(ZONE_DMA32);
>  #endif
> +	return mask >= limit;
>  }

I'm confused as to whether dma_supported() is supposed to be testing a
coherent mask or regular mask...  The above suggests coherent, as does
the call to dma_supported() in dma_set_coherent_mask(), but if swiotlb
is used, swiotlb_dma_supported() will only check for a mask that can
accommodate io_tlb_end, without regard for coherent allocations.
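
(For reference, the swiotlb check mentioned above was essentially the
following at the time; a sketch from lib/swiotlb.c, not verbatim:)

/* Only verifies that the bounce buffer itself fits under the mask;
 * it says nothing about where coherent allocations may land. */
int swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	return phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}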

>  static u64 dma_direct_get_required_mask(struct device *dev)
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index f146ef0..a7f15e2 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -277,6 +277,11 @@ int dma_pfn_limit_to_zone(u64 pfn_limit)
>  	return -EPERM;
>  }
>  
> +u64 dma_get_zone_limit(int zone)
> +{
> +	return max_zone_pfns[zone] << PAGE_SHIFT;
> +}

If you must do this in terms of bytes rather than pfn, cast to u64
before shifting -- and even then the result will be PAGE_SIZE - 1 too
small.
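
(Taken literally, the two corrections would give something like the
sketch below; the "+ PAGE_SIZE - 1" follows the "too small" remark and
assumes max_zone_pfns[] holds the zone's last pfn rather than one past
it:)

u64 dma_get_zone_limit(int zone)
{
	/* Cast before shifting: max_zone_pfns[] is unsigned long, so on
	 * 32-bit the shift would otherwise truncate for zones ending at
	 * or above 4GB; then extend to the last byte of the final page. */
	return ((u64)max_zone_pfns[zone] << PAGE_SHIFT) + PAGE_SIZE - 1;
}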

-Scott
Benjamin Herrenschmidt Feb. 24, 2015, 8:40 p.m. UTC | #2
On Tue, 2015-02-24 at 14:34 -0600, Scott Wood wrote:
> On Fri, 2015-02-20 at 19:35 +1100, Benjamin Herrenschmidt wrote:
> > @@ -149,14 +141,13 @@ static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
> >  
> >  static int dma_direct_dma_supported(struct device *dev, u64 mask)
> >  {
> > -#ifdef CONFIG_PPC64
> > -	/* Could be improved so platforms can set the limit in case
> > -	 * they have limited DMA windows
> > -	 */
> > -	return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
> > -#else
> > -	return 1;
> > +	u64 offset = get_dma_offset(dev);
> > +	u64 limit = offset + memblock_end_of_DRAM() - 1;
> > +
> > +#if defined(CONFIG_ZONE_DMA32)
> > +	limit = offset + dma_get_zone_limit(ZONE_DMA32);
> >  #endif
> > +	return mask >= limit;
> >  }
> 
> I'm confused as to whether dma_supported() is supposed to be testing a
> coherent mask or regular mask...  The above suggests coherent, as does
> the call to dma_supported() in dma_set_coherent_mask(), but if swiotlb
> is used, swiotlb_dma_supported() will only check for a mask that can
> accommodate io_tlb_end, without regard for coherent allocations.

This is confusing indeed, but without the above, dma_set_coherent_mask()
won't work ... so I'm assuming the above. Notice that x86 doesn't even
bother and basically returns 1 for anything above a 24-bit mask (apart
from the force_sac case, but we can ignore it).
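
(The x86 fallback Ben describes looked roughly like this at the time --
a sketch of arch/x86/kernel/pci-dma.c, eliding the ops->dma_supported
dispatch, with forbid_dac being the "pci=nodac" DAC knob:)

int dma_supported(struct device *dev, u64 mask)
{
#ifdef CONFIG_PCI
	/* Refuse dual-address-cycle (above 32-bit) masks if forbidden */
	if (mask > 0xffffffff && forbid_dac > 0) {
		dev_info(dev, "PCI: Disallowing DAC for device\n");
		return 0;
	}
#endif
	/* Below the 24-bit ISA DMA floor nothing can be guaranteed */
	if (mask < DMA_BIT_MASK(24))
		return 0;

	return 1;
}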

So we probably should fix our swiotlb implementation as well... but
that's orthogonal.

> >  static u64 dma_direct_get_required_mask(struct device *dev)
> > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > index f146ef0..a7f15e2 100644
> > --- a/arch/powerpc/mm/mem.c
> > +++ b/arch/powerpc/mm/mem.c
> > @@ -277,6 +277,11 @@ int dma_pfn_limit_to_zone(u64 pfn_limit)
> >  	return -EPERM;
> >  }
> >  
> > +u64 dma_get_zone_limit(int zone)
> > +{
> > +	return max_zone_pfns[zone] << PAGE_SHIFT;
> > +}
> 
> If you must do this in terms of bytes rather than pfn, cast to u64
> before shifting -- and even then the result will be PAGE_SIZE - 1 too
> small.

Do we have RAM above what an unsigned long can hold? I think I'll just
make it a pfn and respin...

Cheers,
Ben.

> -Scott
>
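
(The respin Ben mentions isn't in this thread; a pfn-based variant might
look like the hypothetical sketch below, which avoids the 32-bit shift
truncation by leaving the shift to the 64-bit caller:)

/* Hypothetical pfn-returning helper for the respin */
unsigned long dma_get_zone_pfn_limit(int zone)
{
	return max_zone_pfns[zone];
}

/* ... and its use in dma_direct_dma_supported(), shifting in u64: */
limit = offset + ((u64)dma_get_zone_pfn_limit(ZONE_DMA32) << PAGE_SHIFT) - 1;
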
Scott Wood Feb. 24, 2015, 11:05 p.m. UTC | #3
On Wed, 2015-02-25 at 07:40 +1100, Benjamin Herrenschmidt wrote:
> On Tue, 2015-02-24 at 14:34 -0600, Scott Wood wrote:
> > On Fri, 2015-02-20 at 19:35 +1100, Benjamin Herrenschmidt wrote:
> > >  static u64 dma_direct_get_required_mask(struct device *dev)
> > > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > > index f146ef0..a7f15e2 100644
> > > --- a/arch/powerpc/mm/mem.c
> > > +++ b/arch/powerpc/mm/mem.c
> > > @@ -277,6 +277,11 @@ int dma_pfn_limit_to_zone(u64 pfn_limit)
> > >  	return -EPERM;
> > >  }
> > >  
> > > +u64 dma_get_zone_limit(int zone)
> > > +{
> > > +	return max_zone_pfns[zone] << PAGE_SHIFT;
> > > +}
> > 
> > If you must do this in terms of bytes rather than pfn, cast to u64
> > before shifting -- and even then the result will be PAGE_SIZE - 1 too
> > small.
> 
> Do we have RAM above what a unsigned long can hold ? I think I'll just
> make it a pfn and respin...

Yes, we can have over 4 GiB RAM on 32-bit -- e.g. Freescale e500-based
parts have 36-bit physical addressing (CONFIG_PHYS_64BIT), so physical
addresses exceed what an unsigned long holds there.

-Scott

Patch

diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9835ac4..a3680b8 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -217,6 +217,7 @@  extern pgd_t swapper_pg_dir[];
 
 void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
 int dma_pfn_limit_to_zone(u64 pfn_limit);
+u64 dma_get_zone_limit(int zone);
 extern void paging_init(void);
 
 /*
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 484b2d4..e24e0ae 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -53,16 +53,9 @@  void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 #else
 	struct page *page;
 	int node = dev_to_node(dev);
-#ifdef CONFIG_FSL_SOC
 	u64 pfn = get_pfn_limit(dev);
 	int zone;
 
-	/*
-	 * This code should be OK on other platforms, but we have drivers that
-	 * don't set coherent_dma_mask. As a workaround we just ifdef it. This
-	 * whole routine needs some serious cleanup.
-	 */
-
 	zone = dma_pfn_limit_to_zone(pfn);
 	if (zone < 0) {
 		dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
@@ -80,7 +73,6 @@  void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 		break;
 #endif
 	};
-#endif /* CONFIG_FSL_SOC */
 
 	/* ignore region specifiers */
 	flag  &= ~(__GFP_HIGHMEM);
@@ -149,14 +141,13 @@  static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
 
 static int dma_direct_dma_supported(struct device *dev, u64 mask)
 {
-#ifdef CONFIG_PPC64
-	/* Could be improved so platforms can set the limit in case
-	 * they have limited DMA windows
-	 */
-	return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
-#else
-	return 1;
+	u64 offset = get_dma_offset(dev);
+	u64 limit = offset + memblock_end_of_DRAM() - 1;
+
+#if defined(CONFIG_ZONE_DMA32)
+	limit = offset + dma_get_zone_limit(ZONE_DMA32);
 #endif
+	return mask >= limit;
 }
 
 static u64 dma_direct_get_required_mask(struct device *dev)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f146ef0..a7f15e2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -277,6 +277,11 @@  int dma_pfn_limit_to_zone(u64 pfn_limit)
 	return -EPERM;
 }
 
+u64 dma_get_zone_limit(int zone)
+{
+	return max_zone_pfns[zone] << PAGE_SHIFT;
+}
+
 /*
  * paging_init() sets up the page tables - in fact we've already done this.
  */