
[3/5] dma-direct: refine dma_direct_alloc zone selection

Message ID: 20180920185247.20037-4-hch@lst.de (mailing list archive)
State: Not Applicable
Series: [1/5] dma-mapping: make the get_required_mask method available unconditionally

Checks

Context                       Check    Description
snowpatch_ozlabs/apply_patch  warning  next/apply_patch Patch failed to apply
snowpatch_ozlabs/apply_patch  fail     Failed to apply to any branch

Commit Message

Christoph Hellwig Sept. 20, 2018, 6:52 p.m. UTC
We need to take the DMA offset and encryption bit into account when
selecting a zone.  Use the opportunity to factor out the zone
selection into a helper for reuse.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/direct.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

Comments

Benjamin Herrenschmidt Sept. 27, 2018, 1:45 a.m. UTC | #1
On Thu, 2018-09-20 at 20:52 +0200, Christoph Hellwig wrote:
> +static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
> +               u64 *phys_mask)
> +{
> +       if (force_dma_unencrypted())
> +               *phys_mask = __dma_to_phys(dev, dma_mask);
> +       else
> +               *phys_mask = dma_to_phys(dev, dma_mask);
> +
> +       /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
> +       if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
> +               return GFP_DMA;
> +       if (*phys_mask <= DMA_BIT_MASK(32))
> +               return GFP_DMA32;
> +       return 0;
> +}

I'm not sure this is entirely right.

Let's say the mask is 30 bits. You will return GFP_DMA32, which will
fail if you allocate something above 1G (which is legit for
ZONE_DMA32).

I think the logic should be:

	if (mask < ZONE_DMA)
		fail;
	else if (mask < ZONE_DMA32)
		use ZONE_DMA or fail if doesn't exist
	else if (mask < top_of_ram)
		use ZONE_DMA32 or fail if doesn't exist
	else
		use ZONE_NORMAL

Additionally, we want to fold in the top-of-ram test so that we don't
fail the second case if the mask is 31 bits (smaller than ZONE_DMA32)
but the top of RAM is also small enough.

So the top-of-ram test should take precedence.

Cheers,
Ben.
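
As a standalone C sketch of the precedence laid out above, with the
top-of-ram test folded in first as Ben suggests (the zone limits, the
availability flags, and pick_zone() itself are illustrative stubs, not
kernel symbols):

	#include <stdbool.h>
	#include <stdint.h>

	#define ZONE_DMA_LIMIT    ((1ULL << 30) - 1)  /* stub: top of ZONE_DMA */
	#define ZONE_DMA32_LIMIT  ((1ULL << 32) - 1)  /* stub: top of ZONE_DMA32 */

	enum zone_pick { PICK_FAIL, PICK_DMA, PICK_DMA32, PICK_NORMAL };

	/* The top-of-ram test takes precedence: a mask that covers all of
	 * RAM never needs a restricted zone, however small the mask is. */
	static enum zone_pick pick_zone(uint64_t mask, uint64_t top_of_ram,
					bool have_zone_dma, bool have_zone_dma32)
	{
		if (mask >= top_of_ram)
			return PICK_NORMAL;
		if (mask < ZONE_DMA_LIMIT)
			return PICK_FAIL;
		if (mask < ZONE_DMA32_LIMIT)
			return have_zone_dma ? PICK_DMA : PICK_FAIL;
		return have_zone_dma32 ? PICK_DMA32 : PICK_FAIL;
	}
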
Christoph Hellwig Sept. 27, 2018, 1:49 p.m. UTC | #2
On Thu, Sep 27, 2018 at 11:45:15AM +1000, Benjamin Herrenschmidt wrote:
> I'm not sure this is entirely right.
> 
> Let's say the mask is 30 bits. You will return GFP_DMA32, which will
> fail if you allocate something above 1G (which is legit for
> ZONE_DMA32).

And then we will try GFP_DMA further down in the function:

		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
		    !(gfp & GFP_DMA)) {
			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}

This is an old optimization from x86, because chances are high that
GFP_DMA32 will give you suitable memory for the infamous 31-bit
dma mask devices (at least at boot time) and thus we don't have
to deplete the tiny ZONE_DMA pool.
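
Reduced to just the flag transitions, that retry can be modelled in
standalone C as below; GFP_DMA and GFP_DMA32 carry stub values and
next_attempt() is a hypothetical helper, not the kernel's code:

	#include <stdint.h>
	#include <stdio.h>

	#define GFP_DMA    0x01u  /* stub values, not the kernel's flags */
	#define GFP_DMA32  0x02u

	/* Zone flags for the next attempt after a failed allocation,
	 * mirroring the fallbacks in dma_direct_alloc_pages(): try
	 * GFP_DMA32 first, and only then dip into the tiny ZONE_DMA. */
	static unsigned int next_attempt(unsigned int gfp, uint64_t phys_mask)
	{
		if (phys_mask < UINT64_MAX && !(gfp & (GFP_DMA32 | GFP_DMA)))
			return gfp | GFP_DMA32;
		if (phys_mask < UINT32_MAX && !(gfp & GFP_DMA))
			return (gfp & ~GFP_DMA32) | GFP_DMA;
		return 0;  /* no more restricted zones to fall back to */
	}

	int main(void)
	{
		/* A 30-bit mask starts optimistically with GFP_DMA32 and
		 * falls back to GFP_DMA only if that allocation misses. */
		printf("%#x\n", next_attempt(GFP_DMA32, (1ULL << 30) - 1));
		return 0;
	}
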
Robin Murphy Sept. 27, 2018, 2:30 p.m. UTC | #3
On 20/09/18 19:52, Christoph Hellwig wrote:
> We need to take the DMA offset and encryption bit into account when
> selecting a zone.  Use the opportunity to factor out the zone
> selection into a helper for reuse.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   kernel/dma/direct.c | 31 +++++++++++++++++++++----------
>   1 file changed, 21 insertions(+), 10 deletions(-)
> 
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 81b73a5bba54..3c404e33d946 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -68,6 +68,22 @@ u64 dma_direct_get_required_mask(struct device *dev)
>   	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
>   }
>   
> +static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
> +		u64 *phys_mask)
> +{
> +	if (force_dma_unencrypted())
> +		*phys_mask = __dma_to_phys(dev, dma_mask);
> +	else
> +		*phys_mask = dma_to_phys(dev, dma_mask);

Maybe make phys_to_dma_direct() take u64 instead of phys_addr_t so we 
can reuse it here?

> +	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
> +	if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
> +		return GFP_DMA;

If we don't have ZONE_DMA it would in theory be a tiny bit better to 
fall back to GFP_DMA32 instead of returning 0 here, but I'm not sure 
it's worth the bother.
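
As a small sketch of that nit (with the config switch and flag values
stubbed for illustration), the small-mask case would degrade to
GFP_DMA32 instead of 0:

	#include <stdint.h>

	#define GFP_DMA        0x01u              /* stub flag values */
	#define GFP_DMA32      0x02u
	#define HAVE_ZONE_DMA  0                  /* stub for IS_ENABLED(CONFIG_ZONE_DMA) */
	#define ZONE_DMA_MASK  ((1ULL << 24) - 1) /* stub for DMA_BIT_MASK(ARCH_ZONE_DMA_BITS) */

	static unsigned int zone_gfp(uint64_t phys_mask)
	{
		/* Without ZONE_DMA, GFP_DMA32 is still the most restrictive
		 * zone that exists, so prefer it to returning 0. */
		if (phys_mask <= ZONE_DMA_MASK)
			return HAVE_ZONE_DMA ? GFP_DMA : GFP_DMA32;
		if (phys_mask <= UINT32_MAX)
			return GFP_DMA32;
		return 0;
	}
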

> +	if (*phys_mask <= DMA_BIT_MASK(32))
> +		return GFP_DMA32;
> +	return 0;
> +}
> +
>   static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
>   {
>   	return phys_to_dma_direct(dev, phys) + size - 1 <=
> @@ -80,17 +96,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
>   	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
>   	int page_order = get_order(size);
>   	struct page *page = NULL;
> +	u64 phys_mask;
>   	void *ret;
>   
>   	/* we always manually zero the memory once we are done: */
>   	gfp &= ~__GFP_ZERO;
> -
> -	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
> -	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
> -		gfp |= GFP_DMA;
> -	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
> -		gfp |= GFP_DMA32;
> -
> +	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
> +			&phys_mask);
>   again:
>   	/* CMA can be used only in the context which permits sleeping */
>   	if (gfpflags_allow_blocking(gfp)) {
> @@ -109,15 +121,14 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
>   		page = NULL;
>   
>   		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
> -		    dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
> +		    phys_mask < DMA_BIT_MASK(64) &&
>   		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
>   			gfp |= GFP_DMA32;
>   			goto again;

Ah right, in that situation we should probably end up here anyway, so 
that's good enough - definitely not worth any more #ifdeffery above. 
Nits aside,

Reviewed-by: Robin Murphy <robin.murphy@arm.com>

>   		}
>   
>   		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
> -		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
> -		    !(gfp & GFP_DMA)) {
> +		    phys_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) {
>   			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
>   			goto again;
>   		}
>
Christoph Hellwig Sept. 27, 2018, 3:30 p.m. UTC | #4
On Thu, Sep 27, 2018 at 03:30:20PM +0100, Robin Murphy wrote:
>>   +static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 
>> dma_mask,
>> +		u64 *phys_mask)
>> +{
>> +	if (force_dma_unencrypted())
>> +		*phys_mask = __dma_to_phys(dev, dma_mask);
>> +	else
>> +		*phys_mask = dma_to_phys(dev, dma_mask);
>
> Maybe make phys_to_dma_direct() take u64 instead of phys_addr_t so we can 
> reuse it here?

This is a dma_to_phys and not a phys_to_dma.
Robin Murphy Sept. 27, 2018, 3:38 p.m. UTC | #5
On 27/09/18 16:30, Christoph Hellwig wrote:
> On Thu, Sep 27, 2018 at 03:30:20PM +0100, Robin Murphy wrote:
>>>    +static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64
>>> dma_mask,
>>> +		u64 *phys_mask)
>>> +{
>>> +	if (force_dma_unencrypted())
>>> +		*phys_mask = __dma_to_phys(dev, dma_mask);
>>> +	else
>>> +		*phys_mask = dma_to_phys(dev, dma_mask);
>>
>> Maybe make phys_to_dma_direct() take u64 instead of phys_addr_t so we can
>> reuse it here?
> 
> This is a dma_to_phys and not a phys_to_dma.

Ugh, clearly it's time to stop reviewing patches for today... sorry :(

Robin.
Christoph Hellwig Sept. 27, 2018, 3:41 p.m. UTC | #6
On Thu, Sep 27, 2018 at 04:38:31PM +0100, Robin Murphy wrote:
> On 27/09/18 16:30, Christoph Hellwig wrote:
>> On Thu, Sep 27, 2018 at 03:30:20PM +0100, Robin Murphy wrote:
>>>>    +static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64
>>>> dma_mask,
>>>> +		u64 *phys_mask)
>>>> +{
>>>> +	if (force_dma_unencrypted())
>>>> +		*phys_mask = __dma_to_phys(dev, dma_mask);
>>>> +	else
>>>> +		*phys_mask = dma_to_phys(dev, dma_mask);
>>>
>>> Maybe make phys_to_dma_direct() take u64 instead of phys_addr_t so we can
>>> reuse it here?
>>
>> This is a dma_to_phys and not a phys_to_dma.
>
> Ugh, clearly it's time to stop reviewing patches for today... sorry :(

I actually made the same mistake when writing it..

Although I'd really like to see some feedback from you on the arm64
swiotlb series once you've had more coffee ;-)
Benjamin Herrenschmidt Sept. 28, 2018, 12:06 a.m. UTC | #7
On Thu, 2018-09-27 at 15:49 +0200, Christoph Hellwig wrote:
> On Thu, Sep 27, 2018 at 11:45:15AM +1000, Benjamin Herrenschmidt wrote:
> > I'm not sure this is entirely right.
> > 
> > Let's say the mask is 30 bits. You will return GFP_DMA32, which will
> > fail if you allocate something above 1G (which is legit for
> > ZONE_DMA32).
> 
> And then we will try GFP_DMA further down in the function:
> 
> 		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
> 		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
> 		    !(gfp & GFP_DMA)) {
> 			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
> 			goto again;
> 		}
> 
> This is an old optimization from x86, because chances are high that
> GFP_DMA32 will give you suitable memory for the infamous 31-bit
> dma mask devices (at least at boot time) and thus we don't have
> to deplete the tiny ZONE_DMA pool.

I see, it's rather confusing :-) Wouldn't it be better to check against
top of 32-bit memory instead here too?


Cheers,
Ben.
Christoph Hellwig Sept. 28, 2018, 3:46 p.m. UTC | #8
On Fri, Sep 28, 2018 at 10:06:48AM +1000, Benjamin Herrenschmidt wrote:
> On Thu, 2018-09-27 at 15:49 +0200, Christoph Hellwig wrote:
> > On Thu, Sep 27, 2018 at 11:45:15AM +1000, Benjamin Herrenschmidt wrote:
> > > I'm not sure this is entirely right.
> > > 
> > > Let's say the mask is 30 bits. You will return GFP_DMA32, which will
> > > fail if you allocate something above 1G (which is legit for
> > > ZONE_DMA32).
> > 
> > And then we will try GFP_DMA further down in the function:
> > 
> > 		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
> > 		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
> > 		    !(gfp & GFP_DMA)) {
> > 			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
> > 			goto again;
> > 		}
> > 
> > This is an old optimization from x86, because chances are high that
> > GFP_DMA32 will give you suitable memory for the infamous 31-bit
> > dma mask devices (at least at boot time) and thus we don't have
> > to deplete the tiny ZONE_DMA pool.
> 
> I see, it's rather confusing :-) Wouldn't it be better to check against
> top of 32-bit memory instead here too?

Where is here?  In __dma_direct_optimal_gfp_mask we already handled
it due to the optimistic zone selection we are discussing.

In the fallback quoted above there is no point for it, as with a
physical memory size smaller than ZONE_DMA32 (or ZONE_DMA for that matter)
we will have succeeded with the optimistic zone selection and not hit
the fallback path.

Either way this code probably needs much better comments.  I'll send
a patch on top of the recent series.
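
For reference, a standalone sketch of what such comments might look
like on the helper; the kernel symbols are replaced with stubs and the
struct device argument is dropped for brevity, so this is illustrative
rather than the eventual patch:

	#include <stdbool.h>
	#include <stdint.h>

	#define DMA_BIT_MASK(n)     (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))
	#define ARCH_ZONE_DMA_BITS  24    /* stub: the x86 default */
	#define GFP_DMA             0x01u /* stub flag values */
	#define GFP_DMA32           0x02u

	static bool force_dma_unencrypted(void) { return false; }    /* stub */
	static uint64_t dma_to_phys(uint64_t dma)   { return dma; }  /* stub: no offset */
	static uint64_t __dma_to_phys(uint64_t dma) { return dma; }  /* stub */

	static unsigned int dma_direct_optimal_gfp_mask(uint64_t dma_mask,
							uint64_t *phys_mask)
	{
		/* Convert the device-visible mask to a physical address
		 * mask, accounting for any bus offset and, when memory
		 * encryption forces unencrypted DMA, the encryption bit. */
		if (force_dma_unencrypted())
			*phys_mask = __dma_to_phys(dma_mask);
		else
			*phys_mask = dma_to_phys(dma_mask);

		/* Optimistic zone selection: pick the least restrictive
		 * zone that may satisfy the mask.  An allocation that
		 * still ends up above the mask fails dma_coherent_ok()
		 * and is retried from a smaller zone, so a sub-32-bit
		 * mask may safely start with GFP_DMA32. */
		if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
			return GFP_DMA;
		if (*phys_mask <= DMA_BIT_MASK(32))
			return GFP_DMA32;
		return 0;
	}
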

Patch

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 81b73a5bba54..3c404e33d946 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -68,6 +68,22 @@  u64 dma_direct_get_required_mask(struct device *dev)
 	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
 }
 
+static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
+		u64 *phys_mask)
+{
+	if (force_dma_unencrypted())
+		*phys_mask = __dma_to_phys(dev, dma_mask);
+	else
+		*phys_mask = dma_to_phys(dev, dma_mask);
+
+	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
+	if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
+		return GFP_DMA;
+	if (*phys_mask <= DMA_BIT_MASK(32))
+		return GFP_DMA32;
+	return 0;
+}
+
 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
 	return phys_to_dma_direct(dev, phys) + size - 1 <=
@@ -80,17 +96,13 @@  void *dma_direct_alloc_pages(struct device *dev, size_t size,
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	int page_order = get_order(size);
 	struct page *page = NULL;
+	u64 phys_mask;
 	void *ret;
 
 	/* we always manually zero the memory once we are done: */
 	gfp &= ~__GFP_ZERO;
-
-	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
-		gfp |= GFP_DMA;
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
-		gfp |= GFP_DMA32;
-
+	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
+			&phys_mask);
 again:
 	/* CMA can be used only in the context which permits sleeping */
 	if (gfpflags_allow_blocking(gfp)) {
@@ -109,15 +121,14 @@  void *dma_direct_alloc_pages(struct device *dev, size_t size,
 		page = NULL;
 
 		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
-		    dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
+		    phys_mask < DMA_BIT_MASK(64) &&
 		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
 			gfp |= GFP_DMA32;
 			goto again;
 		}
 
 		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
-		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
-		    !(gfp & GFP_DMA)) {
+		    phys_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) {
 			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
 			goto again;
 		}