diff mbox

[RFC] gpu: host1x: shut up warning about DMA API misuse

Message ID 20170419182413.866327-1-arnd@arndb.de
State Deferred
Headers show

Commit Message

Arnd Bergmann April 19, 2017, 6:24 p.m. UTC
When dma_addr_t and phys_addr_t are not the same size, we get a warning
from the dma_alloc_wc function:

drivers/gpu/host1x/cdma.c: In function 'host1x_pushbuffer_init':
drivers/gpu/host1x/cdma.c:94:48: error: passing argument 3 of 'dma_alloc_wc' from incompatible pointer type [-Werror=incompatible-pointer-types]
   pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
                                                ^
In file included from drivers/gpu/host1x/cdma.c:22:0:
include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned int *}'
 static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev, size_t size,
                                     ^~~~~~~~~~~~
drivers/gpu/host1x/cdma.c:113:48: error: passing argument 3 of 'dma_alloc_wc' from incompatible pointer type [-Werror=incompatible-pointer-types]
   pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
                                                ^
In file included from drivers/gpu/host1x/cdma.c:22:0:
include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned int *}'
 static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev, size_t size,
                                     ^~~~~~~~~~~~

The problem here is that dma_alloc_wc() returns a pointer to a dma_addr_t
that may already be translated by an IOMMU, but the driver passes this
into iommu_map() as a physical address. This works by accident only when
the IOMMU does not get registered with the DMA API and there is a 1:1
mapping between physical addresses as seen by the CPU and the device.

The fundamental problem here is the lack of a generic API to do what the
driver wants: allocating CPU-visible memory for use by a device through
user-defined IOMMU settings. Neither the dma-mapping API nor the IOMMU
API can do this by itself, and combining the two is not well-defined.

This patch addresses the type mismatch by adding a third pointer into the
push_buffer structure: in addition to the address as seen from the CPU
and the address inside of the local IOMMU domain, the pb->alloc pointer
is the token returned by dma_alloc_wc(), and this is what we later need
to pass into dma_free_wc().

The address we pass into iommu_map() however is the physical address
computed from virt_to_phys(), assuming that the pointer we have here
is part of the linear mapping (which is also questionable, e.g. when we
have a non-coherent device on ARM32 this may be false). Also, when
the DMA API uses the IOMMU to allocate the pointer for the default
domain, we end up with two IOMMU mappings for the same physical address.

Fixes: 404bfb78daf3 ("gpu: host1x: Add IOMMU support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/gpu/host1x/cdma.c | 26 ++++++++++----------------
 drivers/gpu/host1x/cdma.h |  1 +
 2 files changed, 11 insertions(+), 16 deletions(-)

Comments

Mikko Perttunen April 20, 2017, 7:02 a.m. UTC | #1
On 19.04.2017 21:24, Arnd Bergmann wrote:
> When dma_addr_t and phys_addr_t are not the same size, we get a warning
> from the dma_alloc_wc function:
>
> drivers/gpu/host1x/cdma.c: In function 'host1x_pushbuffer_init':
> drivers/gpu/host1x/cdma.c:94:48: error: passing argument 3 of 'dma_alloc_wc' from incompatible pointer type [-Werror=incompatible-pointer-types]
>    pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
>                                                 ^
> In file included from drivers/gpu/host1x/cdma.c:22:0:
> include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned int *}'
>  static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev, size_t size,
>                                      ^~~~~~~~~~~~
> drivers/gpu/host1x/cdma.c:113:48: error: passing argument 3 of 'dma_alloc_wc' from incompatible pointer type [-Werror=incompatible-pointer-types]
>    pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
>                                                 ^
> In file included from drivers/gpu/host1x/cdma.c:22:0:
> include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned int *}'
>  static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev, size_t size,
>                                      ^~~~~~~~~~~~
>
> The problem here is that dma_alloc_wc() returns a pointer to a dma_addr_t
> that may already be translated by an IOMMU, but the driver passes this
> into iommu_map() as a physical address. This works by accident only when
> the IOMMU does not get registered with the DMA API and there is a 1:1
> mapping between physical addresses as seen by the CPU and the device.
>
> The fundamental problem here is the lack of a generic API to do what the
> driver wants: allocating CPU-visible memory for use by a device through
> user-defined IOMMU settings. Neither the dma-mapping API nor the IOMMU
> API can do this by itself, and combining the two is not well-defined.
>
> This patch addresses the type mismatch by adding a third pointer into the
> push_buffer structure: in addition to the address as seen from the CPU
> and the address inside of the local IOMMU domain, the pb->alloc pointer
> is the token returned by dma_alloc_wc(), and this is what we later need
> to pass into dma_free_wc().
>
> The address we pass into iommu_map() however is the physical address
> computed from virt_to_phys(), assuming that the pointer we have here
> is part of the linear mapping (which is also questionable, e.g. when we
> have a non-coherent device on ARM32 this may be false). Also, when
> the DMA API uses the IOMMU to allocate the pointer for the default
> domain, we end up with two IOMMU mappings for the same physical address.
>

I think we have a "policy" on Tegra that the DMA API will never allocate 
using the IOMMU (Thierry can elaborate on this), which is why I wrote 
the code with that assumption. Essentially, we have made the DMA API 
into the API that allocates CPU-visible memory.

Considering that, I'm wondering if we can just have a temporary local 
dma_addr_t and then cast that to phys_addr_t, combined with a good comment?

Cheers,
Mikko.

> Fixes: 404bfb78daf3 ("gpu: host1x: Add IOMMU support")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  drivers/gpu/host1x/cdma.c | 26 ++++++++++----------------
>  drivers/gpu/host1x/cdma.h |  1 +
>  2 files changed, 11 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
> index 28541b280739..286edeca7ba1 100644
> --- a/drivers/gpu/host1x/cdma.c
> +++ b/drivers/gpu/host1x/cdma.c
> @@ -59,7 +59,7 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
>  		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
>  	}
>
> -	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
> +	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->alloc);
>
>  	pb->mapped = NULL;
>  	pb->phys = 0;
> @@ -81,20 +81,21 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
>  	pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;
>
>  	size = pb->size + 4;
> +	if (host1x->domain)
> +		size = iova_align(&host1x->iova, size);
>
>  	/* initialize buffer pointers */
>  	pb->fence = pb->size - 8;
>  	pb->pos = 0;
>
> -	if (host1x->domain) {
> -		unsigned long shift;
> +	pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->alloc, GFP_KERNEL);
> +	if (!pb->mapped)
> +		return -ENOMEM;
>
> -		size = iova_align(&host1x->iova, size);
> +	pb->phys = virt_to_phys(pb->mapped);
>
> -		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
> -					  GFP_KERNEL);
> -		if (!pb->mapped)
> -			return -ENOMEM;
> +	if (host1x->domain) {
> +		unsigned long shift;
>
>  		shift = iova_shift(&host1x->iova);
>  		alloc = alloc_iova(&host1x->iova, size >> shift,
> @@ -109,13 +110,6 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
>  				IOMMU_READ);
>  		if (err)
>  			goto iommu_free_iova;
> -	} else {
> -		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
> -					  GFP_KERNEL);
> -		if (!pb->mapped)
> -			return -ENOMEM;
> -
> -		pb->dma = pb->phys;
>  	}
>
>  	pb->alloc_size = size;
> @@ -127,7 +121,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
>  iommu_free_iova:
>  	__free_iova(&host1x->iova, alloc);
>  iommu_free_mem:
> -	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
> +	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->alloc);
>
>  	return err;
>  }
> diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
> index ec170a78f4e1..8479192d4265 100644
> --- a/drivers/gpu/host1x/cdma.h
> +++ b/drivers/gpu/host1x/cdma.h
> @@ -43,6 +43,7 @@ struct host1x_job;
>
>  struct push_buffer {
>  	void *mapped;			/* mapped pushbuffer memory */
> +	dma_addr_t alloc;		/* device address in root domain */
>  	dma_addr_t dma;			/* device address of pushbuffer */
>  	phys_addr_t phys;		/* physical address of pushbuffer */
>  	u32 fence;			/* index we've written */
>
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann April 20, 2017, 8:25 a.m. UTC | #2
On Thu, Apr 20, 2017 at 9:02 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
> On 19.04.2017 21:24, Arnd Bergmann wrote:
>>
>> When dma_addr_t and phys_addr_t are not the same size, we get a warning
>> from the dma_alloc_wc function:
>>
>> drivers/gpu/host1x/cdma.c: In function 'host1x_pushbuffer_init':
>> drivers/gpu/host1x/cdma.c:94:48: error: passing argument 3 of
>> 'dma_alloc_wc' from incompatible pointer type
>> [-Werror=incompatible-pointer-types]
>>    pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
>>                                                 ^
>> In file included from drivers/gpu/host1x/cdma.c:22:0:
>> include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long
>> long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned
>> int *}'
>>  static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev,
>> size_t size,
>>                                      ^~~~~~~~~~~~
>> drivers/gpu/host1x/cdma.c:113:48: error: passing argument 3 of
>> 'dma_alloc_wc' from incompatible pointer type
>> [-Werror=incompatible-pointer-types]
>>    pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
>>                                                 ^
>> In file included from drivers/gpu/host1x/cdma.c:22:0:
>> include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long
>> long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned
>> int *}'
>>  static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev,
>> size_t size,
>>                                      ^~~~~~~~~~~~
>>
>> The problem here is that dma_alloc_wc() returns a pointer to a dma_addr_t
>> that may already be translated by an IOMMU, but the driver passes this
>> into iommu_map() as a physical address. This works by accident only when
>> the IOMMU does not get registered with the DMA API and there is a 1:1
>> mapping between physical addresses as seen by the CPU and the device.
>>
>> The fundamental problem here is the lack of a generic API to do what the
>> driver wants: allocating CPU-visible memory for use by a device through
>> user-defined IOMMU settings. Neither the dma-mapping API nor the IOMMU
>> API can do this by itself, and combining the two is not well-defined.
>>
>> This patch addresses the type mismatch by adding a third pointer into the
>> push_buffer structure: in addition to the address as seen from the CPU
>> and the address inside of the local IOMMU domain, the pb->alloc pointer
>> is the token returned by dma_alloc_wc(), and this is what we later need
>> to pass into dma_free_wc().
>>
>> The address we pass into iommu_map() however is the physical address
>> computed from virt_to_phys(), assuming that the pointer we have here
>> is part of the linear mapping (which is also questionable, e.g. when we
>> have a non-coherent device on ARM32 this may be false). Also, when
>> the DMA API uses the IOMMU to allocate the pointer for the default
>> domain, we end up with two IOMMU mappings for the same physical address.
>>
>
> I think we have a "policy" on Tegra that the DMA API will never allocate
> using the IOMMU (Thierry can elaborate on this), which is why I wrote the
> code with that assumption. Essentially, we have made the DMA API into the
> API that allocates CPU-visible memory.

I don't think this can be a per-platform policy.

> Considering that, I'm wondering if we can just have a temporary local
> dma_addr_t and then cast that to phys_addr_t, combined with a good comment?

That was my first approach, and it does address the warning, but
I did not send it because it still felt too wrong.

     Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mikko Perttunen April 20, 2017, 9:44 a.m. UTC | #3
On 20.04.2017 11:25, Arnd Bergmann wrote:
> On Thu, Apr 20, 2017 at 9:02 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
>> On 19.04.2017 21:24, Arnd Bergmann wrote:
>>>
>>> When dma_addr_t and phys_addr_t are not the same size, we get a warning
>>> from the dma_alloc_wc function:
>>>
>>> drivers/gpu/host1x/cdma.c: In function 'host1x_pushbuffer_init':
>>> drivers/gpu/host1x/cdma.c:94:48: error: passing argument 3 of
>>> 'dma_alloc_wc' from incompatible pointer type
>>> [-Werror=incompatible-pointer-types]
>>>    pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
>>>                                                 ^
>>> In file included from drivers/gpu/host1x/cdma.c:22:0:
>>> include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long
>>> long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned
>>> int *}'
>>>  static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev,
>>> size_t size,
>>>                                      ^~~~~~~~~~~~
>>> drivers/gpu/host1x/cdma.c:113:48: error: passing argument 3 of
>>> 'dma_alloc_wc' from incompatible pointer type
>>> [-Werror=incompatible-pointer-types]
>>>    pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
>>>                                                 ^
>>> In file included from drivers/gpu/host1x/cdma.c:22:0:
>>> include/linux/dma-mapping.h:761:37: note: expected 'dma_addr_t * {aka long
>>> long unsigned int *}' but argument is of type 'phys_addr_t * {aka unsigned
>>> int *}'
>>>  static noinline_if_stackbloat void *dma_alloc_wc(struct device *dev,
>>> size_t size,
>>>                                      ^~~~~~~~~~~~
>>>
>>> The problem here is that dma_alloc_wc() returns a pointer to a dma_addr_t
>>> that may already be translated by an IOMMU, but the driver passes this
>>> into iommu_map() as a physical address. This works by accident only when
>>> the IOMMU does not get registered with the DMA API and there is a 1:1
>>> mapping between physical addresses as seen by the CPU and the device.
>>>
>>> The fundamental problem here is the lack of a generic API to do what the
>>> driver wants: allocating CPU-visible memory for use by a device through
>>> user-defined IOMMU settings. Neither the dma-mapping API nor the IOMMU
>>> API can do this by itself, and combining the two is not well-defined.
>>>
>>> This patch addresses the type mismatch by adding a third pointer into the
>>> push_buffer structure: in addition to the address as seen from the CPU
>>> and the address inside of the local IOMMU domain, the pb->alloc pointer
>>> is the token returned by dma_alloc_wc(), and this is what we later need
>>> to pass into dma_free_wc().
>>>
>>> The address we pass into iommu_map() however is the physical address
>>> computed from virt_to_phys(), assuming that the pointer we have here
>>> is part of the linear mapping (which is also questionable, e.g. when we
>>> have a non-coherent device on ARM32 this may be false). Also, when
>>> the DMA API uses the IOMMU to allocate the pointer for the default
>>> domain, we end up with two IOMMU mappings for the same physical address.
>>>
>>
>> I think we have a "policy" on Tegra that the DMA API will never allocate
>> using the IOMMU (Thierry can elaborate on this), which is why I wrote the
>> code with that assumption. Essentially, we have made the DMA API into the
>> API that allocates CPU-visible memory.
>
> I don't think this can be a per-platform policy.

Yeah, now that we are using the ARM SMMU on T186 onwards it's more 
difficult than when we were using the Tegra SMMU, so we'll probably have 
to change that.

The goal of the current code is to allocate memory from the CMA, so one 
option would be to change it to use dma_alloc_from_contiguous. That way 
we wouldn't get the double IOMMU mapping, which would be nice.

>
>> Considering that, I'm wondering if we can just have a temporary local
>> dma_addr_t and then cast that to phys_addr_t, combined with a good comment?
>
> That was my first approach, and it does address the warning, but
> I did not send it because it still felt too wrong.
>
>      Arnd
> --
> To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King (Oracle) April 20, 2017, 9:49 a.m. UTC | #4
On Thu, Apr 20, 2017 at 10:25:01AM +0200, Arnd Bergmann wrote:
> On Thu, Apr 20, 2017 at 9:02 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
> > I think we have a "policy" on Tegra that the DMA API will never allocate
> > using the IOMMU (Thierry can elaborate on this), which is why I wrote the
> > code with that assumption. Essentially, we have made the DMA API into the
> > API that allocates CPU-visible memory.
> 
> I don't think this can be a per-platform policy.
> 
> > Considering that, I'm wondering if we can just have a temporary local
> > dma_addr_t and then cast that to phys_addr_t, combined with a good comment?
> 
> That was my first approach, and it does address the warning, but
> I did not send it because it still felt too wrong.

Sounds to me like the warning is justified - it's saying that there's
something not right here which could be a problem.  So I'd say, don't
fix the warning, it's doing its job, highlighting a potential problem
with the code.

(Consider hiding the warning and then running on a platform where the
assumptions are broken.)
Arnd Bergmann April 20, 2017, 10:02 a.m. UTC | #5
On Thu, Apr 20, 2017 at 11:44 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
> On 20.04.2017 11:25, Arnd Bergmann wrote:
>> On Thu, Apr 20, 2017 at 9:02 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
>>> On 19.04.2017 21:24, Arnd Bergmann wrote:
>>
>> I don't think this can be a per-platform policy.
>
>
> Yeah, now that we are using the ARM SMMU on T186 onwards it's more difficult
> than when we were using the Tegra SMMU, so we'll probably have to change
> that.
>
> The goal of the current code is to allocate memory from the CMA, so one
> option would be to change it to use dma_alloc_from_contiguous. That way we
> wouldn't get the double IOMMU mapping, which would be nice.

Right, also we shouldn't define what a particular API does based on
which platform you run on.

> The goal of the current code is to allocate memory from the CMA, so one
> option would be to change it to use dma_alloc_from_contiguous. That way
> we wouldn't get the double IOMMU mapping, which would be nice.

dma_alloc_from_contiguous() is intentionally not exported to drivers,
and it would result in a page that is not mapped into your kernel
address space.

This is probably not the only driver that has this issue, so maybe a
better approach would be to fill the API gap and introduce an IOMMU
API function that takes a domain/iova/length tuple as its argument
and allocates coherent or WC memory that it maps into that address?

      Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann April 20, 2017, 10:05 a.m. UTC | #6
On Thu, Apr 20, 2017 at 11:49 AM, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Apr 20, 2017 at 10:25:01AM +0200, Arnd Bergmann wrote:
>> On Thu, Apr 20, 2017 at 9:02 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
>> > I think we have a "policy" on Tegra that the DMA API will never allocate
>> > using the IOMMU (Thierry can elaborate on this), which is why I wrote the
>> > code with that assumption. Essentially, we have made the DMA API into the
>> > API that allocates CPU-visible memory.
>>
>> I don't think this can be a per-platform policy.
>>
>> > Considering that, I'm wondering if we can just have a temporary local
>> > dma_addr_t and then cast that to phys_addr_t, combined with a good comment?
>>
>> That was my first approach, and it does address the warning, but
>> I did not send it because it still felt too wrong.
>
> Sounds to me like the warning is justified - it's saying that there's
> something not right here which could be a problem.

Absolutely.

> So I'd say, don't
> fix the warning, it's doing its job, highlighting a potential problem
> with the code.
>
> (Consider hiding the warning and then running on a platform where the
> assumptions are broken.)

The problem is that I'm probably the only one who ever sees that
warning since you don't see it in any defconfig builds that all have
the same width for dma_addr_t and phys_addr_t.

The other alternative would be to ask for the patch that introduced
the warning to get reverted before it makes it into v4.12, if we can't
come up with a proper way to do this.

      Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mikko Perttunen April 20, 2017, 10:14 a.m. UTC | #7
On 20.04.2017 13:02, Arnd Bergmann wrote:
> On Thu, Apr 20, 2017 at 11:44 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
>> On 20.04.2017 11:25, Arnd Bergmann wrote:
>>> On Thu, Apr 20, 2017 at 9:02 AM, Mikko Perttunen <cyndis@kapsi.fi> wrote:
>>>> On 19.04.2017 21:24, Arnd Bergmann wrote:
>>>
>>> I don't think this can be a per-platform policy.
>>
>>
>> Yeah, now that we are using the ARM SMMU on T186 onwards it's more difficult
>> than when we were using the Tegra SMMU, so we'll probably have to change
>> that.
>>
>> The goal of the current code is to allocate memory from the CMA, so one
>> option would be to change it to use dma_alloc_from_contiguous. That way we
>> wouldn't get the double IOMMU mapping, which would be nice.
>
> Right, also we shouldn't define what a particular API does based on
> which platform you run on.

Indeed.

>
>> The goal of the current code is to allocate memory from the CMA, so one
>> option would be to change it to use dma_alloc_from_contiguous. That way
>> we wouldn't get the double IOMMU mapping, which would be nice.
>
> dma_alloc_from_contiguous() is intentionally not exported to drivers,
> and it would result in a page that is not mapped into your kernel
> address space.

Ah, sorry, didn't notice that while looking.

>
> This is probably not the only driver that has this issue, so maybe a
> better approach would be to fill the API gap and introduce an IOMMU
> API function that takes a domain/iova/length tuple as its argument
> and allocates coherent or WC memory that it maps into that address?

There is definitely space for some API improvement in the whole IOMMU 
domain memory allocation space. I would prefer keeping the IOMMU mapping 
and memory allocation separate, though, since otherwise we couldn't map 
the same physical memory into multiple domains (or have some memory that 
is temporarily not mapped into any.)

The issue seems to be non-trivial, so I'm fine with your RFC patch - it 
solves the issue and the double domain mapping thing is not a problem 
currently as we don't yet support IOMMU on T186. By the time we do 
support it I hope we'll have the new API or whatever replacement in place.

>
>       Arnd
>

Thanks,
Mikko
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 28541b280739..286edeca7ba1 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -59,7 +59,7 @@  static void host1x_pushbuffer_destroy(struct push_buffer *pb)
 		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
 	}
 
-	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->alloc);
 
 	pb->mapped = NULL;
 	pb->phys = 0;
@@ -81,20 +81,21 @@  static int host1x_pushbuffer_init(struct push_buffer *pb)
 	pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;
 
 	size = pb->size + 4;
+	if (host1x->domain)
+		size = iova_align(&host1x->iova, size);
 
 	/* initialize buffer pointers */
 	pb->fence = pb->size - 8;
 	pb->pos = 0;
 
-	if (host1x->domain) {
-		unsigned long shift;
+	pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->alloc, GFP_KERNEL);
+	if (!pb->mapped)
+		return -ENOMEM;
 
-		size = iova_align(&host1x->iova, size);
+	pb->phys = virt_to_phys(pb->mapped);
 
-		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
-					  GFP_KERNEL);
-		if (!pb->mapped)
-			return -ENOMEM;
+	if (host1x->domain) {
+		unsigned long shift;
 
 		shift = iova_shift(&host1x->iova);
 		alloc = alloc_iova(&host1x->iova, size >> shift,
@@ -109,13 +110,6 @@  static int host1x_pushbuffer_init(struct push_buffer *pb)
 				IOMMU_READ);
 		if (err)
 			goto iommu_free_iova;
-	} else {
-		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
-					  GFP_KERNEL);
-		if (!pb->mapped)
-			return -ENOMEM;
-
-		pb->dma = pb->phys;
 	}
 
 	pb->alloc_size = size;
@@ -127,7 +121,7 @@  static int host1x_pushbuffer_init(struct push_buffer *pb)
 iommu_free_iova:
 	__free_iova(&host1x->iova, alloc);
 iommu_free_mem:
-	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->alloc);
 
 	return err;
 }
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index ec170a78f4e1..8479192d4265 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -43,6 +43,7 @@  struct host1x_job;
 
 struct push_buffer {
 	void *mapped;			/* mapped pushbuffer memory */
+	dma_addr_t alloc;		/* device address in root domain */
 	dma_addr_t dma;			/* device address of pushbuffer */
 	phys_addr_t phys;		/* physical address of pushbuffer */
 	u32 fence;			/* index we've written */