diff mbox series

[U-Boot,v2] usb: dwc3: Allocate and flush dwc->ep0_trb in a cache aligned manner

Message ID 1508131315-23549-1-git-send-email-faiz_abbas@ti.com
State Changes Requested
Delegated to: Marek Vasut
Headers show
Series [U-Boot,v2] usb: dwc3: Allocate and flush dwc->ep0_trb in a cache aligned manner | expand

Commit Message

Faiz Abbas Oct. 16, 2017, 5:21 a.m. UTC
A flush of the cache is required before any outbound DMA access can
take place. The minimum size that can be flushed from the cache is
one cache line. Therefore, any buffer allocated for DMA should be
sized in multiples of the cache line size.

Thus, allocate memory for ep0_trb in multiples of cache line size.

Also, when the local variable trb points to dwc->ep0_trb[1] and is used
to flush the cache, it leads to cache misalignment messages, as only the
base address dwc->ep0_trb is cache aligned.

Therefore, flush cache using ep0_trb_addr which is always cache aligned.

Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
---

v2:
 1. Fixed the subject line tags
 2. Shifted the flush cache statements to below the check on chain

 drivers/usb/dwc3/ep0.c    | 11 ++++++-----
 drivers/usb/dwc3/gadget.c |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

Comments

Marek Vasut Oct. 16, 2017, 10:12 a.m. UTC | #1
On 10/16/2017 07:21 AM, Faiz Abbas wrote:
> A flush of the cache is required before any outbound DMA access can
> take place. The minimum size that can be flushed from the cache is
> one cache line size. Therefore, any buffer allocated for DMA should
> be in multiples of cache line size.
> 
> Thus, allocate memory for ep0_trb in multiples of cache line size.
> 
> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
> to flush cache, it leads to cache misaligned messages as only the base
> address dwc->ep0_trb is cache aligned.
> 
> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
> 
> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>

SGTM, Felipe, can you review this please ?

> ---
> 
> v2:
>  1. Fixed the subject line tags
>  2. Shifted the flush cache statements to below the check on chain
> 
>  drivers/usb/dwc3/ep0.c    | 11 ++++++-----
>  drivers/usb/dwc3/gadget.c |  3 ++-
>  2 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
> index e61d980..d4cc725 100644
> --- a/drivers/usb/dwc3/ep0.c
> +++ b/drivers/usb/dwc3/ep0.c
> @@ -81,12 +81,12 @@ static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma,
>  		trb->ctrl |= (DWC3_TRB_CTRL_IOC
>  				| DWC3_TRB_CTRL_LST);
>  
> -	dwc3_flush_cache((uintptr_t)buf_dma, len);
> -	dwc3_flush_cache((uintptr_t)trb, sizeof(*trb));
> -
>  	if (chain)
>  		return 0;
>  
> +	dwc3_flush_cache((uintptr_t)buf_dma, len);
> +	dwc3_flush_cache((uintptr_t)dwc->ep0_trb_addr, sizeof(*trb) * 2);
> +
>  	memset(&params, 0, sizeof(params));
>  	params.param0 = upper_32_bits(dwc->ep0_trb_addr);
>  	params.param1 = lower_32_bits(dwc->ep0_trb_addr);
> @@ -790,7 +790,7 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc,
>  	if (!r)
>  		return;
>  
> -	dwc3_flush_cache((uintptr_t)trb, sizeof(*trb));
> +	dwc3_flush_cache((uintptr_t)dwc->ep0_trb_addr, sizeof(*trb) * 2);
>  
>  	status = DWC3_TRB_SIZE_TRBSTS(trb->size);
>  	if (status == DWC3_TRBSTS_SETUP_PENDING) {
> @@ -821,7 +821,8 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc,
>  			ur->actual += transferred;
>  
>  			trb++;
> -			dwc3_flush_cache((uintptr_t)trb, sizeof(*trb));
> +			dwc3_flush_cache((uintptr_t)dwc->ep0_trb_addr,
> +					 sizeof(*trb) * 2);
>  			length = trb->size & DWC3_TRB_SIZE_MASK;
>  
>  			ep0->free_slot = 0;
> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
> index e065c5a..895a5bc 100644
> --- a/drivers/usb/dwc3/gadget.c
> +++ b/drivers/usb/dwc3/gadget.c
> @@ -2567,7 +2567,8 @@ int dwc3_gadget_init(struct dwc3 *dwc)
>  		goto err0;
>  	}
>  
> -	dwc->ep0_trb = dma_alloc_coherent(sizeof(*dwc->ep0_trb) * 2,
> +	dwc->ep0_trb = dma_alloc_coherent(ROUND(sizeof(*dwc->ep0_trb) * 2,
> +						CACHELINE_SIZE),
>  					  (unsigned long *)&dwc->ep0_trb_addr);
>  	if (!dwc->ep0_trb) {
>  		dev_err(dwc->dev, "failed to allocate ep0 trb\n");
>
Felipe Balbi Oct. 16, 2017, 1:55 p.m. UTC | #2
Hi,

Marek Vasut <marex@denx.de> writes:
> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>> A flush of the cache is required before any outbound DMA access can
>> take place. The minimum size that can be flushed from the cache is
>> one cache line size. Therefore, any buffer allocated for DMA should
>> be in multiples of cache line size.
>> 
>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>> 
>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>> to flush cache, it leads to cache misaligned messages as only the base
>> address dwc->ep0_trb is cache aligned.
>> 
>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>> 
>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>
> SGTM, Felipe, can you review this please ?

Is cache maintenance done correctly in U-Boot? Isn't the whole idea of a
coherent memory area that it is non-cacheable, non-bufferable memory?

Also, why isn't the API itself guaranteeing alignment requirements?
Faiz Abbas Oct. 16, 2017, 2:15 p.m. UTC | #3
Hi Felipe,

On Monday 16 October 2017 07:25 PM, Felipe Balbi wrote:
> 
> Hi,
> 
> Marek Vasut <marex@denx.de> writes:
>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>> A flush of the cache is required before any outbound DMA access can
>>> take place. The minimum size that can be flushed from the cache is
>>> one cache line size. Therefore, any buffer allocated for DMA should
>>> be in multiples of cache line size.
>>>
>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>
>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>> to flush cache, it leads to cache misaligned messages as only the base
>>> address dwc->ep0_trb is cache aligned.
>>>
>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>
>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>
>> SGTM, Felipe, can you review this please ?
> 
> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
> coherent memory area that is is non-cacheable, non-bufferable memory?
> 
> Also, why isn't the API itself guaranteeing alignment requirements?
> 
There is no support in U-Boot to make a memory area non-cacheable.
This is the definition of dma_alloc_coherent():

static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
{
        *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
        return (void *)*handle;
}

This driver is mostly copied from kernel (where dma_alloc_coherent() is
what you describe) and extra flush_cache functions are added because of
U-Boot's inability to allocate coherent memory.

Thanks,
Faiz
Felipe Balbi Oct. 16, 2017, 2:20 p.m. UTC | #4
Hi,

Faiz Abbas <faiz_abbas@ti.com> writes:
> Hi Felipe,
>
> On Monday 16 October 2017 07:25 PM, Felipe Balbi wrote:
>> 
>> Hi,
>> 
>> Marek Vasut <marex@denx.de> writes:
>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>> A flush of the cache is required before any outbound DMA access can
>>>> take place. The minimum size that can be flushed from the cache is
>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>> be in multiples of cache line size.
>>>>
>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>
>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>> address dwc->ep0_trb is cache aligned.
>>>>
>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>
>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>
>>> SGTM, Felipe, can you review this please ?
>> 
>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>> coherent memory area that is is non-cacheable, non-bufferable memory?
>> 
>> Also, why isn't the API itself guaranteeing alignment requirements?
>> 
> There is no support in u-boot to make a memory area non-cacheable.
> This is the definition of dma_alloc_coherent()
>
> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
> {
>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>         return (void *)*handle;
> }
>
> This driver is mostly copied from kernel (where dma_alloc_coherent() is
> what you describe) and extra flush_cache functions are added because of
> U-Boot's inability to allocate coherent memory.

then that's what should be fixed. No?
Marek Vasut Oct. 16, 2017, 2:41 p.m. UTC | #5
On 10/16/2017 04:20 PM, Felipe Balbi wrote:
> 
> Hi,
> 
> Faiz Abbas <faiz_abbas@ti.com> writes:
>> Hi Felipe,
>>
>> On Monday 16 October 2017 07:25 PM, Felipe Balbi wrote:
>>>
>>> Hi,
>>>
>>> Marek Vasut <marex@denx.de> writes:
>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>> A flush of the cache is required before any outbound DMA access can
>>>>> take place. The minimum size that can be flushed from the cache is
>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>> be in multiples of cache line size.
>>>>>
>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>
>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>> address dwc->ep0_trb is cache aligned.
>>>>>
>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>
>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>
>>>> SGTM, Felipe, can you review this please ?
>>>
>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>
>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>
>> There is no support in u-boot to make a memory area non-cacheable.
>> This is the definition of dma_alloc_coherent()
>>
>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>> {
>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>         return (void *)*handle;
>> }
>>
>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>> what you describe) and extra flush_cache functions are added because of
>> U-Boot's inability to allocate coherent memory.
> 
> then that's what should be fixed. No?

AFAIK I said that in V1 , patches welcome :-)
Faiz Abbas Oct. 16, 2017, 2:50 p.m. UTC | #6
Hi,

On Monday 16 October 2017 07:50 PM, Felipe Balbi wrote:
> 
> Hi,
> 
> Faiz Abbas <faiz_abbas@ti.com> writes:
>> Hi Felipe,
>>
>> On Monday 16 October 2017 07:25 PM, Felipe Balbi wrote:
>>>
>>> Hi,
>>>
>>> Marek Vasut <marex@denx.de> writes:
>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>> A flush of the cache is required before any outbound DMA access can
>>>>> take place. The minimum size that can be flushed from the cache is
>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>> be in multiples of cache line size.
>>>>>
>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>
>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>> address dwc->ep0_trb is cache aligned.
>>>>>
>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>
>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>
>>>> SGTM, Felipe, can you review this please ?
>>>
>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>
>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>
>> There is no support in u-boot to make a memory area non-cacheable.
>> This is the definition of dma_alloc_coherent()
>>
>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>> {
>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>         return (void *)*handle;
>> }
>>
>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>> what you describe) and extra flush_cache functions are added because of
>> U-Boot's inability to allocate coherent memory.
> 
> then that's what should be fixed. No?
> 

You're right but that sounds like a long-term feature which will affect
a huge part of u-boot. Until it is implemented, I guess this is the best
way to handle the issue.

Thanks,
Faiz
Felipe Balbi Oct. 16, 2017, 2:51 p.m. UTC | #7
Hi,

Faiz Abbas <faiz_abbas@ti.com> writes:
>>>> Marek Vasut <marex@denx.de> writes:
>>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>>> A flush of the cache is required before any outbound DMA access can
>>>>>> take place. The minimum size that can be flushed from the cache is
>>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>>> be in multiples of cache line size.
>>>>>>
>>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>>
>>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>>> address dwc->ep0_trb is cache aligned.
>>>>>>
>>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>>
>>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>>
>>>>> SGTM, Felipe, can you review this please ?
>>>>
>>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>>
>>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>>
>>> There is no support in u-boot to make a memory area non-cacheable.
>>> This is the definition of dma_alloc_coherent()
>>>
>>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>>> {
>>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>>         return (void *)*handle;
>>> }
>>>
>>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>>> what you describe) and extra flush_cache functions are added because of
>>> U-Boot's inability to allocate coherent memory.
>> 
>> then that's what should be fixed. No?
>> 
>
> You're right but that sounds like a long-term feature which will affect
> a huge part of u-boot. Until it is implemented, I guess this is the best
> way to handle the issue.

Not my call to make. I'll defer to Marek and Tom
Marek Vasut Oct. 16, 2017, 3:22 p.m. UTC | #8
On 10/16/2017 04:51 PM, Felipe Balbi wrote:
> 
> Hi,
> 
> Faiz Abbas <faiz_abbas@ti.com> writes:
>>>>> Marek Vasut <marex@denx.de> writes:
>>>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>>>> A flush of the cache is required before any outbound DMA access can
>>>>>>> take place. The minimum size that can be flushed from the cache is
>>>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>>>> be in multiples of cache line size.
>>>>>>>
>>>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>>>
>>>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>>>> address dwc->ep0_trb is cache aligned.
>>>>>>>
>>>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>>>
>>>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>>>
>>>>>> SGTM, Felipe, can you review this please ?
>>>>>
>>>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>>>
>>>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>>>
>>>> There is no support in u-boot to make a memory area non-cacheable.
>>>> This is the definition of dma_alloc_coherent()
>>>>
>>>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>>>> {
>>>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>>>         return (void *)*handle;
>>>> }
>>>>
>>>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>>>> what you describe) and extra flush_cache functions are added because of
>>>> U-Boot's inability to allocate coherent memory.
>>>
>>> then that's what should be fixed. No?
>>>
>>
>> You're right but that sounds like a long-term feature which will affect
>> a huge part of u-boot. Until it is implemented, I guess this is the best
>> way to handle the issue.
> 
> Not my call to make. I'll defer to Marek and Tom
> 
We're deep in RC anyway, so feel free to prepare a fix for next MW .
Faiz Abbas Oct. 17, 2017, 5:25 a.m. UTC | #9
On Monday 16 October 2017 08:52 PM, Marek Vasut wrote:
> On 10/16/2017 04:51 PM, Felipe Balbi wrote:
>>
>> Hi,
>>
>> Faiz Abbas <faiz_abbas@ti.com> writes:
>>>>>> Marek Vasut <marex@denx.de> writes:
>>>>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>>>>> A flush of the cache is required before any outbound DMA access can
>>>>>>>> take place. The minimum size that can be flushed from the cache is
>>>>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>>>>> be in multiples of cache line size.
>>>>>>>>
>>>>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>>>>
>>>>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>>>>> address dwc->ep0_trb is cache aligned.
>>>>>>>>
>>>>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>>>>
>>>>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>>>>
>>>>>>> SGTM, Felipe, can you review this please ?
>>>>>>
>>>>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>>>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>>>>
>>>>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>>>>
>>>>> There is no support in u-boot to make a memory area non-cacheable.
>>>>> This is the definition of dma_alloc_coherent()
>>>>>
>>>>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>>>>> {
>>>>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>>>>         return (void *)*handle;
>>>>> }
>>>>>
>>>>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>>>>> what you describe) and extra flush_cache functions are added because of
>>>>> U-Boot's inability to allocate coherent memory.
>>>>
>>>> then that's what should be fixed. No?
>>>>
>>>
>>> You're right but that sounds like a long-term feature which will affect
>>> a huge part of u-boot. Until it is implemented, I guess this is the best
>>> way to handle the issue.
>>
>> Not my call to make. I'll defer to Marek and Tom
>>
> We're deep in RC anyway, so feel free to prepare a fix for next MW .
> 

Fix as in rebase same patch for next merge window?

Thanks,
Faiz
Marek Vasut Oct. 17, 2017, 10:01 a.m. UTC | #10
On 10/17/2017 07:25 AM, Faiz Abbas wrote:
> 
> 
> On Monday 16 October 2017 08:52 PM, Marek Vasut wrote:
>> On 10/16/2017 04:51 PM, Felipe Balbi wrote:
>>>
>>> Hi,
>>>
>>> Faiz Abbas <faiz_abbas@ti.com> writes:
>>>>>>> Marek Vasut <marex@denx.de> writes:
>>>>>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>>>>>> A flush of the cache is required before any outbound DMA access can
>>>>>>>>> take place. The minimum size that can be flushed from the cache is
>>>>>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>>>>>> be in multiples of cache line size.
>>>>>>>>>
>>>>>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>>>>>
>>>>>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>>>>>> address dwc->ep0_trb is cache aligned.
>>>>>>>>>
>>>>>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>>>>>
>>>>>>>> SGTM, Felipe, can you review this please ?
>>>>>>>
>>>>>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>>>>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>>>>>
>>>>>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>>>>>
>>>>>> There is no support in u-boot to make a memory area non-cacheable.
>>>>>> This is the definition of dma_alloc_coherent()
>>>>>>
>>>>>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>>>>>> {
>>>>>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>>>>>         return (void *)*handle;
>>>>>> }
>>>>>>
>>>>>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>>>>>> what you describe) and extra flush_cache functions are added because of
>>>>>> U-Boot's inability to allocate coherent memory.
>>>>>
>>>>> then that's what should be fixed. No?
>>>>>
>>>>
>>>> You're right but that sounds like a long-term feature which will affect
>>>> a huge part of u-boot. Until it is implemented, I guess this is the best
>>>> way to handle the issue.
>>>
>>> Not my call to make. I'll defer to Marek and Tom
>>>
>> We're deep in RC anyway, so feel free to prepare a fix for next MW .
>>
> 
> Fix as in rebase same patch for next merge window?

As in, add support for marking a memory area non-cacheable and then use it
here. It shouldn't be hard; it's only about some MMU table attributes.
Faiz Abbas Oct. 17, 2017, 11:10 a.m. UTC | #11
Hey,

On Tuesday 17 October 2017 03:31 PM, Marek Vasut wrote:
> On 10/17/2017 07:25 AM, Faiz Abbas wrote:
>>
>>
>> On Monday 16 October 2017 08:52 PM, Marek Vasut wrote:
>>> On 10/16/2017 04:51 PM, Felipe Balbi wrote:
>>>>
>>>> Hi,
>>>>
>>>> Faiz Abbas <faiz_abbas@ti.com> writes:
>>>>>>>> Marek Vasut <marex@denx.de> writes:
>>>>>>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>>>>>>> A flush of the cache is required before any outbound DMA access can
>>>>>>>>>> take place. The minimum size that can be flushed from the cache is
>>>>>>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>>>>>>> be in multiples of cache line size.
>>>>>>>>>>
>>>>>>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>>>>>>
>>>>>>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>>>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>>>>>>> address dwc->ep0_trb is cache aligned.
>>>>>>>>>>
>>>>>>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>>>>>>
>>>>>>>>> SGTM, Felipe, can you review this please ?
>>>>>>>>
>>>>>>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>>>>>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>>>>>>
>>>>>>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>>>>>>
>>>>>>> There is no support in u-boot to make a memory area non-cacheable.
>>>>>>> This is the definition of dma_alloc_coherent()
>>>>>>>
>>>>>>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>>>>>>> {
>>>>>>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>>>>>>         return (void *)*handle;
>>>>>>> }
>>>>>>>
>>>>>>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>>>>>>> what you describe) and extra flush_cache functions are added because of
>>>>>>> U-Boot's inability to allocate coherent memory.
>>>>>>
>>>>>> then that's what should be fixed. No?
>>>>>>
>>>>>
>>>>> You're right but that sounds like a long-term feature which will affect
>>>>> a huge part of u-boot. Until it is implemented, I guess this is the best
>>>>> way to handle the issue.
>>>>
>>>> Not my call to make. I'll defer to Marek and Tom
>>>>
>>> We're deep in RC anyway, so feel free to prepare a fix for next MW .
>>>
>>
>> Fix as in rebase same patch for next merge window?
> 
> As in, add support for marking memory area noncachable and then use it
> here. It shouldn't be hard, it's only about some MMU table attributes.
> 

dma_alloc_coherent() is used by many architectures (arm, x86, nios2,
nds32). I can implement the feature in arm because I can test it but
someone else needs to do it for the other architectures.

Thanks,
Faiz
Marek Vasut Oct. 17, 2017, 11:14 a.m. UTC | #12
On 10/17/2017 01:10 PM, Faiz Abbas wrote:
> Hey,
> 
> On Tuesday 17 October 2017 03:31 PM, Marek Vasut wrote:
>> On 10/17/2017 07:25 AM, Faiz Abbas wrote:
>>>
>>>
>>> On Monday 16 October 2017 08:52 PM, Marek Vasut wrote:
>>>> On 10/16/2017 04:51 PM, Felipe Balbi wrote:
>>>>>
>>>>> Hi,
>>>>>
>>>>> Faiz Abbas <faiz_abbas@ti.com> writes:
>>>>>>>>> Marek Vasut <marex@denx.de> writes:
>>>>>>>>>> On 10/16/2017 07:21 AM, Faiz Abbas wrote:
>>>>>>>>>>> A flush of the cache is required before any outbound DMA access can
>>>>>>>>>>> take place. The minimum size that can be flushed from the cache is
>>>>>>>>>>> one cache line size. Therefore, any buffer allocated for DMA should
>>>>>>>>>>> be in multiples of cache line size.
>>>>>>>>>>>
>>>>>>>>>>> Thus, allocate memory for ep0_trb in multiples of cache line size.
>>>>>>>>>>>
>>>>>>>>>>> Also, when local variable trb is assigned to dwc->ep0_trb[1] and used
>>>>>>>>>>> to flush cache, it leads to cache misaligned messages as only the base
>>>>>>>>>>> address dwc->ep0_trb is cache aligned.
>>>>>>>>>>>
>>>>>>>>>>> Therefore, flush cache using ep0_trb_addr which is always cache aligned.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
>>>>>>>>>>
>>>>>>>>>> SGTM, Felipe, can you review this please ?
>>>>>>>>>
>>>>>>>>> is cache maintenance done correctly in u-boot? Isn't the whole idea of a
>>>>>>>>> coherent memory area that is is non-cacheable, non-bufferable memory?
>>>>>>>>>
>>>>>>>>> Also, why isn't the API itself guaranteeing alignment requirements?
>>>>>>>>>
>>>>>>>> There is no support in u-boot to make a memory area non-cacheable.
>>>>>>>> This is the definition of dma_alloc_coherent()
>>>>>>>>
>>>>>>>> static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>>>>>>>> {
>>>>>>>>         *handle = (unsigned long)memalign(ARCH_DMA_MINALIGN, len);
>>>>>>>>         return (void *)*handle;
>>>>>>>> }
>>>>>>>>
>>>>>>>> This driver is mostly copied from kernel (where dma_alloc_coherent() is
>>>>>>>> what you describe) and extra flush_cache functions are added because of
>>>>>>>> U-Boot's inability to allocate coherent memory.
>>>>>>>
>>>>>>> then that's what should be fixed. No?
>>>>>>>
>>>>>>
>>>>>> You're right but that sounds like a long-term feature which will affect
>>>>>> a huge part of u-boot. Until it is implemented, I guess this is the best
>>>>>> way to handle the issue.
>>>>>
>>>>> Not my call to make. I'll defer to Marek and Tom
>>>>>
>>>> We're deep in RC anyway, so feel free to prepare a fix for next MW .
>>>>
>>>
>>> Fix as in rebase same patch for next merge window?
>>
>> As in, add support for marking memory area noncachable and then use it
>> here. It shouldn't be hard, it's only about some MMU table attributes.
>>
> 
> dma_alloc_coherent() is used by many architectures (arm, x86, nios2,
> nds32). I can implement the feature in arm because I can test it but
> someone else needs to do it for the other architectures.

Sounds good.
diff mbox series

Patch

diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index e61d980..d4cc725 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -81,12 +81,12 @@  static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma,
 		trb->ctrl |= (DWC3_TRB_CTRL_IOC
 				| DWC3_TRB_CTRL_LST);
 
-	dwc3_flush_cache((uintptr_t)buf_dma, len);
-	dwc3_flush_cache((uintptr_t)trb, sizeof(*trb));
-
 	if (chain)
 		return 0;
 
+	dwc3_flush_cache((uintptr_t)buf_dma, len);
+	dwc3_flush_cache((uintptr_t)dwc->ep0_trb_addr, sizeof(*trb) * 2);
+
 	memset(&params, 0, sizeof(params));
 	params.param0 = upper_32_bits(dwc->ep0_trb_addr);
 	params.param1 = lower_32_bits(dwc->ep0_trb_addr);
@@ -790,7 +790,7 @@  static void dwc3_ep0_complete_data(struct dwc3 *dwc,
 	if (!r)
 		return;
 
-	dwc3_flush_cache((uintptr_t)trb, sizeof(*trb));
+	dwc3_flush_cache((uintptr_t)dwc->ep0_trb_addr, sizeof(*trb) * 2);
 
 	status = DWC3_TRB_SIZE_TRBSTS(trb->size);
 	if (status == DWC3_TRBSTS_SETUP_PENDING) {
@@ -821,7 +821,8 @@  static void dwc3_ep0_complete_data(struct dwc3 *dwc,
 			ur->actual += transferred;
 
 			trb++;
-			dwc3_flush_cache((uintptr_t)trb, sizeof(*trb));
+			dwc3_flush_cache((uintptr_t)dwc->ep0_trb_addr,
+					 sizeof(*trb) * 2);
 			length = trb->size & DWC3_TRB_SIZE_MASK;
 
 			ep0->free_slot = 0;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index e065c5a..895a5bc 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2567,7 +2567,8 @@  int dwc3_gadget_init(struct dwc3 *dwc)
 		goto err0;
 	}
 
-	dwc->ep0_trb = dma_alloc_coherent(sizeof(*dwc->ep0_trb) * 2,
+	dwc->ep0_trb = dma_alloc_coherent(ROUND(sizeof(*dwc->ep0_trb) * 2,
+						CACHELINE_SIZE),
 					  (unsigned long *)&dwc->ep0_trb_addr);
 	if (!dwc->ep0_trb) {
 		dev_err(dwc->dev, "failed to allocate ep0 trb\n");