diff mbox

[U-Boot,v2,35/40] ARM: Implement non-cached memory support

Message ID 1409067268-956-36-git-send-email-thierry.reding@gmail.com
State Changes Requested
Delegated to: Albert ARIBAUD
Headers show

Commit Message

Thierry Reding Aug. 26, 2014, 3:34 p.m. UTC
From: Thierry Reding <treding@nvidia.com>

Implement an API that can be used by drivers to allocate memory from a
pool that is mapped uncached. This is useful if drivers would otherwise
need to do extensive cache maintenance (or explicitly maintaining the
cache isn't safe).

The API is protected using the new CONFIG_SYS_NONCACHED_MEMORY setting.
Boards can set this to the size to be used for the non-cached area. The
area will typically be right below the malloc() area, but architectures
should take care of aligning the beginning and end of the area to honor
any mapping restrictions. Architectures must also ensure that mappings
established for this area do not overlap with the malloc() area (which
should remain cached for improved performance).

While the API is currently only implemented for ARM v7, it should be
generic enough to allow other architectures to implement it as well.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
Changes in v2:
- avoid overflow when checking for available space

 README                        | 19 +++++++++++++++++++
 arch/arm/include/asm/system.h |  5 +++++
 arch/arm/lib/cache.c          | 42 ++++++++++++++++++++++++++++++++++++++++++
 common/board_r.c              | 11 +++++++++++
 4 files changed, 77 insertions(+)

Comments

Simon Glass Aug. 27, 2014, 7:07 p.m. UTC | #1
Hi Thierry,

On 26 August 2014 09:34, Thierry Reding <thierry.reding@gmail.com> wrote:
> From: Thierry Reding <treding@nvidia.com>
>
> Implement an API that can be used by drivers to allocate memory from a
> pool that is mapped uncached. This is useful if drivers would otherwise
> need to do extensive cache maintenance (or explicitly maintaining the
> cache isn't safe).
>
> The API is protected using the new CONFIG_SYS_NONCACHED_MEMORY setting.
> Boards can set this to the size to be used for the non-cached area. The
> area will typically be right below the malloc() area, but architectures
> should take care of aligning the beginning and end of the area to honor
> any mapping restrictions. Architectures must also ensure that mappings
> established for this area do not overlap with the malloc() area (which
> should remain cached for improved performance).
>
> While the API is currently only implemented for ARM v7, it should be
> generic enough to allow other architectures to implement it as well.
>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> ---
> Changes in v2:
> - avoid overflow when checking for available space
>
>  README                        | 19 +++++++++++++++++++
>  arch/arm/include/asm/system.h |  5 +++++
>  arch/arm/lib/cache.c          | 42 ++++++++++++++++++++++++++++++++++++++++++
>  common/board_r.c              | 11 +++++++++++
>  4 files changed, 77 insertions(+)
>
> diff --git a/README b/README
> index 1d713596ec6f..526e06edb52c 100644
> --- a/README
> +++ b/README
> @@ -3759,6 +3759,25 @@ Configuration Settings:
>                 Pre-relocation malloc() is only supported on ARM at present
>                 but is fairly easy to enable for other archs.
>
> +- CONFIG_SYS_NONCACHED_MEMORY:
> +               Size of non-cached memory area. This area of memory will be
> +               typically located right below the malloc() area and mapped
> +               uncached in the MMU. This is useful for drivers that would
> +               otherwise require a lot of explicit cache maintenance. For
> +               some drivers it's also impossible to properly maintain the
> +               cache. For example if the regions that need to be flushed
> +               are not a multiple of the cache-line size, *and* padding
> +               cannot be allocated between the regions to align them (i.e.
> +               if the HW requires a contiguous array of regions, and the
> +               size of each region is not cache-aligned), then a flush of
> +               one region may result in overwriting data that hardware has
> +               written to another region in the same cache-line. This can
> +               happen for example in network drivers where descriptors for
> +               buffers are typically smaller than the CPU cache-line (e.g.
> +               16 bytes vs. 32 or 64 bytes).
> +
> +               Non-cached memory is only supported on 32-bit ARM at present.

Is this better than allocating 1MB with memalign() and then changing
the MMU settings with something like set_section_dcache()?

> +
>  - CONFIG_SYS_BOOTM_LEN:
>                 Normally compressed uImages are limited to an
>                 uncompressed size of 8 MBytes. If this is not enough,
> diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> index fb31a8faf2b1..fb1bc0699a56 100644
> --- a/arch/arm/include/asm/system.h
> +++ b/arch/arm/include/asm/system.h
> @@ -211,6 +211,11 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
>   */
>  void mmu_page_table_flush(unsigned long start, unsigned long stop);
>
> +#ifdef CONFIG_SYS_NONCACHED_MEMORY
> +void noncached_init(void);
> +phys_addr_t noncached_alloc(size_t size, size_t align);
> +#endif /* CONFIG_SYS_NONCACHED_MEMORY */
> +
>  #endif /* __ASSEMBLY__ */
>
>  #define arch_align_stack(x) (x)
> diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
> index 4e597a4c1d16..b2aa7740d4f8 100644
> --- a/arch/arm/lib/cache.c
> +++ b/arch/arm/lib/cache.c
> @@ -8,6 +8,7 @@
>  /* for now: just dummy functions to satisfy the linker */
>
>  #include <common.h>
> +#include <malloc.h>
>
>  __weak void flush_cache(unsigned long start, unsigned long size)
>  {
> @@ -49,3 +50,44 @@ __weak void enable_caches(void)
>  {
>         puts("WARNING: Caches not enabled\n");
>  }
> +
> +#ifdef CONFIG_SYS_NONCACHED_MEMORY
> +/*
> + * Reserve one MMU section worth of address space below the malloc() area that
> + * will be mapped uncached.

It looks like it can be >1 section.

> + */
> +static unsigned long noncached_start;
> +static unsigned long noncached_end;
> +static unsigned long noncached_next;

You can use ulong if you like.

> +
> +void noncached_init(void)
> +{
> +       phys_addr_t start, end;
> +       size_t size;
> +
> +       end = ALIGN(mem_malloc_start, MMU_SECTION_SIZE) - MMU_SECTION_SIZE;
> +       size = ALIGN(CONFIG_SYS_NONCACHED_MEMORY, MMU_SECTION_SIZE);
> +       start = end - size;
> +
> +       debug("mapping memory %pa-%pa non-cached\n", &start, &end);
> +
> +       noncached_start = start;
> +       noncached_end = end;
> +       noncached_next = start;
> +
> +       mmu_set_region_dcache_behaviour(noncached_start, size, DCACHE_OFF);
> +}
> +
> +phys_addr_t noncached_alloc(size_t size, size_t align)
> +{
> +       phys_addr_t next = ALIGN(noncached_next, align);
> +
> +       if (next >= noncached_end || (noncached_end - next) < size)
> +               return 0;
> +
> +       debug("allocated %zu bytes of uncached memory @%pa\n", size, &next);
> +       noncached_next = next + size;

Should this be aligned? I suspect it doesn't matter, and you do it
anyway on the next call.

> +
> +       return next;
> +}
> +#endif /* CONFIG_SYS_NONCACHED_MEMORY */
> diff --git a/common/board_r.c b/common/board_r.c
> index ba9a68dc6691..6b8e62bf032b 100644
> --- a/common/board_r.c
> +++ b/common/board_r.c
> @@ -270,6 +270,14 @@ static int initr_malloc(void)
>         return 0;
>  }
>
> +#ifdef CONFIG_SYS_NONCACHED_MEMORY
> +static int initr_noncached(void)
> +{
> +       noncached_init();

Could you make this function return 0 to avoid needing this helper?

> +       return 0;
> +}
> +#endif
> +
>  #ifdef CONFIG_DM
>  static int initr_dm(void)
>  {
> @@ -765,6 +773,9 @@ init_fnc_t init_sequence_r[] = {
>  #endif
>         initr_barrier,
>         initr_malloc,
> +#ifdef CONFIG_SYS_NONCACHED_MEMORY
> +       initr_noncached,
> +#endif
>         bootstage_relocate,
>  #ifdef CONFIG_DM
>         initr_dm,
> --
> 2.0.4
>

Regards,
Simon
Stephen Warren Oct. 24, 2014, 7:11 p.m. UTC | #2
On 08/26/2014 09:34 AM, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
>
> Implement an API that can be used by drivers to allocate memory from a
> pool that is mapped uncached. This is useful if drivers would otherwise
> need to do extensive cache maintenance (or explicitly maintaining the
> cache isn't safe).
>
> The API is protected using the new CONFIG_SYS_NONCACHED_MEMORY setting.
> Boards can set this to the size to be used for the non-cached area. The
> area will typically be right below the malloc() area, but architectures
> should take care of aligning the beginning and end of the area to honor
> any mapping restrictions. Architectures must also ensure that mappings
> established for this area do not overlap with the malloc() area (which
> should remain cached for improved performance).
>
> While the API is currently only implemented for ARM v7, it should be
> generic enough to allow other architectures to implement it as well.

> diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c

> +void noncached_init(void)
...
> +	mmu_set_region_dcache_behaviour(noncached_start, size, DCACHE_OFF);
> +}

If I build with:

#define CONFIG_SYS_DCACHE_OFF
#define CONFIG_SYS_ICACHE_OFF

... then mmu_set_region_dcache_behaviour() doesn't exist (or at least 
isn't linked in) on Jetson TK1 at least.
Simon Glass Nov. 12, 2014, 3:49 p.m. UTC | #3
Hi Stephen,

On 24 October 2014 13:11, Stephen Warren <swarren@wwwdotorg.org> wrote:
> On 08/26/2014 09:34 AM, Thierry Reding wrote:
>>
>> From: Thierry Reding <treding@nvidia.com>
>>
>> Implement an API that can be used by drivers to allocate memory from a
>> pool that is mapped uncached. This is useful if drivers would otherwise
>> need to do extensive cache maintenance (or explicitly maintaining the
>> cache isn't safe).
>>
>> The API is protected using the new CONFIG_SYS_NONCACHED_MEMORY setting.
>> Boards can set this to the size to be used for the non-cached area. The
>> area will typically be right below the malloc() area, but architectures
>> should take care of aligning the beginning and end of the area to honor
>> any mapping restrictions. Architectures must also ensure that mappings
>> established for this area do not overlap with the malloc() area (which
>> should remain cached for improved performance).
>>
>> While the API is currently only implemented for ARM v7, it should be
>> generic enough to allow other architectures to implement it as well.
>
>
>> diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
>
>
>> +void noncached_init(void)
>
> ...
>>
>> +       mmu_set_region_dcache_behaviour(noncached_start, size,
>> DCACHE_OFF);
>> +}
>
>
> If I build with:
>
> #define CONFIG_SYS_DCACHE_OFF
> #define CONFIG_SYS_ICACHE_OFF
>
> ... then mmu_set_region_dcache_behaviour() doesn't exist (or at least isn't
> linked in) on Jetson TK1 at least.

I see that too. If you #undef CONFIG_SYS_NONCACHED_MEMORY then the
problem goes away. There is a warning in the network driver in this
case but it is not important.

A better solution is probably an #ifdef around the call you mention.
It minimised code changes when cache is on/off which is probably a
good thing.

Regards
Simon
diff mbox

Patch

diff --git a/README b/README
index 1d713596ec6f..526e06edb52c 100644
--- a/README
+++ b/README
@@ -3759,6 +3759,25 @@  Configuration Settings:
 		Pre-relocation malloc() is only supported on ARM at present
 		but is fairly easy to enable for other archs.
 
+- CONFIG_SYS_NONCACHED_MEMORY:
+		Size of non-cached memory area. This area of memory will be
+		typically located right below the malloc() area and mapped
+		uncached in the MMU. This is useful for drivers that would
+		otherwise require a lot of explicit cache maintenance. For
+		some drivers it's also impossible to properly maintain the
+		cache. For example if the regions that need to be flushed
+		are not a multiple of the cache-line size, *and* padding
+		cannot be allocated between the regions to align them (i.e.
+		if the HW requires a contiguous array of regions, and the
+		size of each region is not cache-aligned), then a flush of
+		one region may result in overwriting data that hardware has
+		written to another region in the same cache-line. This can
+		happen for example in network drivers where descriptors for
+		buffers are typically smaller than the CPU cache-line (e.g.
+		16 bytes vs. 32 or 64 bytes).
+
+		Non-cached memory is only supported on 32-bit ARM at present.
+
 - CONFIG_SYS_BOOTM_LEN:
 		Normally compressed uImages are limited to an
 		uncompressed size of 8 MBytes. If this is not enough,
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index fb31a8faf2b1..fb1bc0699a56 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -211,6 +211,11 @@  void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
  */
 void mmu_page_table_flush(unsigned long start, unsigned long stop);
 
+#ifdef CONFIG_SYS_NONCACHED_MEMORY
+void noncached_init(void);
+phys_addr_t noncached_alloc(size_t size, size_t align);
+#endif /* CONFIG_SYS_NONCACHED_MEMORY */
+
 #endif /* __ASSEMBLY__ */
 
 #define arch_align_stack(x) (x)
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
index 4e597a4c1d16..b2aa7740d4f8 100644
--- a/arch/arm/lib/cache.c
+++ b/arch/arm/lib/cache.c
@@ -8,6 +8,7 @@ 
 /* for now: just dummy functions to satisfy the linker */
 
 #include <common.h>
+#include <malloc.h>
 
 __weak void flush_cache(unsigned long start, unsigned long size)
 {
@@ -49,3 +50,44 @@  __weak void enable_caches(void)
 {
 	puts("WARNING: Caches not enabled\n");
 }
+
+#ifdef CONFIG_SYS_NONCACHED_MEMORY
+/*
+ * Reserve one MMU section worth of address space below the malloc() area that
+ * will be mapped uncached.
+ */
+static unsigned long noncached_start;
+static unsigned long noncached_end;
+static unsigned long noncached_next;
+
+void noncached_init(void)
+{
+	phys_addr_t start, end;
+	size_t size;
+
+	end = ALIGN(mem_malloc_start, MMU_SECTION_SIZE) - MMU_SECTION_SIZE;
+	size = ALIGN(CONFIG_SYS_NONCACHED_MEMORY, MMU_SECTION_SIZE);
+	start = end - size;
+
+	debug("mapping memory %pa-%pa non-cached\n", &start, &end);
+
+	noncached_start = start;
+	noncached_end = end;
+	noncached_next = start;
+
+	mmu_set_region_dcache_behaviour(noncached_start, size, DCACHE_OFF);
+}
+
+phys_addr_t noncached_alloc(size_t size, size_t align)
+{
+	phys_addr_t next = ALIGN(noncached_next, align);
+
+	if (next >= noncached_end || (noncached_end - next) < size)
+		return 0;
+
+	debug("allocated %zu bytes of uncached memory @%pa\n", size, &next);
+	noncached_next = next + size;
+
+	return next;
+}
+#endif /* CONFIG_SYS_NONCACHED_MEMORY */
diff --git a/common/board_r.c b/common/board_r.c
index ba9a68dc6691..6b8e62bf032b 100644
--- a/common/board_r.c
+++ b/common/board_r.c
@@ -270,6 +270,14 @@  static int initr_malloc(void)
 	return 0;
 }
 
+#ifdef CONFIG_SYS_NONCACHED_MEMORY
+static int initr_noncached(void)
+{
+	noncached_init();
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_DM
 static int initr_dm(void)
 {
@@ -765,6 +773,9 @@  init_fnc_t init_sequence_r[] = {
 #endif
 	initr_barrier,
 	initr_malloc,
+#ifdef CONFIG_SYS_NONCACHED_MEMORY
+	initr_noncached,
+#endif
 	bootstage_relocate,
 #ifdef CONFIG_DM
 	initr_dm,