Message ID | 1409067268-956-36-git-send-email-thierry.reding@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | Albert ARIBAUD |
Headers | show |
Hi Thierry, On 26 August 2014 09:34, Thierry Reding <thierry.reding@gmail.com> wrote: > From: Thierry Reding <treding@nvidia.com> > > Implement an API that can be used by drivers to allocate memory from a > pool that is mapped uncached. This is useful if drivers would otherwise > need to do extensive cache maintenance (or explicitly maintaining the > cache isn't safe). > > The API is protected using the new CONFIG_SYS_NONCACHED_MEMORY setting. > Boards can set this to the size to be used for the non-cached area. The > area will typically be right below the malloc() area, but architectures > should take care of aligning the beginning and end of the area to honor > any mapping restrictions. Architectures must also ensure that mappings > established for this area do not overlap with the malloc() area (which > should remain cached for improved performance). > > While the API is currently only implemented for ARM v7, it should be > generic enough to allow other architectures to implement it as well. > > Signed-off-by: Thierry Reding <treding@nvidia.com> > --- > Changes in v2: > - avoid overflow when checking for available space > > README | 19 +++++++++++++++++++ > arch/arm/include/asm/system.h | 5 +++++ > arch/arm/lib/cache.c | 42 ++++++++++++++++++++++++++++++++++++++++++ > common/board_r.c | 11 +++++++++++ > 4 files changed, 77 insertions(+) > > diff --git a/README b/README > index 1d713596ec6f..526e06edb52c 100644 > --- a/README > +++ b/README > @@ -3759,6 +3759,25 @@ Configuration Settings: > Pre-relocation malloc() is only supported on ARM at present > but is fairly easy to enable for other archs. > > +- CONFIG_SYS_NONCACHED_MEMORY: > + Size of non-cached memory area. This area of memory will be > + typically located right below the malloc() area and mapped > + uncached in the MMU. This is useful for drivers that would > + otherwise require a lot of explicit cache maintenance. For > + some drivers it's also impossible to properly maintain the > + cache. For example if the regions that need to be flushed > + are not a multiple of the cache-line size, *and* padding > + cannot be allocated between the regions to align them (i.e. > + if the HW requires a contiguous array of regions, and the > + size of each region is not cache-aligned), then a flush of > + one region may result in overwriting data that hardware has > + written to another region in the same cache-line. This can > + happen for example in network drivers where descriptors for > + buffers are typically smaller than the CPU cache-line (e.g. > + 16 bytes vs. 32 or 64 bytes). > + > + Non-cached memory is only supported on 32-bit ARM at present. Is this better than allocating 1MB with memalign() and then changing the MMU settings with something like set_section_dcache()? > + > - CONFIG_SYS_BOOTM_LEN: > Normally compressed uImages are limited to an > uncompressed size of 8 MBytes. If this is not enough, > diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h > index fb31a8faf2b1..fb1bc0699a56 100644 > --- a/arch/arm/include/asm/system.h > +++ b/arch/arm/include/asm/system.h > @@ -211,6 +211,11 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size, > */ > void mmu_page_table_flush(unsigned long start, unsigned long stop); > > +#ifdef CONFIG_SYS_NONCACHED_MEMORY > +void noncached_init(void); > +phys_addr_t noncached_alloc(size_t size, size_t align); > +#endif /* CONFIG_SYS_NONCACHED_MEMORY */ > + > #endif /* __ASSEMBLY__ */ > > #define arch_align_stack(x) (x) > diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c > index 4e597a4c1d16..b2aa7740d4f8 100644 > --- a/arch/arm/lib/cache.c > +++ b/arch/arm/lib/cache.c > @@ -8,6 +8,7 @@ > /* for now: just dummy functions to satisfy the linker */ > > #include <common.h> > +#include <malloc.h> > > __weak void flush_cache(unsigned long start, unsigned long size) > { > @@ -49,3 +50,44 @@ __weak void enable_caches(void) > { > puts("WARNING: Caches not enabled\n"); > } > + > +#ifdef CONFIG_SYS_NONCACHED_MEMORY > +/* > + * Reserve one MMU section worth of address space below the malloc() area that > + * will be mapped uncached. It looks like it can be >1 section. > + */ > +static unsigned long noncached_start; > +static unsigned long noncached_end; > +static unsigned long noncached_next; You can use ulong if you like. > + > +void noncached_init(void) > +{ > + phys_addr_t start, end; > + size_t size; > + > + end = ALIGN(mem_malloc_start, MMU_SECTION_SIZE) - MMU_SECTION_SIZE; > + size = ALIGN(CONFIG_SYS_NONCACHED_MEMORY, MMU_SECTION_SIZE); > + start = end - size; > + > + debug("mapping memory %pa-%pa non-cached\n", &start, &end); > + > + noncached_start = start; > + noncached_end = end; > + noncached_next = start; > + > + mmu_set_region_dcache_behaviour(noncached_start, size, DCACHE_OFF); > +} > + > +phys_addr_t noncached_alloc(size_t size, size_t align) > +{ > + phys_addr_t next = ALIGN(noncached_next, align); > + > + if (next >= noncached_end || (noncached_end - next) < size) > + return 0; > + > + debug("allocated %zu bytes of uncached memory @%pa\n", size, &next); > + noncached_next = next + size; Should this be aligned? I suspect it doesn't matter, and you do it anyway on the next call. > + > + return next; > +} > +#endif /* CONFIG_SYS_NONCACHED_MEMORY */ > diff --git a/common/board_r.c b/common/board_r.c > index ba9a68dc6691..6b8e62bf032b 100644 > --- a/common/board_r.c > +++ b/common/board_r.c > @@ -270,6 +270,14 @@ static int initr_malloc(void) > return 0; > } > > +#ifdef CONFIG_SYS_NONCACHED_MEMORY > +static int initr_noncached(void) > +{ > + noncached_init(); Could you make this function return 0 to avoid needing this helper? > + return 0; > +} > +#endif > + > #ifdef CONFIG_DM > static int initr_dm(void) > { > @@ -765,6 +773,9 @@ init_fnc_t init_sequence_r[] = { > #endif > initr_barrier, > initr_malloc, > +#ifdef CONFIG_SYS_NONCACHED_MEMORY > + initr_noncached, > +#endif > bootstage_relocate, > #ifdef CONFIG_DM > initr_dm, > -- > 2.0.4 > Regards, Simon
On 08/26/2014 09:34 AM, Thierry Reding wrote: > From: Thierry Reding <treding@nvidia.com> > > Implement an API that can be used by drivers to allocate memory from a > pool that is mapped uncached. This is useful if drivers would otherwise > need to do extensive cache maintenance (or explicitly maintaining the > cache isn't safe). > > The API is protected using the new CONFIG_SYS_NONCACHED_MEMORY setting. > Boards can set this to the size to be used for the non-cached area. The > area will typically be right below the malloc() area, but architectures > should take care of aligning the beginning and end of the area to honor > any mapping restrictions. Architectures must also ensure that mappings > established for this area do not overlap with the malloc() area (which > should remain cached for improved performance). > > While the API is currently only implemented for ARM v7, it should be > generic enough to allow other architectures to implement it as well. > diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c > +void noncached_init(void) ... > + mmu_set_region_dcache_behaviour(noncached_start, size, DCACHE_OFF); > +} If I build with: #define CONFIG_SYS_DCACHE_OFF #define CONFIG_SYS_ICACHE_OFF ... then mmu_set_region_dcache_behaviour() doesn't exist (or at least isn't linked in) on Jetson TK1 at least.
Hi Stephen, On 24 October 2014 13:11, Stephen Warren <swarren@wwwdotorg.org> wrote: > On 08/26/2014 09:34 AM, Thierry Reding wrote: >> >> From: Thierry Reding <treding@nvidia.com> >> >> Implement an API that can be used by drivers to allocate memory from a >> pool that is mapped uncached. This is useful if drivers would otherwise >> need to do extensive cache maintenance (or explicitly maintaining the >> cache isn't safe). >> >> The API is protected using the new CONFIG_SYS_NONCACHED_MEMORY setting. >> Boards can set this to the size to be used for the non-cached area. The >> area will typically be right below the malloc() area, but architectures >> should take care of aligning the beginning and end of the area to honor >> any mapping restrictions. Architectures must also ensure that mappings >> established for this area do not overlap with the malloc() area (which >> should remain cached for improved performance). >> >> While the API is currently only implemented for ARM v7, it should be >> generic enough to allow other architectures to implement it as well. > > >> diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c > > >> +void noncached_init(void) > > ... >> >> + mmu_set_region_dcache_behaviour(noncached_start, size, >> DCACHE_OFF); >> +} > > > If I build with: > > #define CONFIG_SYS_DCACHE_OFF > #define CONFIG_SYS_ICACHE_OFF > > ... then mmu_set_region_dcache_behaviour() doesn't exist (or at least isn't > linked in) on Jetson TK1 at least. I see that too. If you #undef CONFIG_SYS_NONCACHED_MEMORY then the problem goes away. There is a warning in the network driver in this case but it is not important. A better solution is probably an #ifdef around the call you mention. It minimised code changes when cache is on/off which is probably a good thing. Regards Simon
diff --git a/README b/README index 1d713596ec6f..526e06edb52c 100644 --- a/README +++ b/README @@ -3759,6 +3759,25 @@ Configuration Settings: Pre-relocation malloc() is only supported on ARM at present but is fairly easy to enable for other archs. +- CONFIG_SYS_NONCACHED_MEMORY: + Size of non-cached memory area. This area of memory will be + typically located right below the malloc() area and mapped + uncached in the MMU. This is useful for drivers that would + otherwise require a lot of explicit cache maintenance. For + some drivers it's also impossible to properly maintain the + cache. For example if the regions that need to be flushed + are not a multiple of the cache-line size, *and* padding + cannot be allocated between the regions to align them (i.e. + if the HW requires a contiguous array of regions, and the + size of each region is not cache-aligned), then a flush of + one region may result in overwriting data that hardware has + written to another region in the same cache-line. This can + happen for example in network drivers where descriptors for + buffers are typically smaller than the CPU cache-line (e.g. + 16 bytes vs. 32 or 64 bytes). + + Non-cached memory is only supported on 32-bit ARM at present. + - CONFIG_SYS_BOOTM_LEN: Normally compressed uImages are limited to an uncompressed size of 8 MBytes. If this is not enough, diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index fb31a8faf2b1..fb1bc0699a56 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -211,6 +211,11 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size, */ void mmu_page_table_flush(unsigned long start, unsigned long stop); +#ifdef CONFIG_SYS_NONCACHED_MEMORY +void noncached_init(void); +phys_addr_t noncached_alloc(size_t size, size_t align); +#endif /* CONFIG_SYS_NONCACHED_MEMORY */ + #endif /* __ASSEMBLY__ */ #define arch_align_stack(x) (x) diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c index 4e597a4c1d16..b2aa7740d4f8 100644 --- a/arch/arm/lib/cache.c +++ b/arch/arm/lib/cache.c @@ -8,6 +8,7 @@ /* for now: just dummy functions to satisfy the linker */ #include <common.h> +#include <malloc.h> __weak void flush_cache(unsigned long start, unsigned long size) { @@ -49,3 +50,44 @@ __weak void enable_caches(void) { puts("WARNING: Caches not enabled\n"); } + +#ifdef CONFIG_SYS_NONCACHED_MEMORY +/* + * Reserve one MMU section worth of address space below the malloc() area that + * will be mapped uncached. + */ +static unsigned long noncached_start; +static unsigned long noncached_end; +static unsigned long noncached_next; + +void noncached_init(void) +{ + phys_addr_t start, end; + size_t size; + + end = ALIGN(mem_malloc_start, MMU_SECTION_SIZE) - MMU_SECTION_SIZE; + size = ALIGN(CONFIG_SYS_NONCACHED_MEMORY, MMU_SECTION_SIZE); + start = end - size; + + debug("mapping memory %pa-%pa non-cached\n", &start, &end); + + noncached_start = start; + noncached_end = end; + noncached_next = start; + + mmu_set_region_dcache_behaviour(noncached_start, size, DCACHE_OFF); +} + +phys_addr_t noncached_alloc(size_t size, size_t align) +{ + phys_addr_t next = ALIGN(noncached_next, align); + + if (next >= noncached_end || (noncached_end - next) < size) + return 0; + + debug("allocated %zu bytes of uncached memory @%pa\n", size, &next); + noncached_next = next + size; + + return next; +} +#endif /* CONFIG_SYS_NONCACHED_MEMORY */ diff --git a/common/board_r.c b/common/board_r.c index ba9a68dc6691..6b8e62bf032b 100644 --- a/common/board_r.c +++ b/common/board_r.c @@ -270,6 +270,14 @@ static int initr_malloc(void) return 0; } +#ifdef CONFIG_SYS_NONCACHED_MEMORY +static int initr_noncached(void) +{ + noncached_init(); + return 0; +} +#endif + #ifdef CONFIG_DM static int initr_dm(void) { @@ -765,6 +773,9 @@ init_fnc_t init_sequence_r[] = { #endif initr_barrier, initr_malloc, +#ifdef CONFIG_SYS_NONCACHED_MEMORY + initr_noncached, +#endif bootstage_relocate, #ifdef CONFIG_DM initr_dm,