Patchwork [U-Boot,v2,11/12] arm: add MMU/d-cache support for Faraday cores

login
register
mail settings
Submitter Kuo-Jung Su
Date April 18, 2013, 9:25 a.m.
Message ID <1366277139-29728-12-git-send-email-dantesu@gmail.com>
Download mbox | patch
Permalink /patch/237583/
State Superseded
Delegated to: Albert ARIBAUD
Headers show

Comments

Kuo-Jung Su - April 18, 2013, 9:25 a.m.
From: Kuo-Jung Su <dantesu@faraday-tech.com>

This patch enables the MMU and d-cache for Faraday ARMv5TE cores.

Here is the abstract of this MMU design.

Assume SDRAM memory region starts at 0x10000000, and its size = 0x800000.

0x00000000 +-------------------+
           |                   |
           |     UN-CACHED     |
           |                   |
           |                   |
0x10000000 +-------------------+
           |  CACHED (SDRAM)   | <- It's where data/bss/stack lived.
           |                   |
           |                   |
0x10800000 +-------------------+
           |                   |
           |                   |
           |     UN-CACHED     |
           |                   |
           |                   |
0xFF800000 +-------------------+
           | UN-CACHED (SDRAM) | <- An un-cached shadow of the SDRAM.
           |                   |    dma_alloc_coherent() always returns
           |                   |    an address in this region.
0xFFFFFFFF +-------------------+

Signed-off-by: Kuo-Jung Su <dantesu@faraday-tech.com>
CC: Albert Aribaud <albert.u.boot@aribaud.net>
---
 arch/arm/include/asm/dma-mapping.h |   56 ++++++++++++++++++++++--
 arch/arm/include/asm/global_data.h |    4 ++
 arch/arm/include/asm/io.h          |   84 +++++++++++++++++++++++++++++++++++-
 arch/arm/lib/cache-cp15.c          |   42 ++++++++++++++++++
 common/cmd_boot.c                  |    4 ++
 5 files changed, 186 insertions(+), 4 deletions(-)

--
1.7.9.5
Wolfgang Denk - April 18, 2013, 11:13 a.m.
Dear Kuo-Jung Su,

In message <1366277139-29728-12-git-send-email-dantesu@gmail.com> you wrote:
...
> --- a/common/cmd_boot.c
> +++ b/common/cmd_boot.c
> @@ -50,6 +50,10 @@ static int do_go(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
> 
>  	printf ("## Starting application at 0x%08lX ...\n", addr);
> 
> +#if defined(__ARM__) && !defined(CONFIG_SYS_DCACHE_OFF)
> +	cleanup_before_linux();
> +#endif
> +
>  	/*
>  	 * pass address parameter as argv[0] (aka command name),
>  	 * and all remaining args

This affects global code. Please submit it as a separate patch.

And why exactly is this ARM specific?

Best regards,

Wolfgang Denk
Albert ARIBAUD - April 18, 2013, 7:09 p.m.
Hi Kuo-Jung,

On Thu, 18 Apr 2013 17:25:38 +0800, Kuo-Jung Su <dantesu@gmail.com>
wrote:

> From: Kuo-Jung Su <dantesu@faraday-tech.com>
> 
> This patch would enable MMU for Faraday ARMv5TE cores.
> 
> Here is the abstract of this MMU design.
> 
> Assume SDRAM memory region starts at 0x10000000, and its size = 0x800000.
> 
> 0x00000000 +-------------------+
>            |                   |
>            |     UN-CACHED     |
>            |                   |
>            |                   |
> 0x10000000 +-------------------+
>            |  CACHED (SDRAM)   | <- It's where data/bss/stack lived.
>            |                   |
>            |                   |
> 0x10800000 +-------------------+
>            |                   |
>            |                   |
>            |     UN-CACHED     |
>            |                   |
>            |                   |
> 0xFF800000 +-------------------+
>            | UN-CACHED (SDRAM) | <- An un-cached shadow of the SDRAM.
>            |                   |    dma_alloc_coherent() always returns
>            |                   |    an address in this region.
> 0xFFFFFFFF +-------------------+

The ASCII map is great for explaining, but I find it a bit big for a
commit message. Can you summarize it as lines like

0x00000000-0x0FFFFFFF  not cached
0x10000000-0x107FFFFF  cached (SDRAM)
...

?

> Signed-off-by: Kuo-Jung Su <dantesu@faraday-tech.com>
> CC: Albert Aribaud <albert.u.boot@aribaud.net>
> ---
>  arch/arm/include/asm/dma-mapping.h |   56 ++++++++++++++++++++++--
>  arch/arm/include/asm/global_data.h |    4 ++
>  arch/arm/include/asm/io.h          |   84 +++++++++++++++++++++++++++++++++++-
>  arch/arm/lib/cache-cp15.c          |   42 ++++++++++++++++++
>  common/cmd_boot.c                  |    4 ++
>  5 files changed, 186 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
> index 5bbb0a0..53c4edf 100644
> --- a/arch/arm/include/asm/dma-mapping.h
> +++ b/arch/arm/include/asm/dma-mapping.h
> @@ -3,6 +3,9 @@
>   * Stelian Pop <stelian@popies.net>
>   * Lead Tech Design <www.leadtechdesign.com>
>   *
> + * (C) Copyright 2010
> + * Dante Su <dantesu@faraday-tech.com>
> + *
>   * See file CREDITS for list of people who contributed to this
>   * project.
>   *
> @@ -24,22 +27,69 @@
>  #ifndef __ASM_ARM_DMA_MAPPING_H
>  #define __ASM_ARM_DMA_MAPPING_H
> 
> +#include <asm/u-boot.h>
> +#include <asm/global_data.h>
> +#include <asm/io.h>
> +#include <malloc.h>
> +
>  enum dma_data_direction {
>  	DMA_BIDIRECTIONAL	= 0,
>  	DMA_TO_DEVICE		= 1,
>  	DMA_FROM_DEVICE		= 2,
>  };
> 
> -static void *dma_alloc_coherent(size_t len, unsigned long *handle)
> +static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
> +{
> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
> +	DECLARE_GLOBAL_DATA_PTR;

I'd rather have the global data ptr be declared outside any function,
and only once.

> +#endif
> +	void *va = memalign(ARCH_DMA_MINALIGN, len);
> +
> +	if (va && handle)
> +		*handle = virt_to_phys(va);
> +
> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
> +	if (gd->arch.cpu_mmu) {
> +		/* invalidate the buffer, convert to un-cached address */
> +		if (va != NULL) {
> +			invalidate_dcache_range((ulong)va, (ulong)va + len);
> +			va = virt_to_uncached(va);
> +		}
> +	}
> +#endif
> +
> +	return va;
> +}
> +
> +static inline void dma_free_coherent(void *va)
>  {
> -	*handle = (unsigned long)malloc(len);
> -	return (void *)*handle;
> +	free(virt_to_cached(va));
>  }

If I read this correctly, this code changes the semantics of
dma_alloc_coherent() for boards other than Faraday-based: before,
memory was simply malloc()ed, now it would be memalign()ed then
virt_to_phys()ed. Why not simply keep the previous implementation under
a #else...#endif block?

>  static inline unsigned long dma_map_single(volatile void *vaddr, size_t len,
>  					   enum dma_data_direction dir)
>  {
> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
> +	DECLARE_GLOBAL_DATA_PTR;
> +
> +	if (gd->arch.cpu_mmu) {
> +		switch (dir) {
> +		case DMA_BIDIRECTIONAL:
> +		case DMA_TO_DEVICE:
> +			flush_dcache_range((ulong)vaddr,
> +				(ulong)vaddr + len);
> +			break;
> +
> +		case DMA_FROM_DEVICE:
> +			invalidate_dcache_range((ulong)vaddr,
> +				(ulong)vaddr + len);
> +			break;
> +		}
> +	}
> +	return virt_to_phys((void *)vaddr);
> +#else
>  	return (unsigned long)vaddr;
> +#endif
>  }

Here we have such a #else/#endif, which makes sure non-Faraday boards
are unaffected.

>  static inline void dma_unmap_single(volatile void *vaddr, size_t len,
> diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
> index 37ac0da..bd18ff7 100644
> --- a/arch/arm/include/asm/global_data.h
> +++ b/arch/arm/include/asm/global_data.h
> @@ -38,6 +38,10 @@ struct arch_global_data {
>  	unsigned long	pllb_rate_hz;
>  	unsigned long	at91_pllb_usb_init;
>  #endif
> +#ifdef CONFIG_FARADAY
> +	unsigned long   cpu_id;
> +	unsigned long   cpu_mmu;	/* has mmu */
> +#endif
>  	/* "static data" needed by most of timer.c on ARM platforms */
>  	unsigned long timer_rate_hz;
>  	unsigned long tbu;
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index 1fbc531..17d8898 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -2,6 +2,7 @@
>   *  linux/include/asm-arm/io.h
>   *
>   *  Copyright (C) 1996-2000 Russell King
> + *  Copyright (C) 2009-2010 Dante Su <dantesu@faraday-tech.com>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License version 2 as
> @@ -23,6 +24,8 @@
>  #ifdef __KERNEL__
> 
>  #include <linux/types.h>
> +#include <asm/u-boot.h>
> +#include <asm/global_data.h>
>  #include <asm/byteorder.h>
>  #include <asm/memory.h>
>  #if 0	/* XXX###XXX */
> @@ -57,9 +60,88 @@ static inline void unmap_physmem(void *vaddr, unsigned long flags)
> 
>  }
> 
> +#ifdef CONFIG_FARADAY
> +
> +# ifndef CONFIG_SYS_DCACHE_OFF
> +
> +static inline ulong uncached_base(volatile gd_t *gd)
> +{
> +	return (4096 - (gd->ram_size >> 20)) << 20;

Symbolic constants or a comment would not hurt here.

> +}
> +# endif
> +
> +static inline void *virt_to_cached(void *va)
> +{
> +# ifndef CONFIG_SYS_DCACHE_OFF
> +	DECLARE_GLOBAL_DATA_PTR;
> +	ulong base = uncached_base(gd);
> +
> +	if (!gd->arch.cpu_mmu)
> +		return va;
> +
> +	if ((ulong)va >= base &&
> +		(ulong)va < (base + gd->ram_size))
> +		va = (void *)((ulong)va - base + CONFIG_SYS_SDRAM_BASE);
> +# endif	/* !CONFIG_SYS_DCACHE_OFF */
> +
> +	return va;
> +}
> +
> +static inline void *virt_to_uncached(void *va)
> +{
> +# ifndef CONFIG_SYS_DCACHE_OFF
> +	DECLARE_GLOBAL_DATA_PTR;
> +	ulong base = uncached_base(gd);
> +
> +	if (!gd->arch.cpu_mmu)
> +		return va;
> +
> +#  ifdef CONFIG_USE_IRQ
> +	if ((ulong)va < SZ_1M)
> +		return (void *)(base + (ulong)va);
> +#  endif
> +
> +	if ((ulong)va >= CONFIG_SYS_SDRAM_BASE &&
> +		(ulong)va < (CONFIG_SYS_SDRAM_BASE + gd->ram_size))
> +		va = (void *)(base + ((ulong)va - CONFIG_SYS_SDRAM_BASE));
> +# endif	/* !CONFIG_SYS_DCACHE_OFF */
> +
> +	return va;
> +}
> +
> +#endif	/* CONFIG_FARADAY */
> +
>  static inline phys_addr_t virt_to_phys(void * vaddr)
>  {
> -	return (phys_addr_t)(vaddr);
> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
> +
> +	DECLARE_GLOBAL_DATA_PTR;
> +	bd_t *bd = gd->bd;
> +	ulong base = uncached_base(gd);
> +	ulong phys = (ulong)vaddr;
> +
> +	if (!gd->arch.cpu_mmu)
> +		return (phys_addr_t)phys;
> +
> +	if (phys >= base) {
> +		ulong bank;
> +		ulong off = phys - base;
> +		for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; ++bank) {
> +			if (bd->bi_dram[bank].size > off)
> +				break;
> +			off -= bd->bi_dram[bank].size;
> +		}
> +		phys = bd->bi_dram[bank].start + off;
> +	}
> +# ifdef CONFIG_USE_IRQ
> +	else if (phys < SZ_1M && bd->bi_dram[0].start != 0)
> +		phys = bd->bi_dram[0].start + phys;
> +# endif
> +
> +	return (phys_addr_t)phys;
> +#else
> +	return (phys_addr_t)vaddr;
> +#endif
>  }
> 
>  /*
> diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
> index 4abe1cf..eee8585 100644
> --- a/arch/arm/lib/cache-cp15.c
> +++ b/arch/arm/lib/cache-cp15.c
> @@ -1,6 +1,8 @@
>  /*
>   * (C) Copyright 2002
>   * Wolfgang Denk, DENX Software Engineering, wd@denx.de.
> + * (C) Copyright 2010
> + * Dante Su <dantesu@faraday-tech.com>
>   *
>   * See file CREDITS for list of people who contributed to this
>   * project.
> @@ -87,6 +89,10 @@ __weak void dram_bank_mmu_setup(int bank)
>  {
>  	bd_t *bd = gd->bd;
>  	int	i;
> +#ifdef CONFIG_FARADAY
> +	ulong ubase, off;
> +	u32 *page_table = (u32 *)gd->arch.tlb_addr;
> +#endif
> 
>  	debug("%s: bank: %d\n", __func__, bank);
>  	for (i = bd->bi_dram[bank].start >> 20;
> @@ -98,6 +104,32 @@ __weak void dram_bank_mmu_setup(int bank)
>  		set_section_dcache(i, DCACHE_WRITEBACK);
>  #endif
>  	}
> +#ifdef CONFIG_FARADAY
> +# ifdef CONFIG_USE_IRQ
> +	/* map the exception table to 0x00000000 if necessary */
> +	if (bank == 0 && bd->bi_dram[bank].start != 0) {
> +		u32 pa = bd->bi_dram[bank].start;
> +#if defined(CONFIG_SYS_ARM_CACHE_WRITETHROUGH)
> +		page_table[0] = pa | (3 << 10) | DCACHE_WRITETHROUGH;
> +#else
> +		page_table[0] = pa | (3 << 10) | DCACHE_WRITEBACK;
> +#endif
> +	}
> +# endif
> +	/* calculate address offset */
> +	off  = 0;
> +	for (i = 0; i < bank; ++i)
> +		off += bd->bi_dram[bank].size;
> +
> +	/* create memory map */
> +	ubase = (4096 - (gd->ram_size >> 20)) << 20;
> +	for (i = 0; i < bd->bi_dram[bank].size >> 20; ++i) {
> +		u32 pa = bd->bi_dram[bank].start + (i << 20);
> +		/* create un-cached address map */
> +		u32 va = ubase + off + (i << 20);
> +		page_table[va >> 20] = pa | (3 << 10) | DCACHE_OFF;
> +	}
> +#endif
>  }
> 
>  /* to activate the MMU we need to set up virtual memory: use 1M areas */
> @@ -126,6 +158,10 @@ static inline void mmu_setup(void)
> 
>  	/* and enable the mmu */
>  	reg = get_cr();	/* get control reg. */
> +#ifdef CONFIG_FARADAY
> +	reg |= CR_W;	/* enable write buffer */
> +	reg |= CR_Z;	/* enable branch prediction */
> +#endif
>  	cp_delay();
>  	set_cr(reg | CR_M);
>  }
> @@ -140,9 +176,15 @@ static void cache_enable(uint32_t cache_bit)
>  {
>  	uint32_t reg;
> 
> +#ifdef CONFIG_FARADAY
> +	if (!gd->arch.cpu_mmu && (cache_bit == CR_C))
> +		return;
> +#endif
> +
>  	/* The data cache is not active unless the mmu is enabled too */
>  	if ((cache_bit == CR_C) && !mmu_enabled())
>  		mmu_setup();
> +
>  	reg = get_cr();	/* get control reg. */
>  	cp_delay();
>  	set_cr(reg | cache_bit);
> diff --git a/common/cmd_boot.c b/common/cmd_boot.c
> index d3836fd..b2477e8 100644
> --- a/common/cmd_boot.c
> +++ b/common/cmd_boot.c
> @@ -50,6 +50,10 @@ static int do_go(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
> 
>  	printf ("## Starting application at 0x%08lX ...\n", addr);
> 
> +#if defined(__ARM__) && !defined(CONFIG_SYS_DCACHE_OFF)
> +	cleanup_before_linux();
> +#endif
> +
>  	/*
>  	 * pass address parameter as argv[0] (aka command name),
>  	 * and all remaining args
> --
> 1.7.9.5
> 


Amicalement,
Kuo-Jung Su - April 22, 2013, 1:23 a.m.
2013/4/18 Wolfgang Denk <wd@denx.de>:
> Dear Kuo-Jung Su,
>
> In message <1366277139-29728-12-git-send-email-dantesu@gmail.com> you wrote:
> ...
>> --- a/common/cmd_boot.c
>> +++ b/common/cmd_boot.c
>> @@ -50,6 +50,10 @@ static int do_go(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
>>
>>       printf ("## Starting application at 0x%08lX ...\n", addr);
>>
>> +#if defined(__ARM__) && !defined(CONFIG_SYS_DCACHE_OFF)
>> +     cleanup_before_linux();
>> +#endif
>> +
>>       /*
>>        * pass address parameter as argv[0] (aka command name),
>>        * and all remaining args
>
> This affects global code. Please submit it as a separate patch.

Got it, thanks

>
> And why exactly is this ARM specific?
>

Because it has only been tested on the ARM platform;
I'm not sure whether the other architectures also provide cleanup_before_linux().

> Best regards,
>
> Wolfgang Denk
>
> --
> DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
> HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
> Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de
> My play was a complete success.  The audience was a failure.



--
Best wishes,
Kuo-Jung Su
Kuo-Jung Su - April 22, 2013, 1:27 a.m.
2013/4/19 Albert ARIBAUD <albert.u.boot@aribaud.net>:
> Hi Kuo-Jung,
>
> On Thu, 18 Apr 2013 17:25:38 +0800, Kuo-Jung Su <dantesu@gmail.com>
> wrote:
>
>> From: Kuo-Jung Su <dantesu@faraday-tech.com>
>>
>> This patch would enable MMU for Faraday ARMv5TE cores.
>>
>> Here is the abstract of this MMU design.
>>
>> Assume SDRAM memory region starts at 0x10000000, and its size = 0x800000.
>>
>> 0x00000000 +-------------------+
>>            |                   |
>>            |     UN-CACHED     |
>>            |                   |
>>            |                   |
>> 0x10000000 +-------------------+
>>            |  CACHED (SDRAM)   | <- It's where data/bss/stack lived.
>>            |                   |
>>            |                   |
>> 0x10800000 +-------------------+
>>            |                   |
>>            |                   |
>>            |     UN-CACHED     |
>>            |                   |
>>            |                   |
>> 0xFF800000 +-------------------+
>>            | UN-CACHED (SDRAM) | <- An un-cached shadow of the SDRAM.
>>            |                   |    dma_alloc_coherent() always returns
>>            |                   |    an address in this region.
>> 0xFFFFFFFF +-------------------+
>
> The ASCII map is great for explaining, but I find it a bit big for a
> commit message. Can you summarize it as lines like
>
> 0x00000000-0x0FFFFFFF  not cached
> 0x10000000-0x107FFFFF  cached (SDRAM)
> ...
>
> ?
>

Sure,
it will be updated that way in the next version of the patch.

>> Signed-off-by: Kuo-Jung Su <dantesu@faraday-tech.com>
>> CC: Albert Aribaud <albert.u.boot@aribaud.net>
>> ---
>>  arch/arm/include/asm/dma-mapping.h |   56 ++++++++++++++++++++++--
>>  arch/arm/include/asm/global_data.h |    4 ++
>>  arch/arm/include/asm/io.h          |   84 +++++++++++++++++++++++++++++++++++-
>>  arch/arm/lib/cache-cp15.c          |   42 ++++++++++++++++++
>>  common/cmd_boot.c                  |    4 ++
>>  5 files changed, 186 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
>> index 5bbb0a0..53c4edf 100644
>> --- a/arch/arm/include/asm/dma-mapping.h
>> +++ b/arch/arm/include/asm/dma-mapping.h
>> @@ -3,6 +3,9 @@
>>   * Stelian Pop <stelian@popies.net>
>>   * Lead Tech Design <www.leadtechdesign.com>
>>   *
>> + * (C) Copyright 2010
>> + * Dante Su <dantesu@faraday-tech.com>
>> + *
>>   * See file CREDITS for list of people who contributed to this
>>   * project.
>>   *
>> @@ -24,22 +27,69 @@
>>  #ifndef __ASM_ARM_DMA_MAPPING_H
>>  #define __ASM_ARM_DMA_MAPPING_H
>>
>> +#include <asm/u-boot.h>
>> +#include <asm/global_data.h>
>> +#include <asm/io.h>
>> +#include <malloc.h>
>> +
>>  enum dma_data_direction {
>>       DMA_BIDIRECTIONAL       = 0,
>>       DMA_TO_DEVICE           = 1,
>>       DMA_FROM_DEVICE         = 2,
>>  };
>>
>> -static void *dma_alloc_coherent(size_t len, unsigned long *handle)
>> +static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
>> +{
>> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
>> +     DECLARE_GLOBAL_DATA_PTR;
>
> I'd rather have the global data ptr be declared outside any function,
> and only once.
>

Got it, thanks

>> +#endif
>> +     void *va = memalign(ARCH_DMA_MINALIGN, len);
>> +
>> +     if (va && handle)
>> +             *handle = virt_to_phys(va);
>> +
>> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
>> +     if (gd->arch.cpu_mmu) {
>> +             /* invalidate the buffer, convert to un-cached address */
>> +             if (va != NULL) {
>> +                     invalidate_dcache_range((ulong)va, (ulong)va + len);
>> +                     va = virt_to_uncached(va);
>> +             }
>> +     }
>> +#endif
>> +
>> +     return va;
>> +}
>> +
>> +static inline void dma_free_coherent(void *va)
>>  {
>> -     *handle = (unsigned long)malloc(len);
>> -     return (void *)*handle;
>> +     free(virt_to_cached(va));
>>  }
>
> If I read this correctly, this code changes the semantics of
> dma_alloc_coherent() for boards other than Faraday-based: before,
> memory was simply malloc()ed, now it would be memalign()ed then
> virt_to_phys()ed. Why not simply keep the previous implementation under
> a #else...#endif block?
>

Sorry, that was an accident; I'll have it fixed in the next version.

>>  static inline unsigned long dma_map_single(volatile void *vaddr, size_t len,
>>                                          enum dma_data_direction dir)
>>  {
>> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
>> +     DECLARE_GLOBAL_DATA_PTR;
>> +
>> +     if (gd->arch.cpu_mmu) {
>> +             switch (dir) {
>> +             case DMA_BIDIRECTIONAL:
>> +             case DMA_TO_DEVICE:
>> +                     flush_dcache_range((ulong)vaddr,
>> +                             (ulong)vaddr + len);
>> +                     break;
>> +
>> +             case DMA_FROM_DEVICE:
>> +                     invalidate_dcache_range((ulong)vaddr,
>> +                             (ulong)vaddr + len);
>> +                     break;
>> +             }
>> +     }
>> +     return virt_to_phys((void *)vaddr);
>> +#else
>>       return (unsigned long)vaddr;
>> +#endif
>>  }
>
> Here we have such a #else/#endif, which makes sure non-Faraday boards
> are unaffected.
>
>>  static inline void dma_unmap_single(volatile void *vaddr, size_t len,
>> diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
>> index 37ac0da..bd18ff7 100644
>> --- a/arch/arm/include/asm/global_data.h
>> +++ b/arch/arm/include/asm/global_data.h
>> @@ -38,6 +38,10 @@ struct arch_global_data {
>>       unsigned long   pllb_rate_hz;
>>       unsigned long   at91_pllb_usb_init;
>>  #endif
>> +#ifdef CONFIG_FARADAY
>> +     unsigned long   cpu_id;
>> +     unsigned long   cpu_mmu;        /* has mmu */
>> +#endif
>>       /* "static data" needed by most of timer.c on ARM platforms */
>>       unsigned long timer_rate_hz;
>>       unsigned long tbu;
>> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
>> index 1fbc531..17d8898 100644
>> --- a/arch/arm/include/asm/io.h
>> +++ b/arch/arm/include/asm/io.h
>> @@ -2,6 +2,7 @@
>>   *  linux/include/asm-arm/io.h
>>   *
>>   *  Copyright (C) 1996-2000 Russell King
>> + *  Copyright (C) 2009-2010 Dante Su <dantesu@faraday-tech.com>
>>   *
>>   * This program is free software; you can redistribute it and/or modify
>>   * it under the terms of the GNU General Public License version 2 as
>> @@ -23,6 +24,8 @@
>>  #ifdef __KERNEL__
>>
>>  #include <linux/types.h>
>> +#include <asm/u-boot.h>
>> +#include <asm/global_data.h>
>>  #include <asm/byteorder.h>
>>  #include <asm/memory.h>
>>  #if 0        /* XXX###XXX */
>> @@ -57,9 +60,88 @@ static inline void unmap_physmem(void *vaddr, unsigned long flags)
>>
>>  }
>>
>> +#ifdef CONFIG_FARADAY
>> +
>> +# ifndef CONFIG_SYS_DCACHE_OFF
>> +
>> +static inline ulong uncached_base(volatile gd_t *gd)
>> +{
>> +     return (4096 - (gd->ram_size >> 20)) << 20;
>
> Symbolic constants or a comment would not hurt here.
>

Got it, thanks

>> +}
>> +# endif
>> +
>> +static inline void *virt_to_cached(void *va)
>> +{
>> +# ifndef CONFIG_SYS_DCACHE_OFF
>> +     DECLARE_GLOBAL_DATA_PTR;
>> +     ulong base = uncached_base(gd);
>> +
>> +     if (!gd->arch.cpu_mmu)
>> +             return va;
>> +
>> +     if ((ulong)va >= base &&
>> +             (ulong)va < (base + gd->ram_size))
>> +             va = (void *)((ulong)va - base + CONFIG_SYS_SDRAM_BASE);
>> +# endif      /* !CONFIG_SYS_DCACHE_OFF */
>> +
>> +     return va;
>> +}
>> +
>> +static inline void *virt_to_uncached(void *va)
>> +{
>> +# ifndef CONFIG_SYS_DCACHE_OFF
>> +     DECLARE_GLOBAL_DATA_PTR;
>> +     ulong base = uncached_base(gd);
>> +
>> +     if (!gd->arch.cpu_mmu)
>> +             return va;
>> +
>> +#  ifdef CONFIG_USE_IRQ
>> +     if ((ulong)va < SZ_1M)
>> +             return (void *)(base + (ulong)va);
>> +#  endif
>> +
>> +     if ((ulong)va >= CONFIG_SYS_SDRAM_BASE &&
>> +             (ulong)va < (CONFIG_SYS_SDRAM_BASE + gd->ram_size))
>> +             va = (void *)(base + ((ulong)va - CONFIG_SYS_SDRAM_BASE));
>> +# endif      /* !CONFIG_SYS_DCACHE_OFF */
>> +
>> +     return va;
>> +}
>> +
>> +#endif       /* CONFIG_FARADAY */
>> +
>>  static inline phys_addr_t virt_to_phys(void * vaddr)
>>  {
>> -     return (phys_addr_t)(vaddr);
>> +#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
>> +
>> +     DECLARE_GLOBAL_DATA_PTR;
>> +     bd_t *bd = gd->bd;
>> +     ulong base = uncached_base(gd);
>> +     ulong phys = (ulong)vaddr;
>> +
>> +     if (!gd->arch.cpu_mmu)
>> +             return (phys_addr_t)phys;
>> +
>> +     if (phys >= base) {
>> +             ulong bank;
>> +             ulong off = phys - base;
>> +             for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; ++bank) {
>> +                     if (bd->bi_dram[bank].size > off)
>> +                             break;
>> +                     off -= bd->bi_dram[bank].size;
>> +             }
>> +             phys = bd->bi_dram[bank].start + off;
>> +     }
>> +# ifdef CONFIG_USE_IRQ
>> +     else if (phys < SZ_1M && bd->bi_dram[0].start != 0)
>> +             phys = bd->bi_dram[0].start + phys;
>> +# endif
>> +
>> +     return (phys_addr_t)phys;
>> +#else
>> +     return (phys_addr_t)vaddr;
>> +#endif
>>  }
>>
>>  /*
>> diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
>> index 4abe1cf..eee8585 100644
>> --- a/arch/arm/lib/cache-cp15.c
>> +++ b/arch/arm/lib/cache-cp15.c
>> @@ -1,6 +1,8 @@
>>  /*
>>   * (C) Copyright 2002
>>   * Wolfgang Denk, DENX Software Engineering, wd@denx.de.
>> + * (C) Copyright 2010
>> + * Dante Su <dantesu@faraday-tech.com>
>>   *
>>   * See file CREDITS for list of people who contributed to this
>>   * project.
>> @@ -87,6 +89,10 @@ __weak void dram_bank_mmu_setup(int bank)
>>  {
>>       bd_t *bd = gd->bd;
>>       int     i;
>> +#ifdef CONFIG_FARADAY
>> +     ulong ubase, off;
>> +     u32 *page_table = (u32 *)gd->arch.tlb_addr;
>> +#endif
>>
>>       debug("%s: bank: %d\n", __func__, bank);
>>       for (i = bd->bi_dram[bank].start >> 20;
>> @@ -98,6 +104,32 @@ __weak void dram_bank_mmu_setup(int bank)
>>               set_section_dcache(i, DCACHE_WRITEBACK);
>>  #endif
>>       }
>> +#ifdef CONFIG_FARADAY
>> +# ifdef CONFIG_USE_IRQ
>> +     /* map the exception table to 0x00000000 if necessary */
>> +     if (bank == 0 && bd->bi_dram[bank].start != 0) {
>> +             u32 pa = bd->bi_dram[bank].start;
>> +#if defined(CONFIG_SYS_ARM_CACHE_WRITETHROUGH)
>> +             page_table[0] = pa | (3 << 10) | DCACHE_WRITETHROUGH;
>> +#else
>> +             page_table[0] = pa | (3 << 10) | DCACHE_WRITEBACK;
>> +#endif
>> +     }
>> +# endif
>> +     /* calculate address offset */
>> +     off  = 0;
>> +     for (i = 0; i < bank; ++i)
>> +             off += bd->bi_dram[bank].size;
>> +
>> +     /* create memory map */
>> +     ubase = (4096 - (gd->ram_size >> 20)) << 20;
>> +     for (i = 0; i < bd->bi_dram[bank].size >> 20; ++i) {
>> +             u32 pa = bd->bi_dram[bank].start + (i << 20);
>> +             /* create un-cached address map */
>> +             u32 va = ubase + off + (i << 20);
>> +             page_table[va >> 20] = pa | (3 << 10) | DCACHE_OFF;
>> +     }
>> +#endif
>>  }
>>
>>  /* to activate the MMU we need to set up virtual memory: use 1M areas */
>> @@ -126,6 +158,10 @@ static inline void mmu_setup(void)
>>
>>       /* and enable the mmu */
>>       reg = get_cr(); /* get control reg. */
>> +#ifdef CONFIG_FARADAY
>> +     reg |= CR_W;    /* enable write buffer */
>> +     reg |= CR_Z;    /* enable branch prediction */
>> +#endif
>>       cp_delay();
>>       set_cr(reg | CR_M);
>>  }
>> @@ -140,9 +176,15 @@ static void cache_enable(uint32_t cache_bit)
>>  {
>>       uint32_t reg;
>>
>> +#ifdef CONFIG_FARADAY
>> +     if (!gd->arch.cpu_mmu && (cache_bit == CR_C))
>> +             return;
>> +#endif
>> +
>>       /* The data cache is not active unless the mmu is enabled too */
>>       if ((cache_bit == CR_C) && !mmu_enabled())
>>               mmu_setup();
>> +
>>       reg = get_cr(); /* get control reg. */
>>       cp_delay();
>>       set_cr(reg | cache_bit);
>> diff --git a/common/cmd_boot.c b/common/cmd_boot.c
>> index d3836fd..b2477e8 100644
>> --- a/common/cmd_boot.c
>> +++ b/common/cmd_boot.c
>> @@ -50,6 +50,10 @@ static int do_go(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
>>
>>       printf ("## Starting application at 0x%08lX ...\n", addr);
>>
>> +#if defined(__ARM__) && !defined(CONFIG_SYS_DCACHE_OFF)
>> +     cleanup_before_linux();
>> +#endif
>> +
>>       /*
>>        * pass address parameter as argv[0] (aka command name),
>>        * and all remaining args
>> --
>> 1.7.9.5
>>
>
>
> Amicalement,
> --
> Albert.



--
Best wishes,
Kuo-Jung Su

Patch

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 5bbb0a0..53c4edf 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -3,6 +3,9 @@ 
  * Stelian Pop <stelian@popies.net>
  * Lead Tech Design <www.leadtechdesign.com>
  *
+ * (C) Copyright 2010
+ * Dante Su <dantesu@faraday-tech.com>
+ *
  * See file CREDITS for list of people who contributed to this
  * project.
  *
@@ -24,22 +27,69 @@ 
 #ifndef __ASM_ARM_DMA_MAPPING_H
 #define __ASM_ARM_DMA_MAPPING_H

+#include <asm/u-boot.h>
+#include <asm/global_data.h>
+#include <asm/io.h>
+#include <malloc.h>
+
 enum dma_data_direction {
 	DMA_BIDIRECTIONAL	= 0,
 	DMA_TO_DEVICE		= 1,
 	DMA_FROM_DEVICE		= 2,
 };

-static void *dma_alloc_coherent(size_t len, unsigned long *handle)
+static inline void *dma_alloc_coherent(size_t len, unsigned long *handle)
+{
+#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
+	DECLARE_GLOBAL_DATA_PTR;
+#endif
+	void *va = memalign(ARCH_DMA_MINALIGN, len);
+
+	if (va && handle)
+		*handle = virt_to_phys(va);
+
+#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
+	if (gd->arch.cpu_mmu) {
+		/* invalidate the buffer, convert to un-cached address */
+		if (va != NULL) {
+			invalidate_dcache_range((ulong)va, (ulong)va + len);
+			va = virt_to_uncached(va);
+		}
+	}
+#endif
+
+	return va;
+}
+
+static inline void dma_free_coherent(void *va)
 {
-	*handle = (unsigned long)malloc(len);
-	return (void *)*handle;
+	free(virt_to_cached(va));
 }

 static inline unsigned long dma_map_single(volatile void *vaddr, size_t len,
 					   enum dma_data_direction dir)
 {
+#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
+	DECLARE_GLOBAL_DATA_PTR;
+
+	if (gd->arch.cpu_mmu) {
+		switch (dir) {
+		case DMA_BIDIRECTIONAL:
+		case DMA_TO_DEVICE:
+			flush_dcache_range((ulong)vaddr,
+				(ulong)vaddr + len);
+			break;
+
+		case DMA_FROM_DEVICE:
+			invalidate_dcache_range((ulong)vaddr,
+				(ulong)vaddr + len);
+			break;
+		}
+	}
+	return virt_to_phys((void *)vaddr);
+#else
 	return (unsigned long)vaddr;
+#endif
 }

 static inline void dma_unmap_single(volatile void *vaddr, size_t len,
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
index 37ac0da..bd18ff7 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -38,6 +38,10 @@  struct arch_global_data {
 	unsigned long	pllb_rate_hz;
 	unsigned long	at91_pllb_usb_init;
 #endif
+#ifdef CONFIG_FARADAY
+	unsigned long   cpu_id;
+	unsigned long   cpu_mmu;	/* has mmu */
+#endif
 	/* "static data" needed by most of timer.c on ARM platforms */
 	unsigned long timer_rate_hz;
 	unsigned long tbu;
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1fbc531..17d8898 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -2,6 +2,7 @@ 
  *  linux/include/asm-arm/io.h
  *
  *  Copyright (C) 1996-2000 Russell King
+ *  Copyright (C) 2009-2010 Dante Su <dantesu@faraday-tech.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -23,6 +24,8 @@ 
 #ifdef __KERNEL__

 #include <linux/types.h>
+#include <asm/u-boot.h>
+#include <asm/global_data.h>
 #include <asm/byteorder.h>
 #include <asm/memory.h>
 #if 0	/* XXX###XXX */
@@ -57,9 +60,88 @@  static inline void unmap_physmem(void *vaddr, unsigned long flags)

 }

+#ifdef CONFIG_FARADAY
+
+# ifndef CONFIG_SYS_DCACHE_OFF
+/* Base of the un-cached SDRAM shadow: 4096MB minus the RAM size in MB. */
+static inline ulong uncached_base(volatile gd_t *gd)
+{
+	return (4096 - (gd->ram_size >> 20)) << 20;
+}
+# endif
+
+static inline void *virt_to_cached(void *va)
+{
+	/* Un-cached shadow address -> cached SDRAM alias; others unchanged. */
+# ifndef CONFIG_SYS_DCACHE_OFF
+	DECLARE_GLOBAL_DATA_PTR;
+	ulong base = uncached_base(gd);
+
+	if (!gd->arch.cpu_mmu)
+		return va;
+	/* base + ram_size wraps to 0 in 32 bits, so bound the offset instead */
+	if ((ulong)va >= base && (ulong)va - base < gd->ram_size)
+		va = (void *)((ulong)va - base + CONFIG_SYS_SDRAM_BASE);
+# endif	/* !CONFIG_SYS_DCACHE_OFF */
+
+	return va;
+}
+
+static inline void *virt_to_uncached(void *va)
+{
+	/* Return va's un-cached shadow alias, or va itself if it has none. */
+# ifndef CONFIG_SYS_DCACHE_OFF
+	DECLARE_GLOBAL_DATA_PTR;
+	ulong shadow = uncached_base(gd);
+	ulong addr = (ulong)va;
+
+	if (!gd->arch.cpu_mmu)
+		return va;
+#  ifdef CONFIG_USE_IRQ
+	/* addresses in the first 1MB are offset by the shadow base */
+	if (addr < SZ_1M)
+		return (void *)(shadow + addr);
+#  endif
+	if (addr >= CONFIG_SYS_SDRAM_BASE &&
+		addr < (CONFIG_SYS_SDRAM_BASE + gd->ram_size))
+		va = (void *)(shadow + (addr - CONFIG_SYS_SDRAM_BASE));
+# endif	/* !CONFIG_SYS_DCACHE_OFF */
+	return va;
+}
+
+#endif	/* CONFIG_FARADAY */
+
 static inline phys_addr_t virt_to_phys(void * vaddr)
 {
-	return (phys_addr_t)(vaddr);
+#if defined(CONFIG_FARADAY) && !defined(CONFIG_SYS_DCACHE_OFF)
+	/* Faraday: undo the un-cached shadow / low-1MB vector remapping. */
+	DECLARE_GLOBAL_DATA_PTR;
+	bd_t *bd = gd->bd;
+	ulong base = uncached_base(gd);
+	ulong phys = (ulong)vaddr;
+
+	if (!gd->arch.cpu_mmu)
+		return (phys_addr_t)phys;
+
+	if (phys >= base) {
+		ulong bank;
+		ulong off = phys - base;
+		/* no bank hit: fall through untranslated, never index past the end */
+		for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; ++bank) {
+			if (bd->bi_dram[bank].size > off)
+				return bd->bi_dram[bank].start + off;
+			off -= bd->bi_dram[bank].size;
+		}
+	}
+# ifdef CONFIG_USE_IRQ
+	else if (phys < SZ_1M && bd->bi_dram[0].start != 0)
+		phys = bd->bi_dram[0].start + phys;
+# endif
+
+	return (phys_addr_t)phys;
+#else
+	return (phys_addr_t)vaddr;
+#endif
 }

 /*
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
index 4abe1cf..eee8585 100644
--- a/arch/arm/lib/cache-cp15.c
+++ b/arch/arm/lib/cache-cp15.c
@@ -1,6 +1,8 @@ 
 /*
  * (C) Copyright 2002
  * Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+ * (C) Copyright 2010
+ * Dante Su <dantesu@faraday-tech.com>
  *
  * See file CREDITS for list of people who contributed to this
  * project.
@@ -87,6 +89,10 @@  __weak void dram_bank_mmu_setup(int bank)
 {
 	bd_t *bd = gd->bd;
 	int	i;
+#ifdef CONFIG_FARADAY
+	ulong ubase, off;
+	u32 *page_table = (u32 *)gd->arch.tlb_addr;
+#endif

 	debug("%s: bank: %d\n", __func__, bank);
 	for (i = bd->bi_dram[bank].start >> 20;
@@ -98,6 +104,32 @@  __weak void dram_bank_mmu_setup(int bank)
 		set_section_dcache(i, DCACHE_WRITEBACK);
 #endif
 	}
+#ifdef CONFIG_FARADAY
+# ifdef CONFIG_USE_IRQ
+	/* map the exception table to 0x00000000 if necessary */
+	if (bank == 0 && bd->bi_dram[bank].start != 0) {
+		u32 pa = bd->bi_dram[bank].start;
+#if defined(CONFIG_SYS_ARM_CACHE_WRITETHROUGH)
+		page_table[0] = pa | (3 << 10) | DCACHE_WRITETHROUGH;
+#else
+		page_table[0] = pa | (3 << 10) | DCACHE_WRITEBACK;
+#endif
+	}
+# endif
+	/* shadow offset of this bank = total size of the banks before it */
+	off  = 0;
+	for (i = 0; i < bank; ++i)
+		off += bd->bi_dram[i].size;
+
+	/* mirror this bank, 1MB section by section, into the shadow window */
+	ubase = (4096 - (gd->ram_size >> 20)) << 20;
+	for (i = 0; i < bd->bi_dram[bank].size >> 20; ++i) {
+		u32 pa = bd->bi_dram[bank].start + (i << 20);
+		/* create un-cached address map */
+		u32 va = ubase + off + (i << 20);
+		page_table[va >> 20] = pa | (3 << 10) | DCACHE_OFF;
+	}
+#endif
 }

 /* to activate the MMU we need to set up virtual memory: use 1M areas */
@@ -126,6 +158,10 @@  static inline void mmu_setup(void)

 	/* and enable the mmu */
 	reg = get_cr();	/* get control reg. */
+#ifdef CONFIG_FARADAY
+	reg |= CR_W;	/* enable write buffer */
+	reg |= CR_Z;	/* enable branch prediction */
+#endif
 	cp_delay();
 	set_cr(reg | CR_M);
 }
@@ -140,9 +176,15 @@  static void cache_enable(uint32_t cache_bit)
 {
 	uint32_t reg;

+#ifdef CONFIG_FARADAY
+	if (!gd->arch.cpu_mmu && (cache_bit == CR_C))
+		return;
+#endif
+
 	/* The data cache is not active unless the mmu is enabled too */
 	if ((cache_bit == CR_C) && !mmu_enabled())
 		mmu_setup();
+
 	reg = get_cr();	/* get control reg. */
 	cp_delay();
 	set_cr(reg | cache_bit);
diff --git a/common/cmd_boot.c b/common/cmd_boot.c
index d3836fd..b2477e8 100644
--- a/common/cmd_boot.c
+++ b/common/cmd_boot.c
@@ -50,6 +50,10 @@  static int do_go(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])

 	printf ("## Starting application at 0x%08lX ...\n", addr);

+#if defined(__ARM__) && !defined(CONFIG_SYS_DCACHE_OFF)
+	cleanup_before_linux();
+#endif
+
 	/*
 	 * pass address parameter as argv[0] (aka command name),
 	 * and all remaining args