Patchwork [U-Boot] arm: Tegra2: add support for A9 CPU init

login
register
mail settings
Submitter Tom Warren
Date Feb. 16, 2011, 8:26 p.m.
Message ID <1297887964-25207-2-git-send-email-twarren@nvidia.com>
Download mbox | patch
Permalink /patch/83397/
State Superseded
Delegated to: Albert ARIBAUD
Headers show

Comments

Tom Warren - Feb. 16, 2011, 8:26 p.m.
Signed-off-by: Tom Warren <twarren@nvidia.com>
---
 arch/arm/cpu/armv7/start.S                 |    6 +
 arch/arm/cpu/armv7/tegra2/Makefile         |    2 +-
 arch/arm/cpu/armv7/tegra2/ap20.c           |  490 ++++++++++++++++++++++++++++
 arch/arm/cpu/armv7/tegra2/ap20.h           |  105 ++++++
 arch/arm/include/asm/arch-tegra2/clk_rst.h |   27 ++
 arch/arm/include/asm/arch-tegra2/pmc.h     |    8 +
 arch/arm/include/asm/arch-tegra2/scu.h     |   43 +++
 arch/arm/include/asm/arch-tegra2/tegra2.h  |    5 +
 board/nvidia/common/board.c                |   10 +
 board/nvidia/common/board.h                |   29 ++
 include/configs/harmony.h                  |    1 +
 include/configs/seaboard.h                 |    1 +
 include/configs/tegra2-common.h            |    2 +
 13 files changed, 728 insertions(+), 1 deletions(-)
 create mode 100644 arch/arm/cpu/armv7/tegra2/ap20.c
 create mode 100644 arch/arm/cpu/armv7/tegra2/ap20.h
 create mode 100644 arch/arm/include/asm/arch-tegra2/scu.h
 create mode 100644 board/nvidia/common/board.h
Tom Warren - Feb. 22, 2011, 11:41 p.m.
Anyone willing to review this? I'd like to get it pulled in to
Albert's arm repo ASAP.

Thanks,

Tom

On Wed, Feb 16, 2011 at 1:26 PM, Tom Warren <twarren.nvidia@gmail.com> wrote:
> Signed-off-by: Tom Warren <twarren@nvidia.com>
> ---
>  arch/arm/cpu/armv7/start.S                 |    6 +
>  arch/arm/cpu/armv7/tegra2/Makefile         |    2 +-
>  arch/arm/cpu/armv7/tegra2/ap20.c           |  490 ++++++++++++++++++++++++++++
>  arch/arm/cpu/armv7/tegra2/ap20.h           |  105 ++++++
>  arch/arm/include/asm/arch-tegra2/clk_rst.h |   27 ++
>  arch/arm/include/asm/arch-tegra2/pmc.h     |    8 +
>  arch/arm/include/asm/arch-tegra2/scu.h     |   43 +++
>  arch/arm/include/asm/arch-tegra2/tegra2.h  |    5 +
>  board/nvidia/common/board.c                |   10 +
>  board/nvidia/common/board.h                |   29 ++
>  include/configs/harmony.h                  |    1 +
>  include/configs/seaboard.h                 |    1 +
>  include/configs/tegra2-common.h            |    2 +
>  13 files changed, 728 insertions(+), 1 deletions(-)
>  create mode 100644 arch/arm/cpu/armv7/tegra2/ap20.c
>  create mode 100644 arch/arm/cpu/armv7/tegra2/ap20.h
>  create mode 100644 arch/arm/include/asm/arch-tegra2/scu.h
>  create mode 100644 board/nvidia/common/board.h
>
> diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
> index 684f2d2..50a1725 100644
> --- a/arch/arm/cpu/armv7/start.S
> +++ b/arch/arm/cpu/armv7/start.S
> @@ -70,6 +70,12 @@ _end_vect:
>  _TEXT_BASE:
>        .word   CONFIG_SYS_TEXT_BASE
>
> +#ifdef CONFIG_TEGRA2
> +.globl _armboot_start
> +_armboot_start:
> +        .word _start
> +#endif
> +
>  /*
>  * These are defined in the board-specific linker script.
>  */
> diff --git a/arch/arm/cpu/armv7/tegra2/Makefile b/arch/arm/cpu/armv7/tegra2/Makefile
> index 687c887..f1ea915 100644
> --- a/arch/arm/cpu/armv7/tegra2/Makefile
> +++ b/arch/arm/cpu/armv7/tegra2/Makefile
> @@ -28,7 +28,7 @@ include $(TOPDIR)/config.mk
>  LIB    =  $(obj)lib$(SOC).o
>
>  SOBJS  := lowlevel_init.o
> -COBJS  := board.o sys_info.o timer.o
> +COBJS  := ap20.o board.o sys_info.o timer.o
>
>  SRCS   := $(SOBJS:.o=.S) $(COBJS:.o=.c)
>  OBJS   := $(addprefix $(obj),$(COBJS) $(SOBJS))
> diff --git a/arch/arm/cpu/armv7/tegra2/ap20.c b/arch/arm/cpu/armv7/tegra2/ap20.c
> new file mode 100644
> index 0000000..89d0d5e
> --- /dev/null
> +++ b/arch/arm/cpu/armv7/tegra2/ap20.c
> @@ -0,0 +1,490 @@
> +/*
> +* (C) Copyright 2010-2011
> +* NVIDIA Corporation <www.nvidia.com>
> +*
> +* See file CREDITS for list of people who contributed to this
> +* project.
> +*
> +* This program is free software; you can redistribute it and/or
> +* modify it under the terms of the GNU General Public License as
> +* published by the Free Software Foundation; either version 2 of
> +* the License, or (at your option) any later version.
> +*
> +* This program is distributed in the hope that it will be useful,
> +* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +* GNU General Public License for more details.
> +*
> +* You should have received a copy of the GNU General Public License
> +* along with this program; if not, write to the Free Software
> +* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> +* MA 02111-1307 USA
> +*/
> +
> +#include "ap20.h"
> +#include <asm/io.h>
> +#include <asm/arch/tegra2.h>
> +#include <asm/arch/clk_rst.h>
> +#include <asm/arch/pmc.h>
> +#include <asm/arch/pinmux.h>
> +#include <asm/arch/scu.h>
> +#include <common.h>
> +
> +static void init_pll_x(void)
> +{
> +       struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +       u32     reg;
> +
> +       /* Is PLL-X already running? */
> +       reg = readl(&clkrst->crc_pllx_base);
> +       if (reg & PLL_ENABLE)
> +               return;
> +
> +       /* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
> +}
> +
> +static void set_cpu_reset_vector(u32 vector)
> +{
> +       writel(vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
> +}
> +
> +static void enable_cpu_clock(int enable)
> +{
> +       struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +       u32   reg, clk;
> +
> +       /*
> +        * NOTE:
> +        * Regardless of whether the request is to enable or disable the CPU
> +        * clock, every processor in the CPU complex except the master (CPU 0)
> +        * will have it's clock stopped because the AVP only talks to the
> +        * master. The AVP does not know (nor does it need to know) that there
> +        * are multiple processors in the CPU complex.
> +        */
> +
> +       /* Need to initialize PLLX? */
> +       if (enable) {
> +               /* Initialize PLL */
> +               init_pll_x();
> +
> +               /* Wait until stable */
> +               udelay(NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY);
> +
> +               reg = CCLK_BURST_POLICY;
> +               writel(reg, &clkrst->crc_cclk_brst_pol);
> +
> +               reg = SUPER_CCLK_DIVIDER;
> +               writel(reg, &clkrst->crc_super_cclk_div);
> +       }
> +
> +       /* Fetch the register containing the main CPU complex clock enable */
> +       reg = readl(&clkrst->crc_clk_out_enb_l);
> +
> +       /*
> +        * Read the register containing the individual CPU clock enables and
> +        * always stop the clock to CPU 1.
> +        */
> +       clk = readl(&clkrst->crc_clk_cpu_cmplx);
> +       clk |= CPU1_CLK_STP;
> +
> +       if (enable) {
> +               /* Enable the CPU clock */
> +               reg = readl(&clkrst->crc_clk_out_enb_l);
> +               reg |= CLK_ENB_CPU;
> +               clk = readl(&clkrst->crc_clk_cpu_cmplx);
> +               clk &= ~CPU0_CLK_STP;
> +       } else {
> +               /* Disable the CPU clock */
> +               reg = readl(&clkrst->crc_clk_out_enb_l);
> +               reg |= CLK_ENB_CPU;
> +               clk = readl(&clkrst->crc_clk_cpu_cmplx);
> +               clk |= CPU0_CLK_STP;
> +       }
> +
> +       writel(clk, &clkrst->crc_clk_cpu_cmplx);
> +       writel(reg, &clkrst->crc_clk_out_enb_l);
> +}
> +
> +static int is_cpu_powered(void)
> +{
> +       struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +       u32   reg;
> +
> +       reg = readl(&pmc->pmc_pwrgate_status);
> +
> +       if (reg & CPU_PWRED)
> +               return TRUE;
> +
> +       return FALSE;
> +}
> +
> +static void remove_cpu_io_clamps(void)
> +{
> +       struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +       u32   reg;
> +
> +       /* Remove the clamps on the CPU I/O signals */
> +       reg = readl(&pmc->pmc_remove_clamping);
> +       reg |= CPU_CLMP;
> +       writel(reg, &pmc->pmc_remove_clamping);
> +
> +       /* Give I/O signals time to stabilize */
> +       udelay(1000);
> +}
> +
> +static void powerup_cpu(void)
> +{
> +       struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +       u32   reg;
> +
> +       if (!is_cpu_powered()) {
> +               /* Toggle the CPU power state (OFF -> ON) */
> +               reg = readl(&pmc->pmc_pwrgate_toggle);
> +               reg &= (PARTID_CP);
> +               reg |= START_CP;
> +               writel(reg, &pmc->pmc_pwrgate_toggle);
> +
> +               /* Wait for the power to come up */
> +               while (!is_cpu_powered())
> +                       ;                       /* Do nothing */
> +
> +               /*
> +                * Remove the I/O clamps from CPU power partition.
> +                * Recommended only on a Warm boot, if the CPU partition gets
> +                * power gated. Shouldn't cause any harm when called after a
> +                * cold boot according to HW, probably just redundant.
> +                */
> +               remove_cpu_io_clamps();
> +       }
> +}
> +
> +static void enable_cpu_power_rail(void)
> +{
> +       struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +       u32   reg;
> +
> +       reg = readl(&pmc->pmc_cntrl);
> +       reg |= CPUPWRREQ_OE;
> +       writel(reg, &pmc->pmc_cntrl);
> +
> +       /* Delay to allow for stable power (CPU_PWR_REQ -> VDD_CPU from PMU) */
> +       udelay(3750);
> +}
> +
> +static void reset_A9_cpu(int reset)
> +{
> +       struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +       u32   reg, cpu;
> +
> +       /*
> +       * NOTE:  Regardless of whether the request is to hold the CPU in reset
> +       *        or take it out of reset, every processor in the CPU complex
> +       *        except the master (CPU 0) will be held in reset because the
> +       *        AVP only talks to the master. The AVP does not know that there
> +       *        are multiple processors in the CPU complex.
> +       */
> +
> +       /* Hold CPU 1 in reset */
> +       cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
> +       writel(cpu, &clkrst->crc_cpu_cmplx_set);
> +
> +       reg = readl(&clkrst->crc_rst_dev_l);
> +       if (reset) {
> +               /* Place CPU0 into reset */
> +               cpu = SET_DBGRESET0 | SET_DERESET0 | SET_CPURESET0;
> +               writel(cpu, &clkrst->crc_cpu_cmplx_set);
> +
> +               /* Enable master CPU reset */
> +               reg |= SWR_CPU_RST;
> +
> +       } else {
> +               /* Take CPU0 out of reset */
> +               cpu = CLR_DBGRESET0 | CLR_DERESET0 | CLR_CPURESET0;
> +               writel(cpu, &clkrst->crc_cpu_cmplx_clr);
> +
> +               /* Disable master CPU reset */
> +               reg &= ~SWR_CPU_RST;
> +       }
> +
> +       writel(reg, &clkrst->crc_rst_dev_l);
> +}
> +
> +static void clock_enable_coresight(int enable)
> +{
> +       struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +       u32   rst, clk, src;
> +
> +       rst = readl(&clkrst->crc_rst_dev_u);
> +       clk = readl(&clkrst->crc_clk_out_enb_u);
> +
> +       if (enable) {
> +               rst &= ~SWR_CSITE_RST;
> +               clk |= CLK_ENB_CSITE;
> +       } else {
> +               rst |= SWR_CSITE_RST;
> +               clk &= ~CLK_ENB_CSITE;
> +       }
> +
> +       writel(clk, &clkrst->crc_clk_out_enb_u);
> +       writel(rst, &clkrst->crc_rst_dev_u);
> +
> +       if (enable) {
> +               /*
> +                * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
> +                *  1.5, giving an effective frequency of 144MHz.
> +                * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
> +                *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
> +                */
> +               src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
> +               writel(src, &clkrst->crc_clk_src_csite);
> +
> +               /* Unlock the CPU CoreSight interfaces */
> +               rst = 0xC5ACCE55;
> +               writel(rst, CSITE_CPU_DBG0_LAR);
> +               writel(rst, CSITE_CPU_DBG1_LAR);
> +       }
> +}
> +
> +void start_cpu(u32 reset_vector)
> +{
> +       /* Enable VDD_CPU */
> +       enable_cpu_power_rail();
> +
> +       /* Hold the CPUs in reset */
> +       reset_A9_cpu(TRUE);
> +
> +       /* Disable the CPU clock */
> +       enable_cpu_clock(FALSE);
> +
> +       /* Enable CoreSight */
> +       clock_enable_coresight(TRUE);
> +
> +       /*
> +       * Set the entry point for CPU execution from reset,
> +       *  if it's a non-zero value.
> +       */
> +       if (reset_vector)
> +               set_cpu_reset_vector(reset_vector);
> +
> +       /* Enable the CPU clock */
> +       enable_cpu_clock(TRUE);
> +
> +       /* If the CPU doesn't already have power, power it up */
> +       if (!is_cpu_powered())
> +               powerup_cpu();
> +
> +       /* Take the CPU out of reset */
> +       reset_A9_cpu(FALSE);
> +}
> +
> +
> +void halt_avp(void)
> +{
> +       u32   reg;
> +
> +       for (;;) {
> +               reg = HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
> +                       | HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29);
> +               writel(reg, FLOW_CTLR_HALT_COP_EVENTS);
> +       }
> +       /* Should never get here */
> +       for (;;)
> +               ;
> +}
> +
> +void startup_cpu(void)
> +{
> +       /* Initialize the AVP, clocks, and memory controller */
> +       /* SDRAM is guaranteed to be on at this point */
> +
> +       asm volatile(
> +
> +       /* Start the CPU */
> +       /* R0 = reset vector for CPU */
> +       "ldr     r0, =cold_boot      \n"
> +       "bl      start_cpu           \n"
> +
> +       /* Transfer control to the AVP code */
> +       "bl      halt_avp            \n"
> +
> +       /* Should never get here */
> +       "b       .                   \n"
> +       );
> +}
> +
> +extern ulong _armboot_start;
> +u32 cpu_boot_stack = CPU_EARLY_BOOT_STACK_LIMIT;
> +u32 avp_boot_stack = AVP_EARLY_BOOT_STACK_LIMIT;
> +u32 proc_tag = PG_UP_TAG_0_PID_CPU & 0xFF;
> +u32 deadbeef = 0xdeadbeef;
> +
> +void cold_boot(void)
> +{
> +       asm volatile(
> +
> +       "msr     cpsr_c, #0xd3            \n"
> +       /*
> +       * Check current processor: CPU or AVP?
> +       * If AVP, go to AVP boot code, else continue on.
> +       */
> +       "mov     r0, %0                   \n"
> +       "ldrb    r2, [r0, %1]             \n"
> +       /* are we the CPU? */
> +       "cmp     r2, %2                   \n"
> +       "mov     sp, %3                   \n"
> +       /* leave in some symbols for release debugging */
> +       "mov     r3, %6                   \n"
> +       "str     r3, [sp, #-4]!           \n"
> +       "str     r3, [sp, #-4]!           \n"
> +       /*  yep, we are the CPU */
> +       "bxeq     %4                      \n"
> +       /* AVP Initialization follows this path */
> +       "mov     sp, %5                   \n"
> +       /* leave in some symbols for release debugging */
> +       "mov     r3, %6                   \n"
> +       "str     r3, [sp, #-4]!           \n"
> +       "str     r3, [sp, #-4]!           \n"
> +
> +       /* Init and Start CPU */
> +       "b       startup_cpu              \n"
> +       :
> +       : "i"(NV_PA_PG_UP_BASE),
> +       "i"(PG_UP_TAG_0),
> +       "r"(proc_tag),
> +       "r"(cpu_boot_stack),
> +       "r"(_armboot_start),
> +       "r"(avp_boot_stack),
> +       "r"(deadbeef)
> +       : "r0", "r2", "r3", "cc", "lr"
> +       );
> +}
> +
> +void enable_scu(void)
> +{
> +       struct scu_ctlr *scu = (struct scu_ctlr *)NV_PA_ARM_PERIPHBASE;
> +       u32 reg;
> +
> +       reg = readl(&scu->scu_ctrl);
> +       if (reg & SCU_ENABLE) {
> +               /* SCU already enabled, return */
> +               return;
> +       }
> +
> +       /* Invalidate all ways for all processors */
> +       writel(0xFFFF, &scu->scu_inv_all);
> +
> +       /* Enable SCU - bit 0 */
> +       reg = readl(&scu->scu_ctrl);
> +       reg |= SCU_ENABLE;
> +       writel(reg, &scu->scu_ctrl);
> +
> +       return;
> +}
> +
> +void cache_configure(void)
> +{
> +       asm volatile(
> +
> +       "stmdb r13!,{r14}                 \n"
> +       /* invalidate instruction cache */
> +       "mov r1, #0                       \n"
> +       "mcr p15, 0, r1, c7, c5, 0        \n"
> +
> +       /* invalidate the i&d tlb entries */
> +       "mcr p15, 0, r1, c8, c5, 0        \n"
> +       "mcr p15, 0, r1, c8, c6, 0        \n"
> +
> +       /* enable instruction cache */
> +       "mrc  p15, 0, r1, c1, c0, 0       \n"
> +       "orr  r1, r1, #(1<<12)            \n"
> +       "mcr  p15, 0, r1, c1, c0, 0       \n"
> +
> +       "bl enable_scu                    \n"
> +
> +       /* enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg */
> +       "mrc p15, 0, r0, c1, c0, 1        \n"
> +       "orr r0, r0, #0x41                \n"
> +       "mcr p15, 0, r0, c1, c0, 1        \n"
> +
> +       /* Now flush the Dcache */
> +       "mov r0, #0                       \n"
> +       /* 256 cache lines */
> +       "mov r1, #256                     \n"
> +
> +       "invalidate_loop:                 \n"
> +
> +       "add r1, r1, #-1                  \n"
> +       "mov r0, r1, lsl #5               \n"
> +       /* invalidate d-cache using line (way0) */
> +       "mcr p15, 0, r0, c7, c6, 2        \n"
> +
> +       "orr r2, r0, #(1<<30)             \n"
> +       /* invalidate d-cache using line (way1) */
> +       "mcr p15, 0, r2, c7, c6, 2        \n"
> +
> +       "orr r2, r0, #(2<<30)             \n"
> +       /* invalidate d-cache using line (way2) */
> +       "mcr p15, 0, r2, c7, c6, 2        \n"
> +
> +       "orr r2, r0, #(3<<30)             \n"
> +       /* invalidate d-cache using line (way3) */
> +       "mcr p15, 0, r2, c7, c6, 2        \n"
> +       "cmp r1, #0                       \n"
> +       "bne invalidate_loop              \n"
> +
> +       /* FIXME: should have ap20's L2 disabled too */
> +       "invalidate_done:                 \n"
> +       "ldmia r13!,{pc}                  \n"
> +       ".ltorg                           \n"
> +       );
> +}
> +
> +void init_pmc_scratch(void)
> +{
> +       struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +
> +       /* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
> +       memset(&pmc->pmc_scratch1, 0, 23*4);
> +       writel(CONFIG_SYS_BOARD_ODMDATA, &pmc->pmc_scratch20);
> +}
> +
> +u32 s_first_boot = 1;
> +
> +void cpu_start(void)
> +{
> +       struct pmux_tri_ctlr *pmt = (struct pmux_tri_ctlr *)NV_PA_APB_MISC_BASE;
> +
> +       /* enable JTAG */
> +       writel(0xC0, &pmt->pmt_cfg_ctl);
> +
> +       if (s_first_boot) {
> +               /*
> +                * Need to set this before cold-booting,
> +                *  otherwise we'll end up in an infinite loop.
> +                */
> +               s_first_boot = 0;
> +               cold_boot();
> +       }
> +
> +       return;
> +}
> +
> +void tegra2_start()
> +{
> +       if (s_first_boot) {
> +               /* Init Debug UART Port (115200 8n1) */
> +               uart_init();
> +
> +               /* Init PMC scratch memory */
> +               init_pmc_scratch();
> +       }
> +
> +#ifdef CONFIG_ENABLE_CORTEXA9
> +       /* take the mpcore out of reset */
> +       cpu_start();
> +
> +       /* configure cache */
> +       cache_configure();
> +#endif
> +}
> +
> diff --git a/arch/arm/cpu/armv7/tegra2/ap20.h b/arch/arm/cpu/armv7/tegra2/ap20.h
> new file mode 100644
> index 0000000..de7622d
> --- /dev/null
> +++ b/arch/arm/cpu/armv7/tegra2/ap20.h
> @@ -0,0 +1,105 @@
> +/*
> + * (C) Copyright 2010-2011
> + * NVIDIA Corporation <www.nvidia.com>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +#include <asm/types.h>
> +
> +#ifndef        FALSE
> +#define FALSE  0
> +#endif
> +#ifndef TRUE
> +#define TRUE   1
> +#endif
> +
> +#define NVBL_PLLP_KHZ  (432000/2)
> +/* The stabilization delay in usec */
> +#define NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY (300)
> +
> +#define PLLX_ENABLED           (1 << 30)
> +#define CCLK_BURST_POLICY      0x20008888
> +#define SUPER_CCLK_DIVIDER     0x80000000
> +
> +/* Calculate clock fractional divider value from ref and target frequencies */
> +#define CLK_DIVIDER(REF, FREQ)  ((((REF) * 2) / FREQ) - 2)
> +
> +/* Calculate clock frequency value from reference and clock divider value */
> +#define CLK_FREQUENCY(REF, REG)  (((REF) * 2) / (REG + 2))
> +
> +/* AVP/CPU ID */
> +#define PG_UP_TAG_0_PID_CPU    0x55555555      /* CPU aka "a9" aka "mpcore" */
> +#define PG_UP_TAG_0             0x0
> +
> +/* AP20-Specific Base Addresses */
> +
> +/* AP20 Base physical address of SDRAM. */
> +#define AP20_BASE_PA_SDRAM      0x00000000
> +/* AP20 Base physical address of internal SRAM. */
> +#define AP20_BASE_PA_SRAM       0x40000000
> +/* AP20 Size of internal SRAM (256KB). */
> +#define AP20_BASE_PA_SRAM_SIZE  0x00040000
> +/* AP20 Base physical address of flash. */
> +#define AP20_BASE_PA_NOR_FLASH  0xD0000000
> +/* AP20 Base physical address of boot information table. */
> +#define AP20_BASE_PA_BOOT_INFO  AP20_BASE_PA_SRAM
> +
> +/*
> + * Super-temporary stacks for EXTREMELY early startup. The values chosen for
> + * these addresses must be valid on ALL SOCs because this value is used before
> + * we are able to differentiate between the SOC types.
> + *
> + * NOTE: The since CPU's stack will eventually be moved from IRAM to SDRAM, its
> + *       stack is placed below the AVP stack. Once the CPU stack has been moved,
> + *       the AVP is free to use the IRAM the CPU stack previously occupied if
> + *       it should need to do so.
> + *
> + * NOTE: In multi-processor CPU complex configurations, each processor will have
> + *       its own stack of size CPU_EARLY_BOOT_STACK_SIZE. CPU 0 will have a
> + *       limit of CPU_EARLY_BOOT_STACK_LIMIT. Each successive CPU will have a
> + *       stack limit that is CPU_EARLY_BOOT_STACK_SIZE less then the previous
> + *       CPU.
> + */
> +
> +/* Common AVP early boot stack limit */
> +#define AVP_EARLY_BOOT_STACK_LIMIT     \
> +       (AP20_BASE_PA_SRAM + (AP20_BASE_PA_SRAM_SIZE/2))
> +/* Common AVP early boot stack size */
> +#define AVP_EARLY_BOOT_STACK_SIZE      0x1000
> +/* Common CPU early boot stack limit */
> +#define CPU_EARLY_BOOT_STACK_LIMIT     \
> +       (AVP_EARLY_BOOT_STACK_LIMIT - AVP_EARLY_BOOT_STACK_SIZE)
> +/* Common CPU early boot stack size */
> +#define CPU_EARLY_BOOT_STACK_SIZE      0x1000
> +
> +#define EXCEP_VECTOR_CPU_RESET_VECTOR  (NV_PA_EVP_BASE + 0x100)
> +#define CSITE_CPU_DBG0_LAR             (NV_PA_CSITE_BASE + 0x10FB0)
> +#define CSITE_CPU_DBG1_LAR             (NV_PA_CSITE_BASE + 0x12FB0)
> +
> +#define FLOW_CTLR_HALT_COP_EVENTS      (NV_PA_FLOW_BASE + 4)
> +#define FLOW_MODE_STOP                 2
> +#define HALT_COP_EVENT_JTAG            (1 << 28)
> +#define HALT_COP_EVENT_IRQ_1           (1 << 11)
> +#define HALT_COP_EVENT_FIQ_1           (1 << 9)
> +
> +/* Prototypes */
> +
> +void tegra2_start(void);
> +void uart_init(void);
> +void udelay(unsigned long);
> diff --git a/arch/arm/include/asm/arch-tegra2/clk_rst.h b/arch/arm/include/asm/arch-tegra2/clk_rst.h
> index 6d573bf..d67a5d7 100644
> --- a/arch/arm/include/asm/arch-tegra2/clk_rst.h
> +++ b/arch/arm/include/asm/arch-tegra2/clk_rst.h
> @@ -149,6 +149,9 @@ struct clk_rst_ctlr {
>        uint crc_clk_src_csite;         /*_CSITE_0,             0x1D4 */
>        uint crc_reserved19[9];         /*                      0x1D8-1F8 */
>        uint crc_clk_src_osc;           /*_OSC_0,               0x1FC */
> +       uint crc_reserved20[80];        /*                      0x200-33C */
> +       uint crc_cpu_cmplx_set;         /* _CPU_CMPLX_SET_0,    0x340 */
> +       uint crc_cpu_cmplx_clr;         /* _CPU_CMPLX_CLR_0,    0x344 */
>  };
>
>  #define PLL_BYPASS             (1 << 31)
> @@ -162,4 +165,28 @@ struct clk_rst_ctlr {
>  #define SWR_UARTA_RST          (1 << 6)
>  #define CLK_ENB_UARTA          (1 << 6)
>
> +#define SWR_CPU_RST            (1 << 0)
> +#define CLK_ENB_CPU            (1 << 0)
> +#define SWR_CSITE_RST          (1 << 9)
> +#define CLK_ENB_CSITE          (1 << 9)
> +
> +#define SET_CPURESET0          (1 << 0)
> +#define SET_DERESET0           (1 << 4)
> +#define SET_DBGRESET0          (1 << 12)
> +
> +#define SET_CPURESET1          (1 << 1)
> +#define SET_DERESET1           (1 << 5)
> +#define SET_DBGRESET1          (1 << 13)
> +
> +#define CLR_CPURESET0          (1 << 0)
> +#define CLR_DERESET0           (1 << 4)
> +#define CLR_DBGRESET0          (1 << 12)
> +
> +#define CLR_CPURESET1          (1 << 1)
> +#define CLR_DERESET1           (1 << 5)
> +#define CLR_DBGRESET1          (1 << 13)
> +
> +#define CPU0_CLK_STP           (1 << 8)
> +#define CPU1_CLK_STP           (1 << 9)
> +
>  #endif /* CLK_RST_H */
> diff --git a/arch/arm/include/asm/arch-tegra2/pmc.h b/arch/arm/include/asm/arch-tegra2/pmc.h
> index 7ec9eeb..b1d47cd 100644
> --- a/arch/arm/include/asm/arch-tegra2/pmc.h
> +++ b/arch/arm/include/asm/arch-tegra2/pmc.h
> @@ -121,4 +121,12 @@ struct pmc_ctlr {
>        uint pmc_gate;                  /* _GATE_0, offset 15C */
>  };
>
> +#define CPU_PWRED      1
> +#define CPU_CLMP       1
> +
> +#define PARTID_CP      0xFFFFFFF8
> +#define START_CP       (1 << 8)
> +
> +#define CPUPWRREQ_OE   (1 << 16)
> +
>  #endif /* PMC_H */
> diff --git a/arch/arm/include/asm/arch-tegra2/scu.h b/arch/arm/include/asm/arch-tegra2/scu.h
> new file mode 100644
> index 0000000..d2faa6f
> --- /dev/null
> +++ b/arch/arm/include/asm/arch-tegra2/scu.h
> @@ -0,0 +1,43 @@
> +/*
> + *  (C) Copyright 2010,2011
> + *  NVIDIA Corporation <www.nvidia.com>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#ifndef _SCU_H_
> +#define _SCU_H_
> +
> +/* ARM Snoop Control Unit (SCU) registers */
> +struct scu_ctlr {
> +       uint scu_ctrl;          /* SCU Control Register, offset 00 */
> +       uint scu_cfg;           /* SCU Config Register, offset 04 */
> +       uint scu_cpu_pwr_stat;  /* SCU CPU Power Status Register, offset 08 */
> +       uint scu_inv_all;       /* SCU Invalidate All Register, offset 0C */
> +       uint scu_reserved0[12]; /* reserved, offset 10-3C */
> +       uint scu_filt_start;    /* SCU Filtering Start Address Reg, offset 40 */
> +       uint scu_filt_end;      /* SCU Filtering End Address Reg, offset 44 */
> +       uint scu_reserved1[2];  /* reserved, offset 48-4C */
> +       uint scu_acc_ctl;       /* SCU Access Control Register, offset 50 */
> +       uint scu_ns_acc_ctl;    /* SCU Non-secure Access Cntrl Reg, offset 54 */
> +};
> +
> +#define SCU_ENABLE     1
> +
> +#endif /* SCU_H */
> diff --git a/arch/arm/include/asm/arch-tegra2/tegra2.h b/arch/arm/include/asm/arch-tegra2/tegra2.h
> index 9001b68..5813cd9 100644
> --- a/arch/arm/include/asm/arch-tegra2/tegra2.h
> +++ b/arch/arm/include/asm/arch-tegra2/tegra2.h
> @@ -25,8 +25,12 @@
>  #define _TEGRA2_H_
>
>  #define NV_PA_SDRAM_BASE       0x00000000
> +#define NV_PA_ARM_PERIPHBASE   0x50040000
> +#define NV_PA_PG_UP_BASE       0x60000000
>  #define NV_PA_TMRUS_BASE       0x60005010
>  #define NV_PA_CLK_RST_BASE     0x60006000
> +#define NV_PA_FLOW_BASE                0x60007000
> +#define NV_PA_EVP_BASE         0x6000F000
>  #define NV_PA_APB_MISC_BASE    0x70000000
>  #define NV_PA_APB_UARTA_BASE   (NV_PA_APB_MISC_BASE + 0x6000)
>  #define NV_PA_APB_UARTB_BASE   (NV_PA_APB_MISC_BASE + 0x6040)
> @@ -34,6 +38,7 @@
>  #define NV_PA_APB_UARTD_BASE   (NV_PA_APB_MISC_BASE + 0x6300)
>  #define NV_PA_APB_UARTE_BASE   (NV_PA_APB_MISC_BASE + 0x6400)
>  #define NV_PA_PMC_BASE         0x7000E400
> +#define NV_PA_CSITE_BASE       0x70040000
>
>  #define TEGRA2_SDRC_CS0                NV_PA_SDRAM_BASE
>  #define LOW_LEVEL_SRAM_STACK   0x4000FFFC
> diff --git a/board/nvidia/common/board.c b/board/nvidia/common/board.c
> index b2c412c..078547b 100644
> --- a/board/nvidia/common/board.c
> +++ b/board/nvidia/common/board.c
> @@ -30,6 +30,7 @@
>  #include <asm/arch/clk_rst.h>
>  #include <asm/arch/pinmux.h>
>  #include <asm/arch/uart.h>
> +#include "board.h"
>
>  DECLARE_GLOBAL_DATA_PTR;
>
> @@ -37,6 +38,15 @@ const struct tegra2_sysinfo sysinfo = {
>        CONFIG_TEGRA2_BOARD_STRING
>  };
>
> +#ifdef CONFIG_BOARD_EARLY_INIT_F
> +int board_early_init_f(void)
> +{
> +       debug("Board Early Init\n");
> +       tegra2_start();
> +       return 0;
> +}
> +#endif /* EARLY_INIT */
> +
>  /*
>  * Routine: timer_init
>  * Description: init the timestamp and lastinc value
> diff --git a/board/nvidia/common/board.h b/board/nvidia/common/board.h
> new file mode 100644
> index 0000000..47c7885
> --- /dev/null
> +++ b/board/nvidia/common/board.h
> @@ -0,0 +1,29 @@
> +/*
> + *  (C) Copyright 2010,2011
> + *  NVIDIA Corporation <www.nvidia.com>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#ifndef _BOARD_H_
> +#define _BOARD_H_
> +
> +void tegra2_start(void);
> +
> +#endif /* BOARD_H */
> diff --git a/include/configs/harmony.h b/include/configs/harmony.h
> index d004f31..34bd899 100644
> --- a/include/configs/harmony.h
> +++ b/include/configs/harmony.h
> @@ -46,4 +46,5 @@
>  #define CONFIG_MACH_TYPE               MACH_TYPE_HARMONY
>  #define CONFIG_SYS_BOARD_ODMDATA       0x300d8011 /* lp1, 1GB */
>
> +#define CONFIG_BOARD_EARLY_INIT_F
>  #endif /* __CONFIG_H */
> diff --git a/include/configs/seaboard.h b/include/configs/seaboard.h
> index fd87560..b7b72bb 100644
> --- a/include/configs/seaboard.h
> +++ b/include/configs/seaboard.h
> @@ -40,4 +40,5 @@
>  #define CONFIG_MACH_TYPE               MACH_TYPE_TEGRA_SEABOARD
>  #define CONFIG_SYS_BOARD_ODMDATA       0x300d8011 /* lp1, 1GB */
>
> +#define CONFIG_BOARD_EARLY_INIT_F
>  #endif /* __CONFIG_H */
> diff --git a/include/configs/tegra2-common.h b/include/configs/tegra2-common.h
> index 4f4374a..2924325 100644
> --- a/include/configs/tegra2-common.h
> +++ b/include/configs/tegra2-common.h
> @@ -33,6 +33,8 @@
>  #define CONFIG_MACH_TEGRA_GENERIC      /* which is a Tegra generic machine */
>  #define CONFIG_L2_OFF                  /* No L2 cache */
>
> +#define CONFIG_ENABLE_CORTEXA9         /* enable CPU (A9 complex) */
> +
>  #include <asm/arch/tegra2.h>           /* get chip and board defs */
>
>  /*
> --
> 1.7.4.1
>
>
Albert ARIBAUD - Feb. 22, 2011, 11:57 p.m.
Hi Tom,

Le 23/02/2011 00:41, Tom Warren a écrit :
> Anyone willing to review this? I'd like to get it pulled in to
> Albert's arm repo ASAP.

I should be able to review it during the week-end--not before, sorry.

Note however that since this is not a bugfix and it came after the merge 
window closed, it would go to u-boot-arm/next, not /master; it will go 
to u-boot-arm/master after the the upcoming release (and then to 
u-boot/master when the next merge window closes).

> Thanks,
>
> Tom

Amicalement,
Tom Warren - Feb. 23, 2011, 3:50 p.m.
Albert,

On Tue, Feb 22, 2011 at 4:57 PM, Albert ARIBAUD <albert.aribaud@free.fr> wrote:
> Hi Tom,
>
> Le 23/02/2011 00:41, Tom Warren a écrit :
>>
>> Anyone willing to review this? I'd like to get it pulled in to
>> Albert's arm repo ASAP.
>
> I should be able to review it during the week-end--not before, sorry.
>
Thank you. That would be fine - no rush.

> Note however that since this is not a bugfix and it came after the merge
> window closed, it would go to u-boot-arm/next, not /master; it will go to
> u-boot-arm/master after the the upcoming release (and then to u-boot/master
> when the next merge window closes).
Understood. Just wanted an ACK, etc. so I can sign off on the CPU work
and move on to the drivers.
>
>> Thanks,
>>
>> Tom
>
> Amicalement,
> --
> Albert.
>
Thanks for your help,

Tom
Tom Warren - March 7, 2011, 4:15 p.m.
Albert,

On Tue, Feb 22, 2011 at 4:57 PM, Albert ARIBAUD <albert.aribaud@free.fr> wrote:
> Hi Tom,
>
> Le 23/02/2011 00:41, Tom Warren a écrit :
>>
>> Anyone willing to review this? I'd like to get it pulled in to
>> Albert's arm repo ASAP.
>
> I should be able to review it during the week-end--not before, sorry.
>
Any chance you can give this some bandwidth? I'd like to get it into
the arm tree.

As far as I know, it should still apply, but let me know if I need to
rebase it against next (or master).

Thanks,
Tom
> Note however that since this is not a bugfix and it came after the merge
> window closed, it would go to u-boot-arm/next, not /master; it will go to
> u-boot-arm/master after the the upcoming release (and then to u-boot/master
> when the next merge window closes).
>
>> Thanks,
>>
>> Tom
>
> Amicalement,
> --
> Albert.
>
Albert ARIBAUD - March 13, 2011, 5:46 p.m.
Le 16/02/2011 21:26, Tom Warren a écrit :
> Signed-off-by: Tom Warren<twarren@nvidia.com>
> ---
>   arch/arm/cpu/armv7/start.S                 |    6 +
>   arch/arm/cpu/armv7/tegra2/Makefile         |    2 +-

>   arch/arm/cpu/armv7/tegra2/ap20.c           |  490 ++++++++++++++++++++++++++++
This one has an extra empty line at end of file.

> +void cold_boot(void)
> +{
> +	asm volatile(
> +
> +	"msr     cpsr_c, #0xd3            \n"
> +	/*
> +	* Check current processor: CPU or AVP?
> +	* If AVP, go to AVP boot code, else continue on.
> +	*/
> +	"mov     r0, %0                   \n"
> +	"ldrb    r2, [r0, %1]             \n"
> +	/* are we the CPU? */
> +	"cmp     r2, %2                   \n"
> +	"mov     sp, %3                   \n"
> +	/* leave in some symbols for release debugging */
> +	"mov     r3, %6                   \n"
> +	"str     r3, [sp, #-4]!           \n"
> +	"str     r3, [sp, #-4]!           \n"
> +	/*  yep, we are the CPU */
> +	"bxeq     %4                      \n"
> +	/* AVP Initialization follows this path */
> +	"mov     sp, %5                   \n"
> +	/* leave in some symbols for release debugging */
> +	"mov     r3, %6                   \n"
> +	"str     r3, [sp, #-4]!           \n"
> +	"str     r3, [sp, #-4]!           \n"
> +
> +	/* Init and Start CPU */
> +	"b       startup_cpu              \n"
> +	:
> +	: "i"(NV_PA_PG_UP_BASE),

If I'm not mistaken, NV_PA_PG_UP_BASE could be used just as well 
directly in the asm statement instead of via %0 and i(), as anyway the 
asm will be preprocessed and the macro will turn to a number. That would 
simplify the asm instruction as a whole and make the asm statement more 
understandable (also applies to other macros used similarly).

Apart from that, I must admit I don't know the Tegra2/A9 well enough to 
comment further.

amicalement,
Amicalement,
Peter Tyser - March 14, 2011, 3:33 p.m.
Hi Tom,
I'm not too familiar with the architecture, so many comments are
aesthetic.  It would be a good idea to run the patch through
checkpatch.pl too.

> diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
> index 684f2d2..50a1725 100644
> --- a/arch/arm/cpu/armv7/start.S
> +++ b/arch/arm/cpu/armv7/start.S
> @@ -70,6 +70,12 @@ _end_vect:
>  _TEXT_BASE:
>  	.word	CONFIG_SYS_TEXT_BASE
>  
> +#ifdef CONFIG_TEGRA2
> +.globl _armboot_start
> +_armboot_start:
> +        .word _start
> +#endif
> +

It'd be good to add a comment about what's going on above, and why its
tegra2-specific.  Eg why is it needed?

<snip>

> +static void init_pll_x(void)
> +{
> +	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +	u32	reg;

The spaces in front of "reg" can be removed.  Ditto for all
functions/variables.

> +	/* Is PLL-X already running? */
> +	reg = readl(&clkrst->crc_pllx_base);
> +	if (reg & PLL_ENABLE)
> +		return;
> +
> +	/* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
> +}

The above function looks incorrect.

> +static void set_cpu_reset_vector(u32 vector)
> +{
> +	writel(vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
> +}

Is it worth breaking this out into its own function?  The define names
make it pretty clear what is being done, and its only called from 1
location.

> +static void enable_cpu_clock(int enable)
> +{
> +	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +	u32   reg, clk;
> +
> +	/*
> +	 * NOTE:
> +	 * Regardless of whether the request is to enable or disable the CPU
> +	 * clock, every processor in the CPU complex except the master (CPU 0)
> +	 * will have it's clock stopped because the AVP only talks to the
> +	 * master. The AVP does not know (nor does it need to know) that there
> +	 * are multiple processors in the CPU complex.
> +	 */
> +
> +	/* Need to initialize PLLX? */
> +	if (enable) {
> +		/* Initialize PLL */
> +		init_pll_x();
> +
> +		/* Wait until stable */
> +		udelay(NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY);
> +
> +		reg = CCLK_BURST_POLICY;
> +		writel(reg, &clkrst->crc_cclk_brst_pol);

It'd look cleaner to not set "reg" for each access, just use the define
directly.  I'd personally use less temporary variables in general and
only use them when it made the code cleaner and easier to understand.

> +		reg = SUPER_CCLK_DIVIDER;
> +		writel(reg, &clkrst->crc_super_cclk_div);
> +	}
> +
> +	/* Fetch the register containing the main CPU complex clock enable */
> +	reg = readl(&clkrst->crc_clk_out_enb_l);

Is this read necessary?  You overwrite reg unconditionally below.

> +	/*
> +	 * Read the register containing the individual CPU clock enables and
> +	 * always stop the clock to CPU 1.
> +	 */
> +	clk = readl(&clkrst->crc_clk_cpu_cmplx);
> +	clk |= CPU1_CLK_STP;
> +
> +	if (enable) {
> +		/* Enable the CPU clock */
> +		reg = readl(&clkrst->crc_clk_out_enb_l);
> +		reg |= CLK_ENB_CPU;
> +		clk = readl(&clkrst->crc_clk_cpu_cmplx);
> +		clk &= ~CPU0_CLK_STP;
> +	} else {
> +		/* Disable the CPU clock */
> +		reg = readl(&clkrst->crc_clk_out_enb_l);
> +		reg |= CLK_ENB_CPU;
> +		clk = readl(&clkrst->crc_clk_cpu_cmplx);
> +		clk |= CPU0_CLK_STP;
> +	}

For if/elses that share common code, the common code should be broken
out.  eg above only the one-line |=/&= operations should be conditional.

> +	writel(clk, &clkrst->crc_clk_cpu_cmplx);
> +	writel(reg, &clkrst->crc_clk_out_enb_l);
> +}
> +
> +static int is_cpu_powered(void)
> +{
> +	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +	u32   reg;
> +
> +	reg = readl(&pmc->pmc_pwrgate_status);
> +
> +	if (reg & CPU_PWRED)
> +		return TRUE;

I'd remove the reg variable.  eg something like:
return readl(&pmc->pmc_pwrgate_status & CPU_PWRED ? 1 : 0);

> +	return FALSE;
> +}
> +
> +static void remove_cpu_io_clamps(void)
> +{
> +	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +	u32   reg;
> +
> +	/* Remove the clamps on the CPU I/O signals */
> +	reg = readl(&pmc->pmc_remove_clamping);
> +	reg |= CPU_CLMP;
> +	writel(reg, &pmc->pmc_remove_clamping);
> +
> +	/* Give I/O signals time to stabilize */
> +	udelay(1000);

For magic delays it'd be good to reference why you're waiting a specific
amount of time.  Does a manual state 1 ms?  Did testing show 1ms was
required?

> +}
> +
> +static void powerup_cpu(void)
> +{
> +	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +	u32   reg;
> +
> +	if (!is_cpu_powered()) {
> +		/* Toggle the CPU power state (OFF -> ON) */
> +		reg = readl(&pmc->pmc_pwrgate_toggle);
> +		reg &= (PARTID_CP);
> +		reg |= START_CP;

Why ()'s on one operation, but not the other?

> +		writel(reg, &pmc->pmc_pwrgate_toggle);
> +
> +		/* Wait for the power to come up */
> +		while (!is_cpu_powered())
> +			;			/* Do nothing */
> +
> +		/*
> +		 * Remove the I/O clamps from CPU power partition.
> +		 * Recommended only on a Warm boot, if the CPU partition gets
> +		 * power gated. Shouldn't cause any harm when called after a
> +		 * cold boot according to HW, probably just redundant.
> +		 */
> +		remove_cpu_io_clamps();
> +	}
> +}
> +
> +static void enable_cpu_power_rail(void)
> +{
> +	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +	u32   reg;
> +
> +	reg = readl(&pmc->pmc_cntrl);
> +	reg |= CPUPWRREQ_OE;
> +	writel(reg, &pmc->pmc_cntrl);
> +
> +	/* Delay to allow for stable power (CPU_PWR_REQ -> VDD_CPU from PMU) */
> +	udelay(3750);

Ditto for description.

> +}
> +
> +static void reset_A9_cpu(int reset)
> +{
> +	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +	u32   reg, cpu;
> +
> +	/*
> +	* NOTE:  Regardless of whether the request is to hold the CPU in reset
> +	*        or take it out of reset, every processor in the CPU complex
> +	*        except the master (CPU 0) will be held in reset because the
> +	*        AVP only talks to the master. The AVP does not know that there
> +	*        are multiple processors in the CPU complex.
> +	*/
> +
> +	/* Hold CPU 1 in reset */
> +	cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
> +	writel(cpu, &clkrst->crc_cpu_cmplx_set);

The cpu variable can be removed.

> +	reg = readl(&clkrst->crc_rst_dev_l);
> +	if (reset) {
> +		/* Place CPU0 into reset */
> +		cpu = SET_DBGRESET0 | SET_DERESET0 | SET_CPURESET0;
> +		writel(cpu, &clkrst->crc_cpu_cmplx_set);
> +
> +		/* Enable master CPU reset */
> +		reg |= SWR_CPU_RST;
> +

Extra newline.

> +	} else {
> +		/* Take CPU0 out of reset */
> +		cpu = CLR_DBGRESET0 | CLR_DERESET0 | CLR_CPURESET0;
> +		writel(cpu, &clkrst->crc_cpu_cmplx_clr);
> +
> +		/* Disable master CPU reset */
> +		reg &= ~SWR_CPU_RST;
> +	}
> +
> +	writel(reg, &clkrst->crc_rst_dev_l);
> +}
> +
> +static void clock_enable_coresight(int enable)
> +{
> +	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> +	u32   rst, clk, src;
> +
> +	rst = readl(&clkrst->crc_rst_dev_u);
> +	clk = readl(&clkrst->crc_clk_out_enb_u);
> +
> +	if (enable) {
> +		rst &= ~SWR_CSITE_RST;
> +		clk |= CLK_ENB_CSITE;
> +	} else {
> +		rst |= SWR_CSITE_RST;
> +		clk &= ~CLK_ENB_CSITE;
> +	}
> +
> +	writel(clk, &clkrst->crc_clk_out_enb_u);
> +	writel(rst, &clkrst->crc_rst_dev_u);
> +
> +	if (enable) {
> +		/*
> +		 * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
> +		 *  1.5, giving an effective frequency of 144MHz.
> +		 * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
> +		 *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
> +		 */
> +		src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
> +		writel(src, &clkrst->crc_clk_src_csite);
> +
> +		/* Unlock the CPU CoreSight interfaces */
> +		rst = 0xC5ACCE55;

What's this number?  Is there a define that could be used instead?

> +		writel(rst, CSITE_CPU_DBG0_LAR);
> +		writel(rst, CSITE_CPU_DBG1_LAR);
> +	}
> +}
> +
> +void start_cpu(u32 reset_vector)
> +{
> +	/* Enable VDD_CPU */
> +	enable_cpu_power_rail();
> +
> +	/* Hold the CPUs in reset */
> +	reset_A9_cpu(TRUE);
> +
> +	/* Disable the CPU clock */
> +	enable_cpu_clock(FALSE);
> +
> +	/* Enable CoreSight */
> +	clock_enable_coresight(TRUE);
> +
> +	/*
> +	* Set the entry point for CPU execution from reset,
> +	*  if it's a non-zero value.
> +	*/

Spaces should be added above.

> +	if (reset_vector)
> +		set_cpu_reset_vector(reset_vector);
> +
> +	/* Enable the CPU clock */
> +	enable_cpu_clock(TRUE);
> +
> +	/* If the CPU doesn't already have power, power it up */
> +	if (!is_cpu_powered())
> +		powerup_cpu();
> +
> +	/* Take the CPU out of reset */
> +	reset_A9_cpu(FALSE);
> +}
> +
> +
> +void halt_avp(void)
> +{
> +	u32   reg;
> +
> +	for (;;) {
> +		reg = HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
> +			| HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29);
> +		writel(reg, FLOW_CTLR_HALT_COP_EVENTS);
> +	}
> +	/* Should never get here */
> +	for (;;)
> +		;

I'd remove the use of reg, and a secondary infinite loop seems
necessary.

> +}
> +
> +void startup_cpu(void)
> +{
> +	/* Initialize the AVP, clocks, and memory controller */
> +	/* SDRAM is guaranteed to be on at this point */
> +
> +	asm volatile(
> +
> +	/* Start the CPU */
> +	/* R0 = reset vector for CPU */
> +	"ldr     r0, =cold_boot      \n"
> +	"bl      start_cpu           \n"
> +
> +	/* Transfer control to the AVP code */
> +	"bl      halt_avp            \n"
> +
> +	/* Should never get here */
> +	"b       .                   \n"
> +	);

The assembly code should be indented.  Actually, is there a reason not
to move all these assembly functions into a seperate assembly file?

> +}
> +
> +extern ulong _armboot_start;
> +u32 cpu_boot_stack = CPU_EARLY_BOOT_STACK_LIMIT;
> +u32 avp_boot_stack = AVP_EARLY_BOOT_STACK_LIMIT;
> +u32 proc_tag = PG_UP_TAG_0_PID_CPU & 0xFF;
> +u32 deadbeef = 0xdeadbeef;
> +
> +void cold_boot(void)
> +{
> +	asm volatile(
> +
> +	"msr     cpsr_c, #0xd3            \n"
> +	/*
> +	* Check current processor: CPU or AVP?
> +	* If AVP, go to AVP boot code, else continue on.
> +	*/
> +	"mov     r0, %0                   \n"
> +	"ldrb    r2, [r0, %1]             \n"
> +	/* are we the CPU? */
> +	"cmp     r2, %2                   \n"
> +	"mov     sp, %3                   \n"
> +	/* leave in some symbols for release debugging */
> +	"mov     r3, %6                   \n"
> +	"str     r3, [sp, #-4]!           \n"
> +	"str     r3, [sp, #-4]!           \n"
> +	/*  yep, we are the CPU */
> +	"bxeq     %4                      \n"
> +	/* AVP Initialization follows this path */
> +	"mov     sp, %5                   \n"
> +	/* leave in some symbols for release debugging */
> +	"mov     r3, %6                   \n"
> +	"str     r3, [sp, #-4]!           \n"
> +	"str     r3, [sp, #-4]!           \n"
> +
> +	/* Init and Start CPU */
> +	"b       startup_cpu              \n"
> +	:
> +	: "i"(NV_PA_PG_UP_BASE),
> +	"i"(PG_UP_TAG_0),
> +	"r"(proc_tag),
> +	"r"(cpu_boot_stack),
> +	"r"(_armboot_start),
> +	"r"(avp_boot_stack),
> +	"r"(deadbeef)
> +	: "r0", "r2", "r3", "cc", "lr"
> +	);

Ditto on indentation/move.

> +}
> +
> +void enable_scu(void)
> +{
> +	struct scu_ctlr *scu = (struct scu_ctlr *)NV_PA_ARM_PERIPHBASE;
> +	u32 reg;
> +
> +	reg = readl(&scu->scu_ctrl);
> +	if (reg & SCU_ENABLE) {
> +		/* SCU already enabled, return */
> +		return;
> +	}

I'd not use reg above, and not use brackets, eg:
/* If the SCU is already enabled, we have nothing to do */
if (readl(&scu_ctrl) & SCU_ENABLE)
	return;

> +	/* Invalidate all ways for all processors */
> +	writel(0xFFFF, &scu->scu_inv_all);
> +
> +	/* Enable SCU - bit 0 */
> +	reg = readl(&scu->scu_ctrl);
> +	reg |= SCU_ENABLE;
> +	writel(reg, &scu->scu_ctrl);
> +
> +	return;

Remove return.

> +}
> +
> +void cache_configure(void)
> +{
> +	asm volatile(
> +
> +	"stmdb r13!,{r14}                 \n"
> +	/* invalidate instruction cache */
> +	"mov r1, #0                       \n"
> +	"mcr p15, 0, r1, c7, c5, 0        \n"
> +
> +	/* invalidate the i&d tlb entries */
> +	"mcr p15, 0, r1, c8, c5, 0        \n"
> +	"mcr p15, 0, r1, c8, c6, 0        \n"
> +
> +	/* enable instruction cache */
> +	"mrc  p15, 0, r1, c1, c0, 0       \n"
> +	"orr  r1, r1, #(1<<12)            \n"
> +	"mcr  p15, 0, r1, c1, c0, 0       \n"
> +
> +	"bl enable_scu                    \n"
> +
> +	/* enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg */
> +	"mrc p15, 0, r0, c1, c0, 1        \n"
> +	"orr r0, r0, #0x41                \n"
> +	"mcr p15, 0, r0, c1, c0, 1        \n"
> +
> +	/* Now flush the Dcache */
> +	"mov r0, #0                       \n"
> +	/* 256 cache lines */
> +	"mov r1, #256                     \n"
> +
> +	"invalidate_loop:                 \n"
> +
> +	"add r1, r1, #-1                  \n"
> +	"mov r0, r1, lsl #5               \n"
> +	/* invalidate d-cache using line (way0) */
> +	"mcr p15, 0, r0, c7, c6, 2        \n"
> +
> +	"orr r2, r0, #(1<<30)             \n"
> +	/* invalidate d-cache using line (way1) */
> +	"mcr p15, 0, r2, c7, c6, 2        \n"
> +
> +	"orr r2, r0, #(2<<30)             \n"
> +	/* invalidate d-cache using line (way2) */
> +	"mcr p15, 0, r2, c7, c6, 2        \n"
> +
> +	"orr r2, r0, #(3<<30)             \n"
> +	/* invalidate d-cache using line (way3) */
> +	"mcr p15, 0, r2, c7, c6, 2        \n"
> +	"cmp r1, #0                       \n"
> +	"bne invalidate_loop              \n"
> +
> +	/* FIXME: should have ap20's L2 disabled too */
> +	"invalidate_done:                 \n"
> +	"ldmia r13!,{pc}                  \n"
> +	".ltorg                           \n"
> +	);

Ditto on indentation/move.

> +}
> +
> +void init_pmc_scratch(void)
> +{
> +	struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> +
> +	/* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
> +	memset(&pmc->pmc_scratch1, 0, 23*4);

Is it safe to memset on IO regions here?  A for loop of writel's would
be safer, and more consistent.

> +	writel(CONFIG_SYS_BOARD_ODMDATA, &pmc->pmc_scratch20);
> +}
> +
> +u32 s_first_boot = 1;

I'd move all global variables to the top of the file. 

> +
> +void cpu_start(void)
> +{
> +	struct pmux_tri_ctlr *pmt = (struct pmux_tri_ctlr *)NV_PA_APB_MISC_BASE;
> +
> +	/* enable JTAG */
> +	writel(0xC0, &pmt->pmt_cfg_ctl);
> +
> +	if (s_first_boot) {
> +		/*
> +		 * Need to set this before cold-booting,
> +		 *  otherwise we'll end up in an infinite loop.
> +		 */
> +		s_first_boot = 0;
> +		cold_boot();
> +	}
> +
> +	return;

Remove return.

> +}
> +
> +void tegra2_start()
> +{
> +	if (s_first_boot) {
> +		/* Init Debug UART Port (115200 8n1) */
> +		uart_init();
> +
> +		/* Init PMC scratch memory */
> +		init_pmc_scratch();
> +	}
> +
> +#ifdef CONFIG_ENABLE_CORTEXA9
> +	/* take the mpcore out of reset */
> +	cpu_start();
> +
> +	/* configure cache */
> +	cache_configure();
> +#endif
> +}
> +
> diff --git a/arch/arm/cpu/armv7/tegra2/ap20.h b/arch/arm/cpu/armv7/tegra2/ap20.h
> new file mode 100644
> index 0000000..de7622d
> --- /dev/null
> +++ b/arch/arm/cpu/armv7/tegra2/ap20.h
> @@ -0,0 +1,105 @@
> +/*
> + * (C) Copyright 2010-2011
> + * NVIDIA Corporation <www.nvidia.com>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +#include <asm/types.h>
> +
> +#ifndef	FALSE
> +#define FALSE	0
> +#endif
> +#ifndef TRUE
> +#define TRUE	1
> +#endif

These shouldn't be added here.  They should be somewhere common, or
shouldn't be used (my preference).

> +
> +#define NVBL_PLLP_KHZ	(432000/2)
> +/* The stabilization delay in usec */
> +#define NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY (300)
> +
> +#define PLLX_ENABLED		(1 << 30)
> +#define CCLK_BURST_POLICY	0x20008888
> +#define SUPER_CCLK_DIVIDER	0x80000000
> +
> +/* Calculate clock fractional divider value from ref and target frequencies */
> +#define CLK_DIVIDER(REF, FREQ)  ((((REF) * 2) / FREQ) - 2)
> +
> +/* Calculate clock frequency value from reference and clock divider value */
> +#define CLK_FREQUENCY(REF, REG)  (((REF) * 2) / (REG + 2))
> +
> +/* AVP/CPU ID */
> +#define PG_UP_TAG_0_PID_CPU	0x55555555	/* CPU aka "a9" aka "mpcore" */
> +#define PG_UP_TAG_0             0x0
> +
> +/* AP20-Specific Base Addresses */
> +
> +/* AP20 Base physical address of SDRAM. */
> +#define AP20_BASE_PA_SDRAM      0x00000000
> +/* AP20 Base physical address of internal SRAM. */
> +#define AP20_BASE_PA_SRAM       0x40000000
> +/* AP20 Size of internal SRAM (256KB). */
> +#define AP20_BASE_PA_SRAM_SIZE  0x00040000
> +/* AP20 Base physical address of flash. */
> +#define AP20_BASE_PA_NOR_FLASH  0xD0000000
> +/* AP20 Base physical address of boot information table. */
> +#define AP20_BASE_PA_BOOT_INFO  AP20_BASE_PA_SRAM
> +
> +/*
> + * Super-temporary stacks for EXTREMELY early startup. The values chosen for
> + * these addresses must be valid on ALL SOCs because this value is used before
> + * we are able to differentiate between the SOC types.
> + *
> + * NOTE: The since CPU's stack will eventually be moved from IRAM to SDRAM, its
> + *       stack is placed below the AVP stack. Once the CPU stack has been moved,
> + *       the AVP is free to use the IRAM the CPU stack previously occupied if
> + *       it should need to do so.
> + *
> + * NOTE: In multi-processor CPU complex configurations, each processor will have
> + *       its own stack of size CPU_EARLY_BOOT_STACK_SIZE. CPU 0 will have a
> + *       limit of CPU_EARLY_BOOT_STACK_LIMIT. Each successive CPU will have a
> + *       stack limit that is CPU_EARLY_BOOT_STACK_SIZE less then the previous
> + *       CPU.
> + */
> +
> +/* Common AVP early boot stack limit */
> +#define AVP_EARLY_BOOT_STACK_LIMIT	\
> +	(AP20_BASE_PA_SRAM + (AP20_BASE_PA_SRAM_SIZE/2))
> +/* Common AVP early boot stack size */
> +#define AVP_EARLY_BOOT_STACK_SIZE	0x1000
> +/* Common CPU early boot stack limit */
> +#define CPU_EARLY_BOOT_STACK_LIMIT	\
> +	(AVP_EARLY_BOOT_STACK_LIMIT - AVP_EARLY_BOOT_STACK_SIZE)
> +/* Common CPU early boot stack size */
> +#define CPU_EARLY_BOOT_STACK_SIZE	0x1000
> +
> +#define EXCEP_VECTOR_CPU_RESET_VECTOR	(NV_PA_EVP_BASE + 0x100)
> +#define CSITE_CPU_DBG0_LAR		(NV_PA_CSITE_BASE + 0x10FB0)
> +#define CSITE_CPU_DBG1_LAR		(NV_PA_CSITE_BASE + 0x12FB0)
> +
> +#define FLOW_CTLR_HALT_COP_EVENTS	(NV_PA_FLOW_BASE + 4)
> +#define FLOW_MODE_STOP			2
> +#define HALT_COP_EVENT_JTAG		(1 << 28)
> +#define HALT_COP_EVENT_IRQ_1		(1 << 11)
> +#define HALT_COP_EVENT_FIQ_1		(1 << 9)
> +
> +/* Prototypes */
> +
> +void tegra2_start(void);
> +void uart_init(void);
> +void udelay(unsigned long);
> diff --git a/arch/arm/include/asm/arch-tegra2/clk_rst.h b/arch/arm/include/asm/arch-tegra2/clk_rst.h
> index 6d573bf..d67a5d7 100644
> --- a/arch/arm/include/asm/arch-tegra2/clk_rst.h
> +++ b/arch/arm/include/asm/arch-tegra2/clk_rst.h
> @@ -149,6 +149,9 @@ struct clk_rst_ctlr {
>  	uint crc_clk_src_csite;		/*_CSITE_0,		0x1D4 */
>  	uint crc_reserved19[9];		/*			0x1D8-1F8 */
>  	uint crc_clk_src_osc;		/*_OSC_0,		0x1FC */
> +	uint crc_reserved20[80];	/*			0x200-33C */
> +	uint crc_cpu_cmplx_set;		/* _CPU_CMPLX_SET_0,	0x340 */
> +	uint crc_cpu_cmplx_clr;		/* _CPU_CMPLX_CLR_0,	0x344 */
>  };
>  
>  #define PLL_BYPASS		(1 << 31)
> @@ -162,4 +165,28 @@ struct clk_rst_ctlr {
>  #define SWR_UARTA_RST		(1 << 6)
>  #define CLK_ENB_UARTA		(1 << 6)
>  
> +#define SWR_CPU_RST		(1 << 0)
> +#define CLK_ENB_CPU		(1 << 0)
> +#define SWR_CSITE_RST		(1 << 9)
> +#define CLK_ENB_CSITE		(1 << 9)
> +
> +#define SET_CPURESET0		(1 << 0)
> +#define SET_DERESET0		(1 << 4)
> +#define SET_DBGRESET0		(1 << 12)
> +
> +#define SET_CPURESET1		(1 << 1)
> +#define SET_DERESET1		(1 << 5)
> +#define SET_DBGRESET1		(1 << 13)
> +
> +#define CLR_CPURESET0		(1 << 0)
> +#define CLR_DERESET0		(1 << 4)
> +#define CLR_DBGRESET0		(1 << 12)
> +
> +#define CLR_CPURESET1		(1 << 1)
> +#define CLR_DERESET1		(1 << 5)
> +#define CLR_DBGRESET1		(1 << 13)
> +
> +#define CPU0_CLK_STP		(1 << 8)
> +#define CPU1_CLK_STP		(1 << 9)
> +
>  #endif	/* CLK_RST_H */
> diff --git a/arch/arm/include/asm/arch-tegra2/pmc.h b/arch/arm/include/asm/arch-tegra2/pmc.h
> index 7ec9eeb..b1d47cd 100644
> --- a/arch/arm/include/asm/arch-tegra2/pmc.h
> +++ b/arch/arm/include/asm/arch-tegra2/pmc.h
> @@ -121,4 +121,12 @@ struct pmc_ctlr {
>  	uint pmc_gate;			/* _GATE_0, offset 15C */
>  };
>  
> +#define CPU_PWRED	1
> +#define CPU_CLMP	1
> +
> +#define PARTID_CP	0xFFFFFFF8
> +#define START_CP	(1 << 8)
> +
> +#define CPUPWRREQ_OE	(1 << 16)
> +
>  #endif	/* PMC_H */
> diff --git a/arch/arm/include/asm/arch-tegra2/scu.h b/arch/arm/include/asm/arch-tegra2/scu.h
> new file mode 100644
> index 0000000..d2faa6f
> --- /dev/null
> +++ b/arch/arm/include/asm/arch-tegra2/scu.h
> @@ -0,0 +1,43 @@
> +/*
> + *  (C) Copyright 2010,2011
> + *  NVIDIA Corporation <www.nvidia.com>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +
> +#ifndef _SCU_H_
> +#define _SCU_H_
> +
> +/* ARM Snoop Control Unit (SCU) registers */
> +struct scu_ctlr {
> +	uint scu_ctrl;		/* SCU Control Register, offset 00 */
> +	uint scu_cfg;		/* SCU Config Register, offset 04 */
> +	uint scu_cpu_pwr_stat;	/* SCU CPU Power Status Register, offset 08 */
> +	uint scu_inv_all;	/* SCU Invalidate All Register, offset 0C */
> +	uint scu_reserved0[12];	/* reserved, offset 10-3C */
> +	uint scu_filt_start;	/* SCU Filtering Start Address Reg, offset 40 */
> +	uint scu_filt_end;	/* SCU Filtering End Address Reg, offset 44 */
> +	uint scu_reserved1[2];	/* reserved, offset 48-4C */
> +	uint scu_acc_ctl;	/* SCU Access Control Register, offset 50 */
> +	uint scu_ns_acc_ctl;	/* SCU Non-secure Access Cntrl Reg, offset 54 */
> +};
> +
> +#define SCU_ENABLE	1

Should this be here?  Seems like its not the proper location to enable
things...

> +
> +#endif	/* SCU_H */

Best,
Peter
Tom Warren - March 14, 2011, 4:15 p.m.
Albert,

On Sun, Mar 13, 2011 at 10:46 AM, Albert ARIBAUD <albert.aribaud@free.fr> wrote:
> Le 16/02/2011 21:26, Tom Warren a écrit :
>>
>> Signed-off-by: Tom Warren<twarren@nvidia.com>
>> ---
>>  arch/arm/cpu/armv7/start.S                 |    6 +
>>  arch/arm/cpu/armv7/tegra2/Makefile         |    2 +-
>
>>  arch/arm/cpu/armv7/tegra2/ap20.c           |  490
>> ++++++++++++++++++++++++++++
>
> This one has an extra empty line at end of file.
>
Thanks - I'll remove it.

>> +void cold_boot(void)
>> +{
>> +       asm volatile(
>> +
>> +       "msr     cpsr_c, #0xd3            \n"
>> +       /*
>> +       * Check current processor: CPU or AVP?
>> +       * If AVP, go to AVP boot code, else continue on.
>> +       */
>> +       "mov     r0, %0                   \n"
>> +       "ldrb    r2, [r0, %1]             \n"
>> +       /* are we the CPU? */
>> +       "cmp     r2, %2                   \n"
>> +       "mov     sp, %3                   \n"
>> +       /* leave in some symbols for release debugging */
>> +       "mov     r3, %6                   \n"
>> +       "str     r3, [sp, #-4]!           \n"
>> +       "str     r3, [sp, #-4]!           \n"
>> +       /*  yep, we are the CPU */
>> +       "bxeq     %4                      \n"
>> +       /* AVP Initialization follows this path */
>> +       "mov     sp, %5                   \n"
>> +       /* leave in some symbols for release debugging */
>> +       "mov     r3, %6                   \n"
>> +       "str     r3, [sp, #-4]!           \n"
>> +       "str     r3, [sp, #-4]!           \n"
>> +
>> +       /* Init and Start CPU */
>> +       "b       startup_cpu              \n"
>> +       :
>> +       : "i"(NV_PA_PG_UP_BASE),
>
> If I'm not mistaken, NV_PA_PG_UP_BASE could be used just as well directly in
> the asm statement instead of via %0 and i(), as anyway the asm will be
> preprocessed and the macro will turn to a number. That would simplify the
> asm instruction as a whole and make the asm statement more understandable
> (also applies to other macros used similarly).
>
Yeah, this code came this way from our bringup group for out in-house
bootloader, and it's always been confusing.
I'll try to simplify it when I incorporate Peter's critiques. Thanks.
> Apart from that, I must admit I don't know the Tegra2/A9 well enough to
> comment further.
>
> amicalement,
> Amicalement,
> --
> Albert.
Thanks for the input,
Tom
>
Tom Warren - March 14, 2011, 9:16 p.m.
Peter,

On Mon, Mar 14, 2011 at 8:33 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
> Hi Tom,
> I'm not too familiar with the architecture, so many comments are
> aesthetic.  It would be a good idea to run the patch through
> checkpatch.pl too.
I run checkpatch on every submission. Only 1 warning was generated
(about an extern), and no errors.

>
>> diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
>> index 684f2d2..50a1725 100644
>> --- a/arch/arm/cpu/armv7/start.S
>> +++ b/arch/arm/cpu/armv7/start.S
>> @@ -70,6 +70,12 @@ _end_vect:
>>  _TEXT_BASE:
>>       .word   CONFIG_SYS_TEXT_BASE
>>
>> +#ifdef CONFIG_TEGRA2
>> +.globl _armboot_start
>> +_armboot_start:
>> +        .word _start
>> +#endif
>> +
>
> It'd be good to add a comment about what's going on above, and why its
> tegra2-specific.  Eg why is it needed?
Tegra uses 2 separate CPUs - the AVP (ARM7TDI) and the dual-core A9. A
bootloader first runs on the AVP and sets things up for the CPU
complex to take over.
The CPU complex jumps here during it's init phase/boot (from it's
reset vector). It needs a dereferenced jump to the _start code in
start.S (which in turn chains to the reset code).
I'll add a comment about this in the next patch.

> <snip>
>
>> +static void init_pll_x(void)
>> +{
>> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
>> +     u32     reg;
>
> The spaces in front of "reg" can be removed.  Ditto for all
> functions/variables.
Not sure what you mean, here, Peter. There are no spaces here in my
ap20.c file, just tabs.
Please clarify.

>
>> +     /* Is PLL-X already running? */
>> +     reg = readl(&clkrst->crc_pllx_base);
>> +     if (reg & PLL_ENABLE)
>> +             return;
>> +
>> +     /* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
>> +}
>
> The above function looks incorrect.
What looks incorrect? It checks to see if the PLL is already
running/enabled, and returns if it is.
Tegra2 mask ROM (BootROM) currently always sets PLLX, so this will
always return, but the comment is there for future chips that may not
set up PLLX.

>
>> +static void set_cpu_reset_vector(u32 vector)
>> +{
>> +     writel(vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
>> +}
>
> Is it worth breaking this out into its own function?  The define names
> make it pretty clear what is being done, and its only called from 1
> location.
I can move the writel() to the called location and remove this func.

>
>> +static void enable_cpu_clock(int enable)
>> +{
>> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
>> +     u32   reg, clk;
>> +
>> +     /*
>> +      * NOTE:
>> +      * Regardless of whether the request is to enable or disable the CPU
>> +      * clock, every processor in the CPU complex except the master (CPU 0)
>> +      * will have it's clock stopped because the AVP only talks to the
>> +      * master. The AVP does not know (nor does it need to know) that there
>> +      * are multiple processors in the CPU complex.
>> +      */
>> +
>> +     /* Need to initialize PLLX? */
>> +     if (enable) {
>> +             /* Initialize PLL */
>> +             init_pll_x();
>> +
>> +             /* Wait until stable */
>> +             udelay(NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY);
>> +
>> +             reg = CCLK_BURST_POLICY;
>> +             writel(reg, &clkrst->crc_cclk_brst_pol);
>
> It'd look cleaner to not set "reg" for each access, just use the define
> directly.  I'd personally use less temporary variables in general and
> only use them when it made the code cleaner and easier to understand.
Will do. This code was ported from our orignal, in-house bootloader,
so it's going to be a bit hairy.

>
>> +             reg = SUPER_CCLK_DIVIDER;
>> +             writel(reg, &clkrst->crc_super_cclk_div);
>> +     }
>> +
>> +     /* Fetch the register containing the main CPU complex clock enable */
>> +     reg = readl(&clkrst->crc_clk_out_enb_l);
>
> Is this read necessary?  You overwrite reg unconditionally below.
Yeah, I saw that and did a rewrite a week or two ago, but was waiting
to incorporate into the next patch to get everything in one place.

>
>> +     /*
>> +      * Read the register containing the individual CPU clock enables and
>> +      * always stop the clock to CPU 1.
>> +      */
>> +     clk = readl(&clkrst->crc_clk_cpu_cmplx);
>> +     clk |= CPU1_CLK_STP;
>> +
>> +     if (enable) {
>> +             /* Enable the CPU clock */
>> +             reg = readl(&clkrst->crc_clk_out_enb_l);
>> +             reg |= CLK_ENB_CPU;
>> +             clk = readl(&clkrst->crc_clk_cpu_cmplx);
>> +             clk &= ~CPU0_CLK_STP;
>> +     } else {
>> +             /* Disable the CPU clock */
>> +             reg = readl(&clkrst->crc_clk_out_enb_l);
>> +             reg |= CLK_ENB_CPU;
>> +             clk = readl(&clkrst->crc_clk_cpu_cmplx);
>> +             clk |= CPU0_CLK_STP;
>> +     }
>
> For if/elses that share common code, the common code should be broken
> out.  eg above only the one-line |=/&= operations should be conditional.
Yep, already done as part of the previous cleanup. Just need to put it
into the full patchset #2.

>
>> +     writel(clk, &clkrst->crc_clk_cpu_cmplx);
>> +     writel(reg, &clkrst->crc_clk_out_enb_l);
>> +}
>> +
>> +static int is_cpu_powered(void)
>> +{
>> +     struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
>> +     u32   reg;
>> +
>> +     reg = readl(&pmc->pmc_pwrgate_status);
>> +
>> +     if (reg & CPU_PWRED)
>> +             return TRUE;
>
> I'd remove the reg variable.  eg something like:
> return readl(&pmc->pmc_pwrgate_status & CPU_PWRED ? 1 : 0);
Good. Will do.

>
>> +     return FALSE;
>> +}
>> +
>> +static void remove_cpu_io_clamps(void)
>> +{
>> +     struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
>> +     u32   reg;
>> +
>> +     /* Remove the clamps on the CPU I/O signals */
>> +     reg = readl(&pmc->pmc_remove_clamping);
>> +     reg |= CPU_CLMP;
>> +     writel(reg, &pmc->pmc_remove_clamping);
>> +
>> +     /* Give I/O signals time to stabilize */
>> +     udelay(1000);
>
> For magic delays it'd be good to reference why you're waiting a specific
> amount of time.  Does a manual state 1 ms?  Did testing show 1ms was
> required?
Not sure, since this code came from the dark ages of Tegra bringup,
and was ported from our in-house bootloader.
The Tegra TRM doesn't call out a specific delay, and no one seems to
be able to point to any reference for this.
I think the 'give I/O signals time to stabilize' will have to suffice
here. I can make it a #define such as 'IO_STAB_TIME', if that'd help.

>
>> +}
>> +
>> +static void powerup_cpu(void)
>> +{
>> +     struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
>> +     u32   reg;
>> +
>> +     if (!is_cpu_powered()) {
>> +             /* Toggle the CPU power state (OFF -> ON) */
>> +             reg = readl(&pmc->pmc_pwrgate_toggle);
>> +             reg &= (PARTID_CP);
>> +             reg |= START_CP;
>
> Why ()'s on one operation, but not the other?
Don't know - part of the porting process. I'll remove 'em.

>
>> +             writel(reg, &pmc->pmc_pwrgate_toggle);
>> +
>> +             /* Wait for the power to come up */
>> +             while (!is_cpu_powered())
>> +                     ;                       /* Do nothing */
>> +
>> +             /*
>> +              * Remove the I/O clamps from CPU power partition.
>> +              * Recommended only on a Warm boot, if the CPU partition gets
>> +              * power gated. Shouldn't cause any harm when called after a
>> +              * cold boot according to HW, probably just redundant.
>> +              */
>> +             remove_cpu_io_clamps();
>> +     }
>> +}
>> +
>> +static void enable_cpu_power_rail(void)
>> +{
>> +     struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
>> +     u32   reg;
>> +
>> +     reg = readl(&pmc->pmc_cntrl);
>> +     reg |= CPUPWRREQ_OE;
>> +     writel(reg, &pmc->pmc_cntrl);
>> +
>> +     /* Delay to allow for stable power (CPU_PWR_REQ -> VDD_CPU from PMU) */
>> +     udelay(3750);
>
> Ditto for description.
Ditto on why the delay? In this case, I did find a reference to the
time needed between the request from the chipset to the PMU, hence the
comment.

>
>> +}
>> +
>> +static void reset_A9_cpu(int reset)
>> +{
>> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
>> +     u32   reg, cpu;
>> +
>> +     /*
>> +     * NOTE:  Regardless of whether the request is to hold the CPU in reset
>> +     *        or take it out of reset, every processor in the CPU complex
>> +     *        except the master (CPU 0) will be held in reset because the
>> +     *        AVP only talks to the master. The AVP does not know that there
>> +     *        are multiple processors in the CPU complex.
>> +     */
>> +
>> +     /* Hold CPU 1 in reset */
>> +     cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
>> +     writel(cpu, &clkrst->crc_cpu_cmplx_set);
>
> The cpu variable can be removed.
It could be, sure. But it makes the line longer, >80 chars, and hence
it would have to be broken into two lines.
Actually, now that I look at the code again, it appears that 'cpu'
later should be OR'd with the SET_/CLR_DBGRESET0, etc. bits, depending
on the state of 'reset'.
I'll give it a rewrite for the next patch.

>
>> +     reg = readl(&clkrst->crc_rst_dev_l);
>> +     if (reset) {
>> +             /* Place CPU0 into reset */
>> +             cpu = SET_DBGRESET0 | SET_DERESET0 | SET_CPURESET0;
>> +             writel(cpu, &clkrst->crc_cpu_cmplx_set);
>> +
>> +             /* Enable master CPU reset */
>> +             reg |= SWR_CPU_RST;
>> +
>
> Extra newline.
OK.

>
>> +     } else {
>> +             /* Take CPU0 out of reset */
>> +             cpu = CLR_DBGRESET0 | CLR_DERESET0 | CLR_CPURESET0;
>> +             writel(cpu, &clkrst->crc_cpu_cmplx_clr);
>> +
>> +             /* Disable master CPU reset */
>> +             reg &= ~SWR_CPU_RST;
>> +     }
>> +
>> +     writel(reg, &clkrst->crc_rst_dev_l);
>> +}
>> +
>> +static void clock_enable_coresight(int enable)
>> +{
>> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
>> +     u32   rst, clk, src;
>> +
>> +     rst = readl(&clkrst->crc_rst_dev_u);
>> +     clk = readl(&clkrst->crc_clk_out_enb_u);
>> +
>> +     if (enable) {
>> +             rst &= ~SWR_CSITE_RST;
>> +             clk |= CLK_ENB_CSITE;
>> +     } else {
>> +             rst |= SWR_CSITE_RST;
>> +             clk &= ~CLK_ENB_CSITE;
>> +     }
>> +
>> +     writel(clk, &clkrst->crc_clk_out_enb_u);
>> +     writel(rst, &clkrst->crc_rst_dev_u);
>> +
>> +     if (enable) {
>> +             /*
>> +              * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
>> +              *  1.5, giving an effective frequency of 144MHz.
>> +              * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
>> +              *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
>> +              */
>> +             src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
>> +             writel(src, &clkrst->crc_clk_src_csite);
>> +
>> +             /* Unlock the CPU CoreSight interfaces */
>> +             rst = 0xC5ACCE55;
>
> What's this number?  Is there a define that could be used instead?
Sure, I can add a #define. Some magic ARM access sequence to unlock
the CoreSight I/F, as the comment says

>
>> +             writel(rst, CSITE_CPU_DBG0_LAR);
>> +             writel(rst, CSITE_CPU_DBG1_LAR);
>> +     }
>> +}
>> +
>> +void start_cpu(u32 reset_vector)
>> +{
>> +     /* Enable VDD_CPU */
>> +     enable_cpu_power_rail();
>> +
>> +     /* Hold the CPUs in reset */
>> +     reset_A9_cpu(TRUE);
>> +
>> +     /* Disable the CPU clock */
>> +     enable_cpu_clock(FALSE);
>> +
>> +     /* Enable CoreSight */
>> +     clock_enable_coresight(TRUE);
>> +
>> +     /*
>> +     * Set the entry point for CPU execution from reset,
>> +     *  if it's a non-zero value.
>> +     */
>
> Spaces should be added above.
Spaces where? Please be more specific.  Again, checkpatch had no
problem with this section.

>
>> +     if (reset_vector)
>> +             set_cpu_reset_vector(reset_vector);
>> +
>> +     /* Enable the CPU clock */
>> +     enable_cpu_clock(TRUE);
>> +
>> +     /* If the CPU doesn't already have power, power it up */
>> +     if (!is_cpu_powered())
>> +             powerup_cpu();
>> +
>> +     /* Take the CPU out of reset */
>> +     reset_A9_cpu(FALSE);
>> +}
>> +
>> +
>> +void halt_avp(void)
>> +{
>> +     u32   reg;
>> +
>> +     for (;;) {
>> +             reg = HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
>> +                     | HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29);
>> +             writel(reg, FLOW_CTLR_HALT_COP_EVENTS);
>> +     }
>> +     /* Should never get here */
>> +     for (;;)
>> +             ;
>
> I'd remove the use of reg, and a secondary infinite loop seems
> necessary.
OK, I'll rewrite it.

>
>> +}
>> +
>> +void startup_cpu(void)
>> +{
>> +     /* Initialize the AVP, clocks, and memory controller */
>> +     /* SDRAM is guaranteed to be on at this point */
>> +
>> +     asm volatile(
>> +
>> +     /* Start the CPU */
>> +     /* R0 = reset vector for CPU */
>> +     "ldr     r0, =cold_boot      \n"
>> +     "bl      start_cpu           \n"
>> +
>> +     /* Transfer control to the AVP code */
>> +     "bl      halt_avp            \n"
>> +
>> +     /* Should never get here */
>> +     "b       .                   \n"
>> +     );
>
> The assembly code should be indented.  Actually, is there a reason not
> to move all these assembly functions into a seperate assembly file?
I can indent the asm code, no problem.
I don't see the need to move it to a .S file, though. Lots of examples
of embedded assembly code in the U-Boot tree.
Unless I see some strong objections, I'm just going to indent it (and
other asm statements).

>
>> +}
>> +
>> +extern ulong _armboot_start;
>> +u32 cpu_boot_stack = CPU_EARLY_BOOT_STACK_LIMIT;
>> +u32 avp_boot_stack = AVP_EARLY_BOOT_STACK_LIMIT;
>> +u32 proc_tag = PG_UP_TAG_0_PID_CPU & 0xFF;
>> +u32 deadbeef = 0xdeadbeef;
>> +
>> +void cold_boot(void)
>> +{
>> +     asm volatile(
>> +
>> +     "msr     cpsr_c, #0xd3            \n"
>> +     /*
>> +     * Check current processor: CPU or AVP?
>> +     * If AVP, go to AVP boot code, else continue on.
>> +     */
>> +     "mov     r0, %0                   \n"
>> +     "ldrb    r2, [r0, %1]             \n"
>> +     /* are we the CPU? */
>> +     "cmp     r2, %2                   \n"
>> +     "mov     sp, %3                   \n"
>> +     /* leave in some symbols for release debugging */
>> +     "mov     r3, %6                   \n"
>> +     "str     r3, [sp, #-4]!           \n"
>> +     "str     r3, [sp, #-4]!           \n"
>> +     /*  yep, we are the CPU */
>> +     "bxeq     %4                      \n"
>> +     /* AVP Initialization follows this path */
>> +     "mov     sp, %5                   \n"
>> +     /* leave in some symbols for release debugging */
>> +     "mov     r3, %6                   \n"
>> +     "str     r3, [sp, #-4]!           \n"
>> +     "str     r3, [sp, #-4]!           \n"
>> +
>> +     /* Init and Start CPU */
>> +     "b       startup_cpu              \n"
>> +     :
>> +     : "i"(NV_PA_PG_UP_BASE),
>> +     "i"(PG_UP_TAG_0),
>> +     "r"(proc_tag),
>> +     "r"(cpu_boot_stack),
>> +     "r"(_armboot_start),
>> +     "r"(avp_boot_stack),
>> +     "r"(deadbeef)
>> +     : "r0", "r2", "r3", "cc", "lr"
>> +     );
>
> Ditto on indentation/move.
Albert pointed out that this code is kinda dense, and I agree. I'm
going to rewrite without the %0 stuff, plus add indents.

>
>> +}
>> +
>> +void enable_scu(void)
>> +{
>> +     struct scu_ctlr *scu = (struct scu_ctlr *)NV_PA_ARM_PERIPHBASE;
>> +     u32 reg;
>> +
>> +     reg = readl(&scu->scu_ctrl);
>> +     if (reg & SCU_ENABLE) {
>> +             /* SCU already enabled, return */
>> +             return;
>> +     }
>
> I'd not use reg above, and not use brackets, eg:
> /* If the SCU is already enabled, we have nothing to do */
> if (readl(&scu_ctrl) & SCU_ENABLE)
>        return;
Sure, I can do that.

>
>> +     /* Invalidate all ways for all processors */
>> +     writel(0xFFFF, &scu->scu_inv_all);
>> +
>> +     /* Enable SCU - bit 0 */
>> +     reg = readl(&scu->scu_ctrl);
>> +     reg |= SCU_ENABLE;
>> +     writel(reg, &scu->scu_ctrl);
>> +
>> +     return;
>
> Remove return.
OK.

>
>> +}
>> +
>> +void cache_configure(void)
>> +{
>> +     asm volatile(
>> +
>> +     "stmdb r13!,{r14}                 \n"
>> +     /* invalidate instruction cache */
>> +     "mov r1, #0                       \n"
>> +     "mcr p15, 0, r1, c7, c5, 0        \n"
>> +
>> +     /* invalidate the i&d tlb entries */
>> +     "mcr p15, 0, r1, c8, c5, 0        \n"
>> +     "mcr p15, 0, r1, c8, c6, 0        \n"
>> +
>> +     /* enable instruction cache */
>> +     "mrc  p15, 0, r1, c1, c0, 0       \n"
>> +     "orr  r1, r1, #(1<<12)            \n"
>> +     "mcr  p15, 0, r1, c1, c0, 0       \n"
>> +
>> +     "bl enable_scu                    \n"
>> +
>> +     /* enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg */
>> +     "mrc p15, 0, r0, c1, c0, 1        \n"
>> +     "orr r0, r0, #0x41                \n"
>> +     "mcr p15, 0, r0, c1, c0, 1        \n"
>> +
>> +     /* Now flush the Dcache */
>> +     "mov r0, #0                       \n"
>> +     /* 256 cache lines */
>> +     "mov r1, #256                     \n"
>> +
>> +     "invalidate_loop:                 \n"
>> +
>> +     "add r1, r1, #-1                  \n"
>> +     "mov r0, r1, lsl #5               \n"
>> +     /* invalidate d-cache using line (way0) */
>> +     "mcr p15, 0, r0, c7, c6, 2        \n"
>> +
>> +     "orr r2, r0, #(1<<30)             \n"
>> +     /* invalidate d-cache using line (way1) */
>> +     "mcr p15, 0, r2, c7, c6, 2        \n"
>> +
>> +     "orr r2, r0, #(2<<30)             \n"
>> +     /* invalidate d-cache using line (way2) */
>> +     "mcr p15, 0, r2, c7, c6, 2        \n"
>> +
>> +     "orr r2, r0, #(3<<30)             \n"
>> +     /* invalidate d-cache using line (way3) */
>> +     "mcr p15, 0, r2, c7, c6, 2        \n"
>> +     "cmp r1, #0                       \n"
>> +     "bne invalidate_loop              \n"
>> +
>> +     /* FIXME: should have ap20's L2 disabled too */
>> +     "invalidate_done:                 \n"
>> +     "ldmia r13!,{pc}                  \n"
>> +     ".ltorg                           \n"
>> +     );
>
> Ditto on indentation/move.
I'll indent it.

>
>> +}
>> +
>> +void init_pmc_scratch(void)
>> +{
>> +     struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
>> +
>> +     /* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
>> +     memset(&pmc->pmc_scratch1, 0, 23*4);
>
> Is it safe to memset on IO regions here?  A for loop of writel's would
> be safer, and more consistent.
The generated assembly looks OK. These are mem-mapped scratch
registers, so there're no side-effects here.
I can convert to a loop if you insist.

>
>> +     writel(CONFIG_SYS_BOARD_ODMDATA, &pmc->pmc_scratch20);
>> +}
>> +
>> +u32 s_first_boot = 1;
>
> I'd move all global variables to the top of the file.
OK, will do.

>
>> +
>> +void cpu_start(void)
>> +{
>> +     struct pmux_tri_ctlr *pmt = (struct pmux_tri_ctlr *)NV_PA_APB_MISC_BASE;
>> +
>> +     /* enable JTAG */
>> +     writel(0xC0, &pmt->pmt_cfg_ctl);
>> +
>> +     if (s_first_boot) {
>> +             /*
>> +              * Need to set this before cold-booting,
>> +              *  otherwise we'll end up in an infinite loop.
>> +              */
>> +             s_first_boot = 0;
>> +             cold_boot();
>> +     }
>> +
>> +     return;
>
> Remove return.
OK.

>
>> +}
>> +
>> +void tegra2_start()
>> +{
>> +     if (s_first_boot) {
>> +             /* Init Debug UART Port (115200 8n1) */
>> +             uart_init();
>> +
>> +             /* Init PMC scratch memory */
>> +             init_pmc_scratch();
>> +     }
>> +
>> +#ifdef CONFIG_ENABLE_CORTEXA9
>> +     /* take the mpcore out of reset */
>> +     cpu_start();
>> +
>> +     /* configure cache */
>> +     cache_configure();
>> +#endif
>> +}
>> +
>> diff --git a/arch/arm/cpu/armv7/tegra2/ap20.h b/arch/arm/cpu/armv7/tegra2/ap20.h
>> new file mode 100644
>> index 0000000..de7622d
>> --- /dev/null
>> +++ b/arch/arm/cpu/armv7/tegra2/ap20.h
>> @@ -0,0 +1,105 @@
>> +/*
>> + * (C) Copyright 2010-2011
>> + * NVIDIA Corporation <www.nvidia.com>
>> + *
>> + * See file CREDITS for list of people who contributed to this
>> + * project.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License as
>> + * published by the Free Software Foundation; either version 2 of
>> + * the License, or (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
>> + * MA 02111-1307 USA
>> + */
>> +#include <asm/types.h>
>> +
>> +#ifndef      FALSE
>> +#define FALSE        0
>> +#endif
>> +#ifndef TRUE
>> +#define TRUE 1
>> +#endif
>
> These shouldn't be added here.  They should be somewhere common, or
> shouldn't be used (my preference).
I would think they'd be in the ARM tree somewhere, but I couldn't find
them so I added 'em here.
My preference is to use TRUE/FALSE - it carries more context than 1/0 to me.

>
>> +
>> +#define NVBL_PLLP_KHZ        (432000/2)
>> +/* The stabilization delay in usec */
>> +#define NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY (300)
>> +
>> +#define PLLX_ENABLED         (1 << 30)
>> +#define CCLK_BURST_POLICY    0x20008888
>> +#define SUPER_CCLK_DIVIDER   0x80000000
>> +
>> +/* Calculate clock fractional divider value from ref and target frequencies */
>> +#define CLK_DIVIDER(REF, FREQ)  ((((REF) * 2) / FREQ) - 2)
>> +
>> +/* Calculate clock frequency value from reference and clock divider value */
>> +#define CLK_FREQUENCY(REF, REG)  (((REF) * 2) / (REG + 2))
>> +
>> +/* AVP/CPU ID */
>> +#define PG_UP_TAG_0_PID_CPU  0x55555555      /* CPU aka "a9" aka "mpcore" */
>> +#define PG_UP_TAG_0             0x0
>> +
>> +/* AP20-Specific Base Addresses */
>> +
>> +/* AP20 Base physical address of SDRAM. */
>> +#define AP20_BASE_PA_SDRAM      0x00000000
>> +/* AP20 Base physical address of internal SRAM. */
>> +#define AP20_BASE_PA_SRAM       0x40000000
>> +/* AP20 Size of internal SRAM (256KB). */
>> +#define AP20_BASE_PA_SRAM_SIZE  0x00040000
>> +/* AP20 Base physical address of flash. */
>> +#define AP20_BASE_PA_NOR_FLASH  0xD0000000
>> +/* AP20 Base physical address of boot information table. */
>> +#define AP20_BASE_PA_BOOT_INFO  AP20_BASE_PA_SRAM
>> +
>> +/*
>> + * Super-temporary stacks for EXTREMELY early startup. The values chosen for
>> + * these addresses must be valid on ALL SOCs because this value is used before
>> + * we are able to differentiate between the SOC types.
>> + *
>> + * NOTE: The since CPU's stack will eventually be moved from IRAM to SDRAM, its
>> + *       stack is placed below the AVP stack. Once the CPU stack has been moved,
>> + *       the AVP is free to use the IRAM the CPU stack previously occupied if
>> + *       it should need to do so.
>> + *
>> + * NOTE: In multi-processor CPU complex configurations, each processor will have
>> + *       its own stack of size CPU_EARLY_BOOT_STACK_SIZE. CPU 0 will have a
>> + *       limit of CPU_EARLY_BOOT_STACK_LIMIT. Each successive CPU will have a
>> + *       stack limit that is CPU_EARLY_BOOT_STACK_SIZE less then the previous
>> + *       CPU.
>> + */
>> +
>> +/* Common AVP early boot stack limit */
>> +#define AVP_EARLY_BOOT_STACK_LIMIT   \
>> +     (AP20_BASE_PA_SRAM + (AP20_BASE_PA_SRAM_SIZE/2))
>> +/* Common AVP early boot stack size */
>> +#define AVP_EARLY_BOOT_STACK_SIZE    0x1000
>> +/* Common CPU early boot stack limit */
>> +#define CPU_EARLY_BOOT_STACK_LIMIT   \
>> +     (AVP_EARLY_BOOT_STACK_LIMIT - AVP_EARLY_BOOT_STACK_SIZE)
>> +/* Common CPU early boot stack size */
>> +#define CPU_EARLY_BOOT_STACK_SIZE    0x1000
>> +
>> +#define EXCEP_VECTOR_CPU_RESET_VECTOR        (NV_PA_EVP_BASE + 0x100)
>> +#define CSITE_CPU_DBG0_LAR           (NV_PA_CSITE_BASE + 0x10FB0)
>> +#define CSITE_CPU_DBG1_LAR           (NV_PA_CSITE_BASE + 0x12FB0)
>> +
>> +#define FLOW_CTLR_HALT_COP_EVENTS    (NV_PA_FLOW_BASE + 4)
>> +#define FLOW_MODE_STOP                       2
>> +#define HALT_COP_EVENT_JTAG          (1 << 28)
>> +#define HALT_COP_EVENT_IRQ_1         (1 << 11)
>> +#define HALT_COP_EVENT_FIQ_1         (1 << 9)
>> +
>> +/* Prototypes */
>> +
>> +void tegra2_start(void);
>> +void uart_init(void);
>> +void udelay(unsigned long);
>> diff --git a/arch/arm/include/asm/arch-tegra2/clk_rst.h b/arch/arm/include/asm/arch-tegra2/clk_rst.h
>> index 6d573bf..d67a5d7 100644
>> --- a/arch/arm/include/asm/arch-tegra2/clk_rst.h
>> +++ b/arch/arm/include/asm/arch-tegra2/clk_rst.h
>> @@ -149,6 +149,9 @@ struct clk_rst_ctlr {
>>       uint crc_clk_src_csite;         /*_CSITE_0,             0x1D4 */
>>       uint crc_reserved19[9];         /*                      0x1D8-1F8 */
>>       uint crc_clk_src_osc;           /*_OSC_0,               0x1FC */
>> +     uint crc_reserved20[80];        /*                      0x200-33C */
>> +     uint crc_cpu_cmplx_set;         /* _CPU_CMPLX_SET_0,    0x340 */
>> +     uint crc_cpu_cmplx_clr;         /* _CPU_CMPLX_CLR_0,    0x344 */
>>  };
>>
>>  #define PLL_BYPASS           (1 << 31)
>> @@ -162,4 +165,28 @@ struct clk_rst_ctlr {
>>  #define SWR_UARTA_RST                (1 << 6)
>>  #define CLK_ENB_UARTA                (1 << 6)
>>
>> +#define SWR_CPU_RST          (1 << 0)
>> +#define CLK_ENB_CPU          (1 << 0)
>> +#define SWR_CSITE_RST                (1 << 9)
>> +#define CLK_ENB_CSITE                (1 << 9)
>> +
>> +#define SET_CPURESET0                (1 << 0)
>> +#define SET_DERESET0         (1 << 4)
>> +#define SET_DBGRESET0                (1 << 12)
>> +
>> +#define SET_CPURESET1                (1 << 1)
>> +#define SET_DERESET1         (1 << 5)
>> +#define SET_DBGRESET1                (1 << 13)
>> +
>> +#define CLR_CPURESET0                (1 << 0)
>> +#define CLR_DERESET0         (1 << 4)
>> +#define CLR_DBGRESET0                (1 << 12)
>> +
>> +#define CLR_CPURESET1                (1 << 1)
>> +#define CLR_DERESET1         (1 << 5)
>> +#define CLR_DBGRESET1                (1 << 13)
>> +
>> +#define CPU0_CLK_STP         (1 << 8)
>> +#define CPU1_CLK_STP         (1 << 9)
>> +
>>  #endif       /* CLK_RST_H */
>> diff --git a/arch/arm/include/asm/arch-tegra2/pmc.h b/arch/arm/include/asm/arch-tegra2/pmc.h
>> index 7ec9eeb..b1d47cd 100644
>> --- a/arch/arm/include/asm/arch-tegra2/pmc.h
>> +++ b/arch/arm/include/asm/arch-tegra2/pmc.h
>> @@ -121,4 +121,12 @@ struct pmc_ctlr {
>>       uint pmc_gate;                  /* _GATE_0, offset 15C */
>>  };
>>
>> +#define CPU_PWRED    1
>> +#define CPU_CLMP     1
>> +
>> +#define PARTID_CP    0xFFFFFFF8
>> +#define START_CP     (1 << 8)
>> +
>> +#define CPUPWRREQ_OE (1 << 16)
>> +
>>  #endif       /* PMC_H */
>> diff --git a/arch/arm/include/asm/arch-tegra2/scu.h b/arch/arm/include/asm/arch-tegra2/scu.h
>> new file mode 100644
>> index 0000000..d2faa6f
>> --- /dev/null
>> +++ b/arch/arm/include/asm/arch-tegra2/scu.h
>> @@ -0,0 +1,43 @@
>> +/*
>> + *  (C) Copyright 2010,2011
>> + *  NVIDIA Corporation <www.nvidia.com>
>> + *
>> + * See file CREDITS for list of people who contributed to this
>> + * project.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License as
>> + * published by the Free Software Foundation; either version 2 of
>> + * the License, or (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
>> + * MA 02111-1307 USA
>> + */
>> +
>> +#ifndef _SCU_H_
>> +#define _SCU_H_
>> +
>> +/* ARM Snoop Control Unit (SCU) registers */
>> +struct scu_ctlr {
>> +     uint scu_ctrl;          /* SCU Control Register, offset 00 */
>> +     uint scu_cfg;           /* SCU Config Register, offset 04 */
>> +     uint scu_cpu_pwr_stat;  /* SCU CPU Power Status Register, offset 08 */
>> +     uint scu_inv_all;       /* SCU Invalidate All Register, offset 0C */
>> +     uint scu_reserved0[12]; /* reserved, offset 10-3C */
>> +     uint scu_filt_start;    /* SCU Filtering Start Address Reg, offset 40 */
>> +     uint scu_filt_end;      /* SCU Filtering End Address Reg, offset 44 */
>> +     uint scu_reserved1[2];  /* reserved, offset 48-4C */
>> +     uint scu_acc_ctl;       /* SCU Access Control Register, offset 50 */
>> +     uint scu_ns_acc_ctl;    /* SCU Non-secure Access Cntrl Reg, offset 54 */
>> +};
>> +
>> +#define SCU_ENABLE   1
>
> Should this be here?  Seems like its not the proper location to enable
> things...
It's a bit setting, for the scu_ctrl register. I could change it to (1
<< 0), if you'd prefer.
There are other bits in the SCU, but since they aren't used in this
code, I didn't declare 'em.

>
>> +
>> +#endif       /* SCU_H */
>
> Best,
> Peter
Thanks for the review. I'll have a new patch in a couple of days, once
I have your feedback on my questions, above.

Tom
>
>
>
Peter Tyser - March 14, 2011, 10:20 p.m.
Hi Tom,

<snip>

> >> +static void init_pll_x(void)
> >> +{
> >> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> >> +     u32     reg;
> >
> > The spaces in front of "reg" can be removed.  Ditto for all
> > functions/variables.
> Not sure what you mean, here, Peter. There are no spaces here in my
> ap20.c file, just tabs.
> Please clarify.

My email client converted to spaces I was referring to the "u32    reg;"
above.  I think it should be "u32<space>reg" instead of "u32<tab>reg".
Other places in this patch spaces are used (eg in enable_cpu_clock, and
in the struct declaration above), so at a minimum the tabs/spaces should
be changed to be consistent, and <type><space><name> seems cleanest to
me.

> >
> >> +     /* Is PLL-X already running? */
> >> +     reg = readl(&clkrst->crc_pllx_base);
> >> +     if (reg & PLL_ENABLE)
> >> +             return;
> >> +
> >> +     /* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
> >> +}
> >
> > The above function looks incorrect.
> What looks incorrect? It checks to see if the PLL is already
> running/enabled, and returns if it is.
> Tegra2 mask ROM (BootROM) currently always sets PLLX, so this will
> always return, but the comment is there for future chips that may not
> set up PLLX.

It looks like a function that does a read of a value it doesn't care
about, does an unnecessary comparison, and then returns either way,
without doing anything:)  If/when you want to do future stuff with the
PLL-X, code should be added then - as is it doesn't do anything and is
confusing.  The general policy of U-Boot is not to add dead code.

<snip>

> >> +static void enable_cpu_power_rail(void)
> >> +{
> >> +     struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> >> +     u32   reg;
> >> +
> >> +     reg = readl(&pmc->pmc_cntrl);
> >> +     reg |= CPUPWRREQ_OE;
> >> +     writel(reg, &pmc->pmc_cntrl);
> >> +
> >> +     /* Delay to allow for stable power (CPU_PWR_REQ -> VDD_CPU from PMU) */
> >> +     udelay(3750);
> >
> > Ditto for description.
> Ditto on why the delay? In this case, I did find a reference to the
> time needed between the request from the chipset to the PMU, hence the
> comment.

It'd be nice mention that reference in the comment.  For those designing
boards around your chips, during debug they will look through the
initialization code and wonder "I wonder if we need to delay longer", or
"I'm not using the same power supply sequencer, I wonder if this might
be a problem".  If you more explicitly state where the delay came from,
or what the delay specifically accomplishes, it saves others from having
to guess and investigate.

> >> +}
> >> +
> >> +static void reset_A9_cpu(int reset)
> >> +{
> >> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
> >> +     u32   reg, cpu;
> >> +
> >> +     /*
> >> +     * NOTE:  Regardless of whether the request is to hold the CPU in reset
> >> +     *        or take it out of reset, every processor in the CPU complex
> >> +     *        except the master (CPU 0) will be held in reset because the
> >> +     *        AVP only talks to the master. The AVP does not know that there
> >> +     *        are multiple processors in the CPU complex.
> >> +     */
> >> +
> >> +     /* Hold CPU 1 in reset */
> >> +     cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
> >> +     writel(cpu, &clkrst->crc_cpu_cmplx_set);
> >
> > The cpu variable can be removed.
> It could be, sure. But it makes the line longer, >80 chars, and hence
> it would have to be broken into two lines.
> Actually, now that I look at the code again, it appears that 'cpu'
> later should be OR'd with the SET_/CLR_DBGRESET0, etc. bits, depending
> on the state of 'reset'.
> I'll give it a rewrite for the next patch.

Its a matter of preference, but is acceptable either way.  I think:
+	writel(SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1,
+		&clkrst->crc_cpu_cmplx_set);

makes it clearer what is going on.  Setting 'cpu', then writing would
imply to me that 'cpu' has some additional purpose, or is being used
later.  If you restructure the code, this comment will likely be moot.

<snip>

> >> +     if (enable) {
> >> +             /*
> >> +              * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
> >> +              *  1.5, giving an effective frequency of 144MHz.
> >> +              * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
> >> +              *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
> >> +              */
> >> +             src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
> >> +             writel(src, &clkrst->crc_clk_src_csite);
> >> +
> >> +             /* Unlock the CPU CoreSight interfaces */
> >> +             rst = 0xC5ACCE55;
> >
> > What's this number?  Is there a define that could be used instead?
> Sure, I can add a #define. Some magic ARM access sequence to unlock
> the CoreSight I/F, as the comment says

Its not clear what the number is from the comment.  Is it a bitmask
where each bit has some significance?  Or is it just a "magic number" of
some sort?  If its a magic number with no other significance, a more
verbose comment is fine stating so without adding a define.

> >
> >> +             writel(rst, CSITE_CPU_DBG0_LAR);
> >> +             writel(rst, CSITE_CPU_DBG1_LAR);
> >> +     }
> >> +}
> >> +
> >> +void start_cpu(u32 reset_vector)
> >> +{
> >> +     /* Enable VDD_CPU */
> >> +     enable_cpu_power_rail();
> >> +
> >> +     /* Hold the CPUs in reset */
> >> +     reset_A9_cpu(TRUE);
> >> +
> >> +     /* Disable the CPU clock */
> >> +     enable_cpu_clock(FALSE);
> >> +
> >> +     /* Enable CoreSight */
> >> +     clock_enable_coresight(TRUE);
> >> +
> >> +     /*
> >> +     * Set the entry point for CPU execution from reset,
> >> +     *  if it's a non-zero value.
> >> +     */
> >
> > Spaces should be added above.
> Spaces where? Please be more specific.  Again, checkpatch had no
> problem with this section.

The multiline comment is not aligned:
/*
*
*
*/
should be
/*
 *
 */

<snip>

> >> +void halt_avp(void)
> >> +{
> >> +     u32   reg;
> >> +
> >> +     for (;;) {
> >> +             reg = HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
> >> +                     | HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29);
> >> +             writel(reg, FLOW_CTLR_HALT_COP_EVENTS);
> >> +     }
> >> +     /* Should never get here */
> >> +     for (;;)
> >> +             ;
> >
> > I'd remove the use of reg, and a secondary infinite loop seems
> > necessary.
> OK, I'll rewrite it.

Sorry, should have said "unnecessary" instead of "necessary" in the
original comment.

> >> +void startup_cpu(void)
> >> +{
> >> +     /* Initialize the AVP, clocks, and memory controller */
> >> +     /* SDRAM is guaranteed to be on at this point */
> >> +
> >> +     asm volatile(
> >> +
> >> +     /* Start the CPU */
> >> +     /* R0 = reset vector for CPU */
> >> +     "ldr     r0, =cold_boot      \n"
> >> +     "bl      start_cpu           \n"
> >> +
> >> +     /* Transfer control to the AVP code */
> >> +     "bl      halt_avp            \n"
> >> +
> >> +     /* Should never get here */
> >> +     "b       .                   \n"
> >> +     );
> >
> > The assembly code should be indented.  Actually, is there a reason not
> > to move all these assembly functions into a seperate assembly file?
> I can indent the asm code, no problem.
> I don't see the need to move it to a .S file, though. Lots of examples
> of embedded assembly code in the U-Boot tree.
> Unless I see some strong objections, I'm just going to indent it (and
> other asm statements).

Agreed are lots of embedded assembly in C files, but they are generally
small snippets that interact with surrounding C-code, or simple
one-liners.

Eg look in arch/powerpc:
find grep -r asm arch/powerpc | grep vola | grep -v ";"

There are only a handful of multi-line inline assembly references, lots
are in headers, and the remaining generally interact with surrounding
code.

It looks like most of your uses are standalone functions that would
function just fine on their own.  Is there a reason you prefer to have
them in a C-file instead of in an assembly file?

<snip>

> >
> >> +}
> >> +
> >> +void cache_configure(void)
> >> +{
> >> +     asm volatile(
> >> +
> >> +     "stmdb r13!,{r14}                 \n"
> >> +     /* invalidate instruction cache */
> >> +     "mov r1, #0                       \n"
> >> +     "mcr p15, 0, r1, c7, c5, 0        \n"
> >> +
> >> +     /* invalidate the i&d tlb entries */
> >> +     "mcr p15, 0, r1, c8, c5, 0        \n"
> >> +     "mcr p15, 0, r1, c8, c6, 0        \n"
> >> +
> >> +     /* enable instruction cache */
> >> +     "mrc  p15, 0, r1, c1, c0, 0       \n"
> >> +     "orr  r1, r1, #(1<<12)            \n"
> >> +     "mcr  p15, 0, r1, c1, c0, 0       \n"
> >> +
> >> +     "bl enable_scu                    \n"
> >> +
> >> +     /* enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg */
> >> +     "mrc p15, 0, r0, c1, c0, 1        \n"
> >> +     "orr r0, r0, #0x41                \n"
> >> +     "mcr p15, 0, r0, c1, c0, 1        \n"
> >> +
> >> +     /* Now flush the Dcache */
> >> +     "mov r0, #0                       \n"
> >> +     /* 256 cache lines */
> >> +     "mov r1, #256                     \n"
> >> +
> >> +     "invalidate_loop:                 \n"
> >> +
> >> +     "add r1, r1, #-1                  \n"
> >> +     "mov r0, r1, lsl #5               \n"
> >> +     /* invalidate d-cache using line (way0) */
> >> +     "mcr p15, 0, r0, c7, c6, 2        \n"
> >> +
> >> +     "orr r2, r0, #(1<<30)             \n"
> >> +     /* invalidate d-cache using line (way1) */
> >> +     "mcr p15, 0, r2, c7, c6, 2        \n"
> >> +
> >> +     "orr r2, r0, #(2<<30)             \n"
> >> +     /* invalidate d-cache using line (way2) */
> >> +     "mcr p15, 0, r2, c7, c6, 2        \n"
> >> +
> >> +     "orr r2, r0, #(3<<30)             \n"
> >> +     /* invalidate d-cache using line (way3) */
> >> +     "mcr p15, 0, r2, c7, c6, 2        \n"
> >> +     "cmp r1, #0                       \n"
> >> +     "bne invalidate_loop              \n"
> >> +
> >> +     /* FIXME: should have ap20's L2 disabled too */
> >> +     "invalidate_done:                 \n"
> >> +     "ldmia r13!,{pc}                  \n"
> >> +     ".ltorg                           \n"
> >> +     );
> >
> > Ditto on indentation/move.
> I'll indent it.

That's a pretty big and ugly inline assembly function...  I'd push to
have it in an assembly file along with the few other assembly-only
functions, but its not my say ultimately.

> >
> >> +}
> >> +
> >> +void init_pmc_scratch(void)
> >> +{
> >> +     struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
> >> +
> >> +     /* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
> >> +     memset(&pmc->pmc_scratch1, 0, 23*4);
> >
> > Is it safe to memset on IO regions here?  A for loop of writel's would
> > be safer, and more consistent.
> The generated assembly looks OK. These are mem-mapped scratch
> registers, so there're no side-effects here.
> I can convert to a loop if you insist.

The policy is if you are accessing registers, you should use proper IO
access functions.  The generated assembly and/or CPU may behave
differently down the road, as others have ran into.

<snip>

> >> +#include <asm/types.h>
> >> +
> >> +#ifndef      FALSE
> >> +#define FALSE        0
> >> +#endif
> >> +#ifndef TRUE
> >> +#define TRUE 1
> >> +#endif
> >
> > These shouldn't be added here.  They should be somewhere common, or
> > shouldn't be used (my preference).
> I would think they'd be in the ARM tree somewhere, but I couldn't find
> them so I added 'em here.
> My preference is to use TRUE/FALSE - it carries more context than 1/0 to me.

If you prefer to use TRUE/FALSE, they should be moved into a common
location so everywhere uses the same, once-defined definition.  Their
definitions are already littered over a few files, which would ideally
be cleaned up.  Perhaps moving all definitions into include/common.h, or
somewhere similar would work.  Others may have opinions about TRUE/FALSE
vs 1/0 - it seems like TRUE/FALSE aren't generally used.

< snip>

> >> +/* ARM Snoop Control Unit (SCU) registers */
> >> +struct scu_ctlr {
> >> +     uint scu_ctrl;          /* SCU Control Register, offset 00 */
> >> +     uint scu_cfg;           /* SCU Config Register, offset 04 */
> >> +     uint scu_cpu_pwr_stat;  /* SCU CPU Power Status Register, offset 08 */
> >> +     uint scu_inv_all;       /* SCU Invalidate All Register, offset 0C */
> >> +     uint scu_reserved0[12]; /* reserved, offset 10-3C */
> >> +     uint scu_filt_start;    /* SCU Filtering Start Address Reg, offset 40 */
> >> +     uint scu_filt_end;      /* SCU Filtering End Address Reg, offset 44 */
> >> +     uint scu_reserved1[2];  /* reserved, offset 48-4C */
> >> +     uint scu_acc_ctl;       /* SCU Access Control Register, offset 50 */
> >> +     uint scu_ns_acc_ctl;    /* SCU Non-secure Access Cntrl Reg, offset 54 */
> >> +};
> >> +
> >> +#define SCU_ENABLE   1
> >
> > Should this be here?  Seems like its not the proper location to enable
> > things...
> It's a bit setting, for the scu_ctrl register. I could change it to (1
> << 0), if you'd prefer.
> There are other bits in the SCU, but since they aren't used in this
> code, I didn't declare 'em.

Ahh, I get it - wasn't thinking of a bit definition.  I think (1 << 0)
would be clearer, or a comment, or the inclusion of the other SCU
registers.  If its for a specific register, naming it similarly would
help too, eg "SCU_CTRL_ENABLE".

Best,
Peter
Tom Warren - March 14, 2011, 11:08 p.m.
Peter,

On Mon, Mar 14, 2011 at 3:20 PM, Peter Tyser <ptyser@xes-inc.com> wrote:
> Hi Tom,
>
> <snip>
>
>> >> +static void init_pll_x(void)
>> >> +{
>> >> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST _BASE;
>> >> +     u32     reg;
>> >
>> > The spaces in front of "reg" can be removed.  Ditto for all
>> > functions/variables.
>> Not sure what you mean, here, Peter. There are no spaces here in my
>> ap20.c file, just tabs.
>> Please clarify.
>
> My email client converted to spaces I was referring to the "u32    reg;"
> above.  I think it should be "u32<space>reg" instead of "u32<tab>reg".
> Other places in this patch spaces are used (eg in enable_cpu_clock, and
> in the struct declaration above), so at a minimum the tabs/spaces should
> be changed to be consistent, and <type><space><name> seems cleanest to
> me.
I see. I can change to <type><space><name> globally, no problem.

>
>> >
>> >> +     /* Is PLL-X already running? */
>> >> +     reg = readl(&clkrst->crc_pllx_base);
>> >> +     if (reg & PLL_ENABLE)
>> >> +             return;
>> >> +
>> >> +     /* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
>> >> +}
>> >
>> > The above function looks incorrect.
>> What looks incorrect? It checks to see if the PLL is already
>> running/enabled, and returns if it is.
>> Tegra2 mask ROM (BootROM) currently always sets PLLX, so this will
>> always return, but the comment is there for future chips that may not
>> set up PLLX.
>
> It looks like a function that does a read of a value it doesn't care
> about, does an unnecessary comparison, and then returns either way,
> without doing anything:)  If/when you want to do future stuff with the
> PLL-X, code should be added then - as is it doesn't do anything and is
> confusing.  The general policy of U-Boot is not to add dead code.
OK, so not really incorrect, just unnecessary. Agreed - again a
vestigial leftover from our in-house code. I'll remove it.

>
> <snip>
>
>> >> +static void enable_cpu_power_rail(void)
>> >> +{
>> >> +     struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
>> >> +     u32   reg;
>> >> +
>> >> +     reg = readl(&pmc->pmc_cntrl);
>> >> +     reg |= CPUPWRREQ_OE;
>> >> +     writel(reg, &pmc->pmc_cntrl);
>> >> +
>> >> +     /* Delay to allow for stable power (CPU_PWR_REQ -> VDD_CPU from PMU) */
>> >> +     udelay(3750);
>> >
>> > Ditto for description.
>> Ditto on why the delay? In this case, I did find a reference to the
>> time needed between the request from the chipset to the PMU, hence the
>> comment.
>
> It'd be nice mention that reference in the comment.  For those designing
> boards around your chips, during debug they will look through the
> initialization code and wonder "I wonder if we need to delay longer", or
> "I'm not using the same power supply sequencer, I wonder if this might
> be a problem".  If you more explicitly state where the delay came from,
> or what the delay specifically accomplishes, it saves others from having
> to guess and investigate.
OK, I'll expand the comment.

>
>> >> +}
>> >> +
>> >> +static void reset_A9_cpu(int reset)
>> >> +{
>> >> +     struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
>> >> +     u32   reg, cpu;
>> >> +
>> >> +     /*
>> >> +     * NOTE:  Regardless of whether the request is to hold the CPU in reset
>> >> +     *        or take it out of reset, every processor in the CPU complex
>> >> +     *        except the master (CPU 0) will be held in reset because the
>> >> +     *        AVP only talks to the master. The AVP does not know that there
>> >> +     *        are multiple processors in the CPU complex.
>> >> +     */
>> >> +
>> >> +     /* Hold CPU 1 in reset */
>> >> +     cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
>> >> +     writel(cpu, &clkrst->crc_cpu_cmplx_set);
>> >
>> > The cpu variable can be removed.
>> It could be, sure. But it makes the line longer, >80 chars, and hence
>> it would have to be broken into two lines.
>> Actually, now that I look at the code again, it appears that 'cpu'
>> later should be OR'd with the SET_/CLR_DBGRESET0, etc. bits, depending
>> on the state of 'reset'.
>> I'll give it a rewrite for the next patch.
>
> Its a matter of preference, but is acceptable either way.  I think:
> +       writel(SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1,
> +               &clkrst->crc_cpu_cmplx_set);
>
> makes it clearer what is going on.  Setting 'cpu', then writing would
> imply to me that 'cpu' has some additional purpose, or is being used
> later.  If you restructure the code, this comment will likely be moot.
>
> <snip>
>
>> >> +     if (enable) {
>> >> +             /*
>> >> +              * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
>> >> +              *  1.5, giving an effective frequency of 144MHz.
>> >> +              * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
>> >> +              *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
>> >> +              */
>> >> +             src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
>> >> +             writel(src, &clkrst->crc_clk_src_csite);
>> >> +
>> >> +             /* Unlock the CPU CoreSight interfaces */
>> >> +             rst = 0xC5ACCE55;
>> >
>> > What's this number?  Is there a define that could be used instead?
>> Sure, I can add a #define. Some magic ARM access sequence to unlock
>> the CoreSight I/F, as the comment says
>
> Its not clear what the number is from the comment.  Is it a bitmask
> where each bit has some significance?  Or is it just a "magic number" of
> some sort?  If its a magic number with no other significance, a more
> verbose comment is fine stating so without adding a define.
OK, I'll expand the comment.

>
>> >
>> >> +             writel(rst, CSITE_CPU_DBG0_LAR);
>> >> +             writel(rst, CSITE_CPU_DBG1_LAR);
>> >> +     }
>> >> +}
>> >> +
>> >> +void start_cpu(u32 reset_vector)
>> >> +{
>> >> +     /* Enable VDD_CPU */
>> >> +     enable_cpu_power_rail();
>> >> +
>> >> +     /* Hold the CPUs in reset */
>> >> +     reset_A9_cpu(TRUE);
>> >> +
>> >> +     /* Disable the CPU clock */
>> >> +     enable_cpu_clock(FALSE);
>> >> +
>> >> +     /* Enable CoreSight */
>> >> +     clock_enable_coresight(TRUE);
>> >> +
>> >> +     /*
>> >> +     * Set the entry point for CPU execution from reset,
>> >> +     *  if it's a non-zero value.
>> >> +     */
>> >
>> > Spaces should be added above.
>> Spaces where? Please be more specific.  Again, checkpatch had no
>> problem with this section.
>
> The multiline comment is not aligned:
> /*
> *
> *
> */
> should be
> /*
>  *
>  */
>
OK, now I see it. Will fix, thanks.

> <snip>
>
>> >> +void halt_avp(void)
>> >> +{
>> >> +     u32   reg;
>> >> +
>> >> +     for (;;) {
>> >> +             reg = HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
>> >> +                     | HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29);
>> >> +             writel(reg, FLOW_CTLR_HALT_COP_EVENTS);
>> >> +     }
>> >> +     /* Should never get here */
>> >> +     for (;;)
>> >> +             ;
>> >
>> > I'd remove the use of reg, and a secondary infinite loop seems
>> > necessary.
>> OK, I'll rewrite it.
>
> Sorry, should have said "unnecessary" instead of "necessary" in the
> original comment.
>
>> >> +void startup_cpu(void)
>> >> +{
>> >> +     /* Initialize the AVP, clocks, and memory controller */
>> >> +     /* SDRAM is guaranteed to be on at this point */
>> >> +
>> >> +     asm volatile(
>> >> +
>> >> +     /* Start the CPU */
>> >> +     /* R0 = reset vector for CPU */
>> >> +     "ldr     r0, =cold_boot      \n"
>> >> +     "bl      start_cpu           \n"
>> >> +
>> >> +     /* Transfer control to the AVP code */
>> >> +     "bl      halt_avp            \n"
>> >> +
>> >> +     /* Should never get here */
>> >> +     "b       .                   \n"
>> >> +     );
>> >
>> > The assembly code should be indented.  Actually, is there a reason not
>> > to move all these assembly functions into a seperate assembly file?
>> I can indent the asm code, no problem.
>> I don't see the need to move it to a .S file, though. Lots of examples
>> of embedded assembly code in the U-Boot tree.
>> Unless I see some strong objections, I'm just going to indent it (and
>> other asm statements).
>
> Agreed are lots of embedded assembly in C files, but they are generally
> small snippets that interact with surrounding C-code, or simple
> one-liners.
>
> Eg look in arch/powerpc:
> find grep -r asm arch/powerpc | grep vola | grep -v ";"
>
> There are only a handful of multi-line inline assembly references, lots
> are in headers, and the remaining generally interact with surrounding
> code.
>
> It looks like most of your uses are standalone functions that would
> function just fine on their own.  Is there a reason you prefer to have
> them in a C-file instead of in an assembly file?
Just laziness ;)
I'll move these to a new .S file in the next patchset.

>
> <snip>
>
>> >
>> >> +}
>> >> +
>> >> +void cache_configure(void)
>> >> +{
>> >> +     asm volatile(
>> >> +
>> >> +     "stmdb r13!,{r14}                 \n"
>> >> +     /* invalidate instruction cache */
>> >> +     "mov r1, #0                       \n"
>> >> +     "mcr p15, 0, r1, c7, c5, 0        \n"
>> >> +
>> >> +     /* invalidate the i&d tlb entries */
>> >> +     "mcr p15, 0, r1, c8, c5, 0        \n"
>> >> +     "mcr p15, 0, r1, c8, c6, 0        \n"
>> >> +
>> >> +     /* enable instruction cache */
>> >> +     "mrc  p15, 0, r1, c1, c0, 0       \n"
>> >> +     "orr  r1, r1, #(1<<12)            \n"
>> >> +     "mcr  p15, 0, r1, c1, c0, 0       \n"
>> >> +
>> >> +     "bl enable_scu                    \n"
>> >> +
>> >> +     /* enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg */
>> >> +     "mrc p15, 0, r0, c1, c0, 1        \n"
>> >> +     "orr r0, r0, #0x41                \n"
>> >> +     "mcr p15, 0, r0, c1, c0, 1        \n"
>> >> +
>> >> +     /* Now flush the Dcache */
>> >> +     "mov r0, #0                       \n"
>> >> +     /* 256 cache lines */
>> >> +     "mov r1, #256                     \n"
>> >> +
>> >> +     "invalidate_loop:                 \n"
>> >> +
>> >> +     "add r1, r1, #-1                  \n"
>> >> +     "mov r0, r1, lsl #5               \n"
>> >> +     /* invalidate d-cache using line (way0) */
>> >> +     "mcr p15, 0, r0, c7, c6, 2        \n"
>> >> +
>> >> +     "orr r2, r0, #(1<<30)             \n"
>> >> +     /* invalidate d-cache using line (way1) */
>> >> +     "mcr p15, 0, r2, c7, c6, 2        \n"
>> >> +
>> >> +     "orr r2, r0, #(2<<30)             \n"
>> >> +     /* invalidate d-cache using line (way2) */
>> >> +     "mcr p15, 0, r2, c7, c6, 2        \n"
>> >> +
>> >> +     "orr r2, r0, #(3<<30)             \n"
>> >> +     /* invalidate d-cache using line (way3) */
>> >> +     "mcr p15, 0, r2, c7, c6, 2        \n"
>> >> +     "cmp r1, #0                       \n"
>> >> +     "bne invalidate_loop              \n"
>> >> +
>> >> +     /* FIXME: should have ap20's L2 disabled too */
>> >> +     "invalidate_done:                 \n"
>> >> +     "ldmia r13!,{pc}                  \n"
>> >> +     ".ltorg                           \n"
>> >> +     );
>> >
>> > Ditto on indentation/move.
>> I'll indent it.
>
> That's a pretty big and ugly inline assembly function...  I'd push to
> have it in an assembly file along with the few other assembly-only
> functions, but its not my say ultimately.
No problem - I'll move to .S.

>
>> >
>> >> +}
>> >> +
>> >> +void init_pmc_scratch(void)
>> >> +{
>> >> +     struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
>> >> +
>> >> +     /* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
>> >> +     memset(&pmc->pmc_scratch1, 0, 23*4);
>> >
>> > Is it safe to memset on IO regions here?  A for loop of writel's would
>> > be safer, and more consistent.
>> The generated assembly looks OK. These are mem-mapped scratch
>> registers, so there're no side-effects here.
>> I can convert to a loop if you insist.
>
> The policy is if you are accessing registers, you should use proper IO
> access functions.  The generated assembly and/or CPU may behave
> differently down the road, as others have ran into.
All right. I'll put in a loop of writel()'s.

>
> <snip>
>
>> >> +#include <asm/types.h>
>> >> +
>> >> +#ifndef      FALSE
>> >> +#define FALSE        0
>> >> +#endif
>> >> +#ifndef TRUE
>> >> +#define TRUE 1
>> >> +#endif
>> >
>> > These shouldn't be added here.  They should be somewhere common, or
>> > shouldn't be used (my preference).
>> I would think they'd be in the ARM tree somewhere, but I couldn't find
>> them so I added 'em here.
>> My preference is to use TRUE/FALSE - it carries more context than 1/0 to me.
>
> If you prefer to use TRUE/FALSE, they should be moved into a common
> location so everywhere uses the same, once-defined definition.  Their
> definitions are already littered over a few files, which would ideally
> be cleaned up.  Perhaps moving all definitions into include/common.h, or
> somewhere similar would work.  Others may have opinions about TRUE/FALSE
> vs 1/0 - it seems like TRUE/FALSE aren't generally used.
I don't want to pollute all builds by adding to include/common.h.
I'll try to find a more central header in my own tree.

>
> < snip>
>
>> >> +/* ARM Snoop Control Unit (SCU) registers */
>> >> +struct scu_ctlr {
>> >> +     uint scu_ctrl;          /* SCU Control Register, offset 00 */
>> >> +     uint scu_cfg;           /* SCU Config Register, offset 04 */
>> >> +     uint scu_cpu_pwr_stat;  /* SCU CPU Power Status Register, offset 08 */
>> >> +     uint scu_inv_all;       /* SCU Invalidate All Register, offset 0C */
>> >> +     uint scu_reserved0[12]; /* reserved, offset 10-3C */
>> >> +     uint scu_filt_start;    /* SCU Filtering Start Address Reg, offset 40 */
>> >> +     uint scu_filt_end;      /* SCU Filtering End Address Reg, offset 44 */
>> >> +     uint scu_reserved1[2];  /* reserved, offset 48-4C */
>> >> +     uint scu_acc_ctl;       /* SCU Access Control Register, offset 50 */
>> >> +     uint scu_ns_acc_ctl;    /* SCU Non-secure Access Cntrl Reg, offset 54 */
>> >> +};
>> >> +
>> >> +#define SCU_ENABLE   1
>> >
>> > Should this be here?  Seems like its not the proper location to enable
>> > things...
>> It's a bit setting, for the scu_ctrl register. I could change it to (1
>> << 0), if you'd prefer.
>> There are other bits in the SCU, but since they aren't used in this
>> code, I didn't declare 'em.
>
> Ahh, I get it - wasn't thinking of a bit definition.  I think (1 << 0)
> would be clearer, or a comment, or the inclusion of the other SCU
> registers.  If its for a specific register, naming it similarly would
> help too, eg "SCU_CTRL_ENABLE".
Good idea. I'll make it SCU_CTRL_ENABLE. Thanks.

>
> Best,
> Peter

Thanks,
Tom
>
Peter Tyser - March 17, 2011, 2:32 p.m.
Hi Tom,

> >> >
> >> >> +     /* Is PLL-X already running? */
> >> >> +     reg = readl(&clkrst->crc_pllx_base);
> >> >> +     if (reg & PLL_ENABLE)
> >> >> +             return;
> >> >> +
> >> >> +     /* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
> >> >> +}
> >> >
> >> > The above function looks incorrect.
> >> What looks incorrect? It checks to see if the PLL is already
> >> running/enabled, and returns if it is.
> >> Tegra2 mask ROM (BootROM) currently always sets PLLX, so this will
> >> always return, but the comment is there for future chips that may not
> >> set up PLLX.
> >
> > It looks like a function that does a read of a value it doesn't care
> > about, does an unnecessary comparison, and then returns either way,
> > without doing anything:)  If/when you want to do future stuff with the
> > PLL-X, code should be added then - as is it doesn't do anything and is
> > confusing.  The general policy of U-Boot is not to add dead code.
> OK, so not really incorrect, just unnecessary. Agreed - again a
> vestigial leftover from our in-house code. I'll remove it.

Unnecessary/misleading/dead code is inherently not correct:)

<snip>

> >> >> +#include <asm/types.h>
> >> >> +
> >> >> +#ifndef      FALSE
> >> >> +#define FALSE        0
> >> >> +#endif
> >> >> +#ifndef TRUE
> >> >> +#define TRUE 1
> >> >> +#endif
> >> >
> >> > These shouldn't be added here.  They should be somewhere common, or
> >> > shouldn't be used (my preference).
> >> I would think they'd be in the ARM tree somewhere, but I couldn't find
> >> them so I added 'em here.
> >> My preference is to use TRUE/FALSE - it carries more context than 1/0 to me.
> >
> > If you prefer to use TRUE/FALSE, they should be moved into a common
> > location so everywhere uses the same, once-defined definition.  Their
> > definitions are already littered over a few files, which would ideally
> > be cleaned up.  Perhaps moving all definitions into include/common.h, or
> > somewhere similar would work.  Others may have opinions about TRUE/FALSE
> > vs 1/0 - it seems like TRUE/FALSE aren't generally used.
> I don't want to pollute all builds by adding to include/common.h.
> I'll try to find a more central header in my own tree.

My point is that there are already 32 definitions of TRUE/FALSE - adding
a 33rd doesn't seem like a good thing to do.  I view a 33rd identical
definition as polluting the code more than 1 common definition that most
people won't generally use.

Its not my decision, but I assume the powers that be would recommend one
of:
- Not using TRUE/FALSE since using non-zero values and 0 are widely
accepted as TRUE/FALSE.  I think using TRUE/FALSE makes the code harder
to understand and more open to bugs.  Eg for other code that interacts
with your code, or someone reviewing your code, they either have to
either assume you defined TRUE as 1, FALSE as 0, or import your
definitions.  Anyway, I view their use as another layer of unnecessary
abstraction with very little benefit.

- Consolidating the definition of TRUE/FALSE.

Wolfgang, do you have a preference about how TRUE/FALSE are generally
used/defined?

Best,
Peter
Alessandro Rubini - March 17, 2011, 3:30 p.m.
>> It looks like most of your uses are standalone functions that would
>> function just fine on their own.  Is there a reason you prefer to have
>> them in a C-file instead of in an assembly file?

> Just laziness ;)
> I'll move these to a new .S file in the next patchset.

Actually, writing assembly-only C functions is difficul and
error-pronet. I've seen you use "r0" and other registers esplicitly,
but this is not allowed in general.

I once wasted some hours in tracking why a non-submitted port of
u-boot was not working with a newer compiler. The problem was just
that: the new compiler was inlining a void(void) function; the asm
used "r0" and "r1" explicitly, which worked over a function call
but was corrupting data when inlined by the newer and more optimizing
compiler.

While your functions are currently not inlined (or, like cold_boot,
they may be inlined in a place where no register needs to be
preserved), another user may move them to a context where the
semantics are different, for another board or another boot loader.  If
they are in a .S files, they will only be called by "bl" and you know
the rules for register allocation appy. Besides, a _real_ lazy
programmer avoids the extra quotes and \n in the code :)

/alessandro
Tom Warren - March 18, 2011, 6:06 p.m.
Peter,

On Thu, Mar 17, 2011 at 7:32 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
> Hi Tom,
>
>> >> >
>> >> >> +     /* Is PLL-X already running? */
>> >> >> +     reg = readl(&clkrst->crc_pllx_base);
>> >> >> +     if (reg & PLL_ENABLE)
>> >> >> +             return;
>> >> >> +
>> >> >> +     /* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
>> >> >> +}
>> >> >
>> >> > The above function looks incorrect.
>> >> What looks incorrect? It checks to see if the PLL is already
>> >> running/enabled, and returns if it is.
>> >> Tegra2 mask ROM (BootROM) currently always sets PLLX, so this will
>> >> always return, but the comment is there for future chips that may not
>> >> set up PLLX.
>> >
>> > It looks like a function that does a read of a value it doesn't care
>> > about, does an unnecessary comparison, and then returns either way,
>> > without doing anything:)  If/when you want to do future stuff with the
>> > PLL-X, code should be added then - as is it doesn't do anything and is
>> > confusing.  The general policy of U-Boot is not to add dead code.
>> OK, so not really incorrect, just unnecessary. Agreed - again a
>> vestigial leftover from our in-house code. I'll remove it.
>
> Unnecessary/misleading/dead code is inherently not correct:)
>
> <snip>
>
>> >> >> +#include <asm/types.h>
>> >> >> +
>> >> >> +#ifndef      FALSE
>> >> >> +#define FALSE        0
>> >> >> +#endif
>> >> >> +#ifndef TRUE
>> >> >> +#define TRUE 1
>> >> >> +#endif
>> >> >
>> >> > These shouldn't be added here.  They should be somewhere common, or
>> >> > shouldn't be used (my preference).
>> >> I would think they'd be in the ARM tree somewhere, but I couldn't find
>> >> them so I added 'em here.
>> >> My preference is to use TRUE/FALSE - it carries more context than 1/0 to me.
>> >
>> > If you prefer to use TRUE/FALSE, they should be moved into a common
>> > location so everywhere uses the same, once-defined definition.  Their
>> > definitions are already littered over a few files, which would ideally
>> > be cleaned up.  Perhaps moving all definitions into include/common.h, or
>> > somewhere similar would work.  Others may have opinions about TRUE/FALSE
>> > vs 1/0 - it seems like TRUE/FALSE aren't generally used.
>> I don't want to pollute all builds by adding to include/common.h.
>> I'll try to find a more central header in my own tree.
>
> My point is that there are already 32 definitions of TRUE/FALSE - adding
> a 33rd doesn't seem like a good thing to do.  I view a 33rd identical
> definition as polluting the code more than 1 common definition that most
> people won't generally use.
>
> Its not my decision, but I assume the powers that be would recommend one
> of:
> - Not using TRUE/FALSE since using non-zero values and 0 are widely
> accepted as TRUE/FALSE.  I think using TRUE/FALSE makes the code harder
> to understand and more open to bugs.  Eg for other code that interacts
> with your code, or someone reviewing your code, they either have to
> either assume you defined TRUE as 1, FALSE as 0, or import your
> definitions.  Anyway, I view their use as another layer of unnecessary
> abstraction with very little benefit.
I've removed both the defines and the use of TRUE/FALSE in ap20.* -
there were only a few instances.

>
> - Consolidating the definition of TRUE/FALSE.
>
> Wolfgang, do you have a preference about how TRUE/FALSE are generally
> used/defined?
>
> Best,
> Peter
Thanks,
Tom
>
>
>
>
Tom Warren - March 18, 2011, 6:16 p.m.
Allesandro,

On Thu, Mar 17, 2011 at 8:30 AM, Alessandro Rubini
<rubini-list@gnudd.com> wrote:
>
>>> It looks like most of your uses are standalone functions that would
>>> function just fine on their own.  Is there a reason you prefer to have
>>> them in a C-file instead of in an assembly file?
>
>> Just laziness ;)
>> I'll move these to a new .S file in the next patchset.
>
> Actually, writing assembly-only C functions is difficul and
> error-pronet. I've seen you use "r0" and other registers esplicitly,
> but this is not allowed in general.
>
> I once wasted some hours in tracking why a non-submitted port of
> u-boot was not working with a newer compiler. The problem was just
> that: the new compiler was inlining a void(void) function; the asm
> used "r0" and "r1" explicitly, which worked over a function call
> but was corrupting data when inlined by the newer and more optimizing
> compiler.
I've moved most of the in-line assembly to lowlevel_init.S.
However, the cold_boot() code proved difficult - I got fixup errors,
undefined constants, etc.
I've cleaned it up a bit, but left it in ap20.c, using the %vars -
again, replacing them with the actual values caused errors that I
couldn't resolve.
Maybe someone else on the list can help to port this code to the
assembly file - I'm not expert enough in gcc/as and ARM to do it.
New patch to follow RSN.

>
> While your functions are currently not inlined (or, like cold_boot,
> they may be inlined in a place where no register needs to be
> preserved), another user may move them to a context where the
> semantics are different, for another board or another boot loader.  If
> they are in a .S files, they will only be called by "bl" and you know
> the rules for register allocation appy. Besides, a _real_ lazy
> programmer avoids the extra quotes and \n in the code :)
>
> /alessandro
Thanks for your insight,
Tom
>

Patch

diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index 684f2d2..50a1725 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -70,6 +70,12 @@  _end_vect:
 _TEXT_BASE:
 	.word	CONFIG_SYS_TEXT_BASE
 
+#ifdef CONFIG_TEGRA2
+.globl _armboot_start
+_armboot_start:
+        .word _start
+#endif
+
 /*
  * These are defined in the board-specific linker script.
  */
diff --git a/arch/arm/cpu/armv7/tegra2/Makefile b/arch/arm/cpu/armv7/tegra2/Makefile
index 687c887..f1ea915 100644
--- a/arch/arm/cpu/armv7/tegra2/Makefile
+++ b/arch/arm/cpu/armv7/tegra2/Makefile
@@ -28,7 +28,7 @@  include $(TOPDIR)/config.mk
 LIB	=  $(obj)lib$(SOC).o
 
 SOBJS	:= lowlevel_init.o
-COBJS	:= board.o sys_info.o timer.o
+COBJS	:= ap20.o board.o sys_info.o timer.o
 
 SRCS	:= $(SOBJS:.o=.S) $(COBJS:.o=.c)
 OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
diff --git a/arch/arm/cpu/armv7/tegra2/ap20.c b/arch/arm/cpu/armv7/tegra2/ap20.c
new file mode 100644
index 0000000..89d0d5e
--- /dev/null
+++ b/arch/arm/cpu/armv7/tegra2/ap20.c
@@ -0,0 +1,490 @@ 
+/*
+* (C) Copyright 2010-2011
+* NVIDIA Corporation <www.nvidia.com>
+*
+* See file CREDITS for list of people who contributed to this
+* project.
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of
+* the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+* MA 02111-1307 USA
+*/
+
+#include "ap20.h"
+#include <asm/io.h>
+#include <asm/arch/tegra2.h>
+#include <asm/arch/clk_rst.h>
+#include <asm/arch/pmc.h>
+#include <asm/arch/pinmux.h>
+#include <asm/arch/scu.h>
+#include <common.h>
+
+static void init_pll_x(void)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32	reg;
+
+	/* Is PLL-X already running? */
+	reg = readl(&clkrst->crc_pllx_base);
+	if (reg & PLL_ENABLE)
+		return;
+
+	/* Do PLLX init if it isn't running, but BootROM sets it, so TBD */
+}
+
+static void set_cpu_reset_vector(u32 vector)
+{
+	writel(vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
+}
+
+static void enable_cpu_clock(int enable)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32   reg, clk;
+
+	/*
+	 * NOTE:
+	 * Regardless of whether the request is to enable or disable the CPU
+	 * clock, every processor in the CPU complex except the master (CPU 0)
+	 * will have it's clock stopped because the AVP only talks to the
+	 * master. The AVP does not know (nor does it need to know) that there
+	 * are multiple processors in the CPU complex.
+	 */
+
+	/* Need to initialize PLLX? */
+	if (enable) {
+		/* Initialize PLL */
+		init_pll_x();
+
+		/* Wait until stable */
+		udelay(NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY);
+
+		reg = CCLK_BURST_POLICY;
+		writel(reg, &clkrst->crc_cclk_brst_pol);
+
+		reg = SUPER_CCLK_DIVIDER;
+		writel(reg, &clkrst->crc_super_cclk_div);
+	}
+
+	/* Fetch the register containing the main CPU complex clock enable */
+	reg = readl(&clkrst->crc_clk_out_enb_l);
+
+	/*
+	 * Read the register containing the individual CPU clock enables and
+	 * always stop the clock to CPU 1.
+	 */
+	clk = readl(&clkrst->crc_clk_cpu_cmplx);
+	clk |= CPU1_CLK_STP;
+
+	if (enable) {
+		/* Enable the CPU clock */
+		reg = readl(&clkrst->crc_clk_out_enb_l);
+		reg |= CLK_ENB_CPU;
+		clk = readl(&clkrst->crc_clk_cpu_cmplx);
+		clk &= ~CPU0_CLK_STP;
+	} else {
+		/* Disable the CPU clock */
+		reg = readl(&clkrst->crc_clk_out_enb_l);
+		reg |= CLK_ENB_CPU;
+		clk = readl(&clkrst->crc_clk_cpu_cmplx);
+		clk |= CPU0_CLK_STP;
+	}
+
+	writel(clk, &clkrst->crc_clk_cpu_cmplx);
+	writel(reg, &clkrst->crc_clk_out_enb_l);
+}
+
+static int is_cpu_powered(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32   reg;
+
+	reg = readl(&pmc->pmc_pwrgate_status);
+
+	if (reg & CPU_PWRED)
+		return TRUE;
+
+	return FALSE;
+}
+
+static void remove_cpu_io_clamps(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32   reg;
+
+	/* Remove the clamps on the CPU I/O signals */
+	reg = readl(&pmc->pmc_remove_clamping);
+	reg |= CPU_CLMP;
+	writel(reg, &pmc->pmc_remove_clamping);
+
+	/* Give I/O signals time to stabilize */
+	udelay(1000);
+}
+
+static void powerup_cpu(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32   reg;
+
+	if (!is_cpu_powered()) {
+		/* Toggle the CPU power state (OFF -> ON) */
+		reg = readl(&pmc->pmc_pwrgate_toggle);
+		reg &= (PARTID_CP);
+		reg |= START_CP;
+		writel(reg, &pmc->pmc_pwrgate_toggle);
+
+		/* Wait for the power to come up */
+		while (!is_cpu_powered())
+			;			/* Do nothing */
+
+		/*
+		 * Remove the I/O clamps from CPU power partition.
+		 * Recommended only on a Warm boot, if the CPU partition gets
+		 * power gated. Shouldn't cause any harm when called after a
+		 * cold boot according to HW, probably just redundant.
+		 */
+		remove_cpu_io_clamps();
+	}
+}
+
+static void enable_cpu_power_rail(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32   reg;
+
+	reg = readl(&pmc->pmc_cntrl);
+	reg |= CPUPWRREQ_OE;
+	writel(reg, &pmc->pmc_cntrl);
+
+	/* Delay to allow for stable power (CPU_PWR_REQ -> VDD_CPU from PMU) */
+	udelay(3750);
+}
+
+static void reset_A9_cpu(int reset)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32   reg, cpu;
+
+	/*
+	* NOTE:  Regardless of whether the request is to hold the CPU in reset
+	*        or take it out of reset, every processor in the CPU complex
+	*        except the master (CPU 0) will be held in reset because the
+	*        AVP only talks to the master. The AVP does not know that there
+	*        are multiple processors in the CPU complex.
+	*/
+
+	/* Hold CPU 1 in reset */
+	cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
+	writel(cpu, &clkrst->crc_cpu_cmplx_set);
+
+	reg = readl(&clkrst->crc_rst_dev_l);
+	if (reset) {
+		/* Place CPU0 into reset */
+		cpu = SET_DBGRESET0 | SET_DERESET0 | SET_CPURESET0;
+		writel(cpu, &clkrst->crc_cpu_cmplx_set);
+
+		/* Enable master CPU reset */
+		reg |= SWR_CPU_RST;
+
+	} else {
+		/* Take CPU0 out of reset */
+		cpu = CLR_DBGRESET0 | CLR_DERESET0 | CLR_CPURESET0;
+		writel(cpu, &clkrst->crc_cpu_cmplx_clr);
+
+		/* Disable master CPU reset */
+		reg &= ~SWR_CPU_RST;
+	}
+
+	writel(reg, &clkrst->crc_rst_dev_l);
+}
+
+static void clock_enable_coresight(int enable)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32   rst, clk, src;
+
+	rst = readl(&clkrst->crc_rst_dev_u);
+	clk = readl(&clkrst->crc_clk_out_enb_u);
+
+	if (enable) {
+		rst &= ~SWR_CSITE_RST;
+		clk |= CLK_ENB_CSITE;
+	} else {
+		rst |= SWR_CSITE_RST;
+		clk &= ~CLK_ENB_CSITE;
+	}
+
+	writel(clk, &clkrst->crc_clk_out_enb_u);
+	writel(rst, &clkrst->crc_rst_dev_u);
+
+	if (enable) {
+		/*
+		 * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
+		 *  1.5, giving an effective frequency of 144MHz.
+		 * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
+		 *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
+		 */
+		src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
+		writel(src, &clkrst->crc_clk_src_csite);
+
+		/* Unlock the CPU CoreSight interfaces */
+		rst = 0xC5ACCE55;
+		writel(rst, CSITE_CPU_DBG0_LAR);
+		writel(rst, CSITE_CPU_DBG1_LAR);
+	}
+}
+
+void start_cpu(u32 reset_vector)
+{
+	/* Enable VDD_CPU */
+	enable_cpu_power_rail();
+
+	/* Hold the CPUs in reset */
+	reset_A9_cpu(TRUE);
+
+	/* Disable the CPU clock */
+	enable_cpu_clock(FALSE);
+
+	/* Enable CoreSight */
+	clock_enable_coresight(TRUE);
+
+	/*
+	* Set the entry point for CPU execution from reset,
+	*  if it's a non-zero value.
+	*/
+	if (reset_vector)
+		set_cpu_reset_vector(reset_vector);
+
+	/* Enable the CPU clock */
+	enable_cpu_clock(TRUE);
+
+	/* If the CPU doesn't already have power, power it up */
+	if (!is_cpu_powered())
+		powerup_cpu();
+
+	/* Take the CPU out of reset */
+	reset_A9_cpu(FALSE);
+}
+
+
+void halt_avp(void)
+{
+	u32   reg;
+
+	for (;;) {
+		reg = HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
+			| HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29);
+		writel(reg, FLOW_CTLR_HALT_COP_EVENTS);
+	}
+	/* Should never get here */
+	for (;;)
+		;
+}
+
+void startup_cpu(void)
+{
+	/* Initialize the AVP, clocks, and memory controller */
+	/* SDRAM is guaranteed to be on at this point */
+
+	asm volatile(
+
+	/* Start the CPU */
+	/* R0 = reset vector for CPU */
+	"ldr     r0, =cold_boot      \n"
+	"bl      start_cpu           \n"
+
+	/* Transfer control to the AVP code */
+	"bl      halt_avp            \n"
+
+	/* Should never get here */
+	"b       .                   \n"
+	);
+}
+
+extern ulong _armboot_start;
+u32 cpu_boot_stack = CPU_EARLY_BOOT_STACK_LIMIT;
+u32 avp_boot_stack = AVP_EARLY_BOOT_STACK_LIMIT;
+u32 proc_tag = PG_UP_TAG_0_PID_CPU & 0xFF;
+u32 deadbeef = 0xdeadbeef;
+
+void cold_boot(void)
+{
+	asm volatile(
+
+	"msr     cpsr_c, #0xd3            \n"
+	/*
+	* Check current processor: CPU or AVP?
+	* If AVP, go to AVP boot code, else continue on.
+	*/
+	"mov     r0, %0                   \n"
+	"ldrb    r2, [r0, %1]             \n"
+	/* are we the CPU? */
+	"cmp     r2, %2                   \n"
+	"mov     sp, %3                   \n"
+	/* leave in some symbols for release debugging */
+	"mov     r3, %6                   \n"
+	"str     r3, [sp, #-4]!           \n"
+	"str     r3, [sp, #-4]!           \n"
+	/*  yep, we are the CPU */
+	"bxeq     %4                      \n"
+	/* AVP Initialization follows this path */
+	"mov     sp, %5                   \n"
+	/* leave in some symbols for release debugging */
+	"mov     r3, %6                   \n"
+	"str     r3, [sp, #-4]!           \n"
+	"str     r3, [sp, #-4]!           \n"
+
+	/* Init and Start CPU */
+	"b       startup_cpu              \n"
+	:
+	: "i"(NV_PA_PG_UP_BASE),
+	"i"(PG_UP_TAG_0),
+	"r"(proc_tag),
+	"r"(cpu_boot_stack),
+	"r"(_armboot_start),
+	"r"(avp_boot_stack),
+	"r"(deadbeef)
+	: "r0", "r2", "r3", "cc", "lr"
+	);
+}
+
+void enable_scu(void)
+{
+	struct scu_ctlr *scu = (struct scu_ctlr *)NV_PA_ARM_PERIPHBASE;
+	u32 reg;
+
+	reg = readl(&scu->scu_ctrl);
+	if (reg & SCU_ENABLE) {
+		/* SCU already enabled, return */
+		return;
+	}
+
+	/* Invalidate all ways for all processors */
+	writel(0xFFFF, &scu->scu_inv_all);
+
+	/* Enable SCU - bit 0 */
+	reg = readl(&scu->scu_ctrl);
+	reg |= SCU_ENABLE;
+	writel(reg, &scu->scu_ctrl);
+
+	return;
+}
+
+void cache_configure(void)
+{
+	asm volatile(
+
+	"stmdb r13!,{r14}                 \n"
+	/* invalidate instruction cache */
+	"mov r1, #0                       \n"
+	"mcr p15, 0, r1, c7, c5, 0        \n"
+
+	/* invalidate the i&d tlb entries */
+	"mcr p15, 0, r1, c8, c5, 0        \n"
+	"mcr p15, 0, r1, c8, c6, 0        \n"
+
+	/* enable instruction cache */
+	"mrc  p15, 0, r1, c1, c0, 0       \n"
+	"orr  r1, r1, #(1<<12)            \n"
+	"mcr  p15, 0, r1, c1, c0, 0       \n"
+
+	"bl enable_scu                    \n"
+
+	/* enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg */
+	"mrc p15, 0, r0, c1, c0, 1        \n"
+	"orr r0, r0, #0x41                \n"
+	"mcr p15, 0, r0, c1, c0, 1        \n"
+
+	/* Now flush the Dcache */
+	"mov r0, #0                       \n"
+	/* 256 cache lines */
+	"mov r1, #256                     \n"
+
+	"invalidate_loop:                 \n"
+
+	"add r1, r1, #-1                  \n"
+	"mov r0, r1, lsl #5               \n"
+	/* invalidate d-cache using line (way0) */
+	"mcr p15, 0, r0, c7, c6, 2        \n"
+
+	"orr r2, r0, #(1<<30)             \n"
+	/* invalidate d-cache using line (way1) */
+	"mcr p15, 0, r2, c7, c6, 2        \n"
+
+	"orr r2, r0, #(2<<30)             \n"
+	/* invalidate d-cache using line (way2) */
+	"mcr p15, 0, r2, c7, c6, 2        \n"
+
+	"orr r2, r0, #(3<<30)             \n"
+	/* invalidate d-cache using line (way3) */
+	"mcr p15, 0, r2, c7, c6, 2        \n"
+	"cmp r1, #0                       \n"
+	"bne invalidate_loop              \n"
+
+	/* FIXME: should have ap20's L2 disabled too */
+	"invalidate_done:                 \n"
+	"ldmia r13!,{pc}                  \n"
+	".ltorg                           \n"
+	);
+}
+
+void init_pmc_scratch(void)
+{
+	struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+
+	/* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
+	memset(&pmc->pmc_scratch1, 0, 23*4);
+	writel(CONFIG_SYS_BOARD_ODMDATA, &pmc->pmc_scratch20);
+}
+
+u32 s_first_boot = 1;
+
+void cpu_start(void)
+{
+	struct pmux_tri_ctlr *pmt = (struct pmux_tri_ctlr *)NV_PA_APB_MISC_BASE;
+
+	/* enable JTAG */
+	writel(0xC0, &pmt->pmt_cfg_ctl);
+
+	if (s_first_boot) {
+		/*
+		 * Need to set this before cold-booting,
+		 *  otherwise we'll end up in an infinite loop.
+		 */
+		s_first_boot = 0;
+		cold_boot();
+	}
+
+	return;
+}
+
+void tegra2_start()
+{
+	if (s_first_boot) {
+		/* Init Debug UART Port (115200 8n1) */
+		uart_init();
+
+		/* Init PMC scratch memory */
+		init_pmc_scratch();
+	}
+
+#ifdef CONFIG_ENABLE_CORTEXA9
+	/* take the mpcore out of reset */
+	cpu_start();
+
+	/* configure cache */
+	cache_configure();
+#endif
+}
+
diff --git a/arch/arm/cpu/armv7/tegra2/ap20.h b/arch/arm/cpu/armv7/tegra2/ap20.h
new file mode 100644
index 0000000..de7622d
--- /dev/null
+++ b/arch/arm/cpu/armv7/tegra2/ap20.h
@@ -0,0 +1,105 @@ 
+/*
+ * (C) Copyright 2010-2011
+ * NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+#include <asm/types.h>
+
+#ifndef	FALSE
+#define FALSE	0
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
+
+#define NVBL_PLLP_KHZ	(432000/2)
+/* The stabilization delay in usec */
+#define NVBOOT_CLOCKS_PLL_STABILIZATION_DELAY (300)
+
+#define PLLX_ENABLED		(1 << 30)
+#define CCLK_BURST_POLICY	0x20008888
+#define SUPER_CCLK_DIVIDER	0x80000000
+
+/* Calculate clock fractional divider value from ref and target frequencies */
+#define CLK_DIVIDER(REF, FREQ)  ((((REF) * 2) / FREQ) - 2)
+
+/* Calculate clock frequency value from reference and clock divider value */
+#define CLK_FREQUENCY(REF, REG)  (((REF) * 2) / (REG + 2))
+
+/* AVP/CPU ID */
+#define PG_UP_TAG_0_PID_CPU	0x55555555	/* CPU aka "a9" aka "mpcore" */
+#define PG_UP_TAG_0             0x0
+
+/* AP20-Specific Base Addresses */
+
+/* AP20 Base physical address of SDRAM. */
+#define AP20_BASE_PA_SDRAM      0x00000000
+/* AP20 Base physical address of internal SRAM. */
+#define AP20_BASE_PA_SRAM       0x40000000
+/* AP20 Size of internal SRAM (256KB). */
+#define AP20_BASE_PA_SRAM_SIZE  0x00040000
+/* AP20 Base physical address of flash. */
+#define AP20_BASE_PA_NOR_FLASH  0xD0000000
+/* AP20 Base physical address of boot information table. */
+#define AP20_BASE_PA_BOOT_INFO  AP20_BASE_PA_SRAM
+
+/*
+ * Super-temporary stacks for EXTREMELY early startup. The values chosen for
+ * these addresses must be valid on ALL SOCs because this value is used before
+ * we are able to differentiate between the SOC types.
+ *
+ * NOTE: The since CPU's stack will eventually be moved from IRAM to SDRAM, its
+ *       stack is placed below the AVP stack. Once the CPU stack has been moved,
+ *       the AVP is free to use the IRAM the CPU stack previously occupied if
+ *       it should need to do so.
+ *
+ * NOTE: In multi-processor CPU complex configurations, each processor will have
+ *       its own stack of size CPU_EARLY_BOOT_STACK_SIZE. CPU 0 will have a
+ *       limit of CPU_EARLY_BOOT_STACK_LIMIT. Each successive CPU will have a
+ *       stack limit that is CPU_EARLY_BOOT_STACK_SIZE less then the previous
+ *       CPU.
+ */
+
+/* Common AVP early boot stack limit */
+#define AVP_EARLY_BOOT_STACK_LIMIT	\
+	(AP20_BASE_PA_SRAM + (AP20_BASE_PA_SRAM_SIZE/2))
+/* Common AVP early boot stack size */
+#define AVP_EARLY_BOOT_STACK_SIZE	0x1000
+/* Common CPU early boot stack limit */
+#define CPU_EARLY_BOOT_STACK_LIMIT	\
+	(AVP_EARLY_BOOT_STACK_LIMIT - AVP_EARLY_BOOT_STACK_SIZE)
+/* Common CPU early boot stack size */
+#define CPU_EARLY_BOOT_STACK_SIZE	0x1000
+
+#define EXCEP_VECTOR_CPU_RESET_VECTOR	(NV_PA_EVP_BASE + 0x100)
+#define CSITE_CPU_DBG0_LAR		(NV_PA_CSITE_BASE + 0x10FB0)
+#define CSITE_CPU_DBG1_LAR		(NV_PA_CSITE_BASE + 0x12FB0)
+
+#define FLOW_CTLR_HALT_COP_EVENTS	(NV_PA_FLOW_BASE + 4)
+#define FLOW_MODE_STOP			2
+#define HALT_COP_EVENT_JTAG		(1 << 28)
+#define HALT_COP_EVENT_IRQ_1		(1 << 11)
+#define HALT_COP_EVENT_FIQ_1		(1 << 9)
+
+/* Prototypes */
+
+void tegra2_start(void);
+void uart_init(void);
+void udelay(unsigned long);
diff --git a/arch/arm/include/asm/arch-tegra2/clk_rst.h b/arch/arm/include/asm/arch-tegra2/clk_rst.h
index 6d573bf..d67a5d7 100644
--- a/arch/arm/include/asm/arch-tegra2/clk_rst.h
+++ b/arch/arm/include/asm/arch-tegra2/clk_rst.h
@@ -149,6 +149,9 @@  struct clk_rst_ctlr {
 	uint crc_clk_src_csite;		/*_CSITE_0,		0x1D4 */
 	uint crc_reserved19[9];		/*			0x1D8-1F8 */
 	uint crc_clk_src_osc;		/*_OSC_0,		0x1FC */
+	uint crc_reserved20[80];	/*			0x200-33C */
+	uint crc_cpu_cmplx_set;		/* _CPU_CMPLX_SET_0,	0x340 */
+	uint crc_cpu_cmplx_clr;		/* _CPU_CMPLX_CLR_0,	0x344 */
 };
 
 #define PLL_BYPASS		(1 << 31)
@@ -162,4 +165,28 @@  struct clk_rst_ctlr {
 #define SWR_UARTA_RST		(1 << 6)
 #define CLK_ENB_UARTA		(1 << 6)
 
+#define SWR_CPU_RST		(1 << 0)
+#define CLK_ENB_CPU		(1 << 0)
+#define SWR_CSITE_RST		(1 << 9)
+#define CLK_ENB_CSITE		(1 << 9)
+
+#define SET_CPURESET0		(1 << 0)
+#define SET_DERESET0		(1 << 4)
+#define SET_DBGRESET0		(1 << 12)
+
+#define SET_CPURESET1		(1 << 1)
+#define SET_DERESET1		(1 << 5)
+#define SET_DBGRESET1		(1 << 13)
+
+#define CLR_CPURESET0		(1 << 0)
+#define CLR_DERESET0		(1 << 4)
+#define CLR_DBGRESET0		(1 << 12)
+
+#define CLR_CPURESET1		(1 << 1)
+#define CLR_DERESET1		(1 << 5)
+#define CLR_DBGRESET1		(1 << 13)
+
+#define CPU0_CLK_STP		(1 << 8)
+#define CPU1_CLK_STP		(1 << 9)
+
 #endif	/* CLK_RST_H */
diff --git a/arch/arm/include/asm/arch-tegra2/pmc.h b/arch/arm/include/asm/arch-tegra2/pmc.h
index 7ec9eeb..b1d47cd 100644
--- a/arch/arm/include/asm/arch-tegra2/pmc.h
+++ b/arch/arm/include/asm/arch-tegra2/pmc.h
@@ -121,4 +121,12 @@  struct pmc_ctlr {
 	uint pmc_gate;			/* _GATE_0, offset 15C */
 };
 
+#define CPU_PWRED	1
+#define CPU_CLMP	1
+
+#define PARTID_CP	0xFFFFFFF8
+#define START_CP	(1 << 8)
+
+#define CPUPWRREQ_OE	(1 << 16)
+
 #endif	/* PMC_H */
diff --git a/arch/arm/include/asm/arch-tegra2/scu.h b/arch/arm/include/asm/arch-tegra2/scu.h
new file mode 100644
index 0000000..d2faa6f
--- /dev/null
+++ b/arch/arm/include/asm/arch-tegra2/scu.h
@@ -0,0 +1,43 @@ 
+/*
+ *  (C) Copyright 2010,2011
+ *  NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _SCU_H_
+#define _SCU_H_
+
+/* ARM Snoop Control Unit (SCU) registers */
+struct scu_ctlr {
+	uint scu_ctrl;		/* SCU Control Register, offset 00 */
+	uint scu_cfg;		/* SCU Config Register, offset 04 */
+	uint scu_cpu_pwr_stat;	/* SCU CPU Power Status Register, offset 08 */
+	uint scu_inv_all;	/* SCU Invalidate All Register, offset 0C */
+	uint scu_reserved0[12];	/* reserved, offset 10-3C */
+	uint scu_filt_start;	/* SCU Filtering Start Address Reg, offset 40 */
+	uint scu_filt_end;	/* SCU Filtering End Address Reg, offset 44 */
+	uint scu_reserved1[2];	/* reserved, offset 48-4C */
+	uint scu_acc_ctl;	/* SCU Access Control Register, offset 50 */
+	uint scu_ns_acc_ctl;	/* SCU Non-secure Access Cntrl Reg, offset 54 */
+};
+
+#define SCU_ENABLE	1
+
+#endif	/* SCU_H */
diff --git a/arch/arm/include/asm/arch-tegra2/tegra2.h b/arch/arm/include/asm/arch-tegra2/tegra2.h
index 9001b68..5813cd9 100644
--- a/arch/arm/include/asm/arch-tegra2/tegra2.h
+++ b/arch/arm/include/asm/arch-tegra2/tegra2.h
@@ -25,8 +25,12 @@ 
 #define _TEGRA2_H_
 
 #define NV_PA_SDRAM_BASE	0x00000000
+#define NV_PA_ARM_PERIPHBASE	0x50040000
+#define NV_PA_PG_UP_BASE	0x60000000
 #define NV_PA_TMRUS_BASE	0x60005010
 #define NV_PA_CLK_RST_BASE	0x60006000
+#define NV_PA_FLOW_BASE		0x60007000
+#define NV_PA_EVP_BASE		0x6000F000
 #define NV_PA_APB_MISC_BASE	0x70000000
 #define NV_PA_APB_UARTA_BASE	(NV_PA_APB_MISC_BASE + 0x6000)
 #define NV_PA_APB_UARTB_BASE	(NV_PA_APB_MISC_BASE + 0x6040)
@@ -34,6 +38,7 @@ 
 #define NV_PA_APB_UARTD_BASE	(NV_PA_APB_MISC_BASE + 0x6300)
 #define NV_PA_APB_UARTE_BASE	(NV_PA_APB_MISC_BASE + 0x6400)
 #define NV_PA_PMC_BASE		0x7000E400
+#define NV_PA_CSITE_BASE	0x70040000
 
 #define TEGRA2_SDRC_CS0		NV_PA_SDRAM_BASE
 #define LOW_LEVEL_SRAM_STACK	0x4000FFFC
diff --git a/board/nvidia/common/board.c b/board/nvidia/common/board.c
index b2c412c..078547b 100644
--- a/board/nvidia/common/board.c
+++ b/board/nvidia/common/board.c
@@ -30,6 +30,7 @@ 
 #include <asm/arch/clk_rst.h>
 #include <asm/arch/pinmux.h>
 #include <asm/arch/uart.h>
+#include "board.h"
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -37,6 +38,15 @@  const struct tegra2_sysinfo sysinfo = {
 	CONFIG_TEGRA2_BOARD_STRING
 };
 
+#ifdef CONFIG_BOARD_EARLY_INIT_F
+int board_early_init_f(void)
+{
+	debug("Board Early Init\n");
+	tegra2_start();
+	return 0;
+}
+#endif	/* EARLY_INIT */
+
 /*
  * Routine: timer_init
  * Description: init the timestamp and lastinc value
diff --git a/board/nvidia/common/board.h b/board/nvidia/common/board.h
new file mode 100644
index 0000000..47c7885
--- /dev/null
+++ b/board/nvidia/common/board.h
@@ -0,0 +1,29 @@ 
+/*
+ *  (C) Copyright 2010,2011
+ *  NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _BOARD_H_
+#define _BOARD_H_
+
+void tegra2_start(void);
+
+#endif	/* BOARD_H */
diff --git a/include/configs/harmony.h b/include/configs/harmony.h
index d004f31..34bd899 100644
--- a/include/configs/harmony.h
+++ b/include/configs/harmony.h
@@ -46,4 +46,5 @@ 
 #define CONFIG_MACH_TYPE		MACH_TYPE_HARMONY
 #define CONFIG_SYS_BOARD_ODMDATA	0x300d8011 /* lp1, 1GB */
 
+#define CONFIG_BOARD_EARLY_INIT_F
 #endif /* __CONFIG_H */
diff --git a/include/configs/seaboard.h b/include/configs/seaboard.h
index fd87560..b7b72bb 100644
--- a/include/configs/seaboard.h
+++ b/include/configs/seaboard.h
@@ -40,4 +40,5 @@ 
 #define CONFIG_MACH_TYPE		MACH_TYPE_TEGRA_SEABOARD
 #define CONFIG_SYS_BOARD_ODMDATA	0x300d8011 /* lp1, 1GB */
 
+#define CONFIG_BOARD_EARLY_INIT_F
 #endif /* __CONFIG_H */
diff --git a/include/configs/tegra2-common.h b/include/configs/tegra2-common.h
index 4f4374a..2924325 100644
--- a/include/configs/tegra2-common.h
+++ b/include/configs/tegra2-common.h
@@ -33,6 +33,8 @@ 
 #define CONFIG_MACH_TEGRA_GENERIC	/* which is a Tegra generic machine */
 #define CONFIG_L2_OFF			/* No L2 cache */
 
+#define CONFIG_ENABLE_CORTEXA9		/* enable CPU (A9 complex) */
+
 #include <asm/arch/tegra2.h>		/* get chip and board defs */
 
 /*