diff mbox

[U-Boot,V2] arm: Tegra2: add support for A9 CPU init

Message ID 1300492605-15129-1-git-send-email-twarren@nvidia.com
State Changes Requested
Headers show

Commit Message

Tom Warren March 18, 2011, 11:56 p.m. UTC
Signed-off-by: Tom Warren <twarren@nvidia.com>
---
Changes for V2:
	- Remove returns in void functions
	- Move inline assembly code to .S file
	- Simplify some if/else code, break out common code
	- Minimize the use of local vars
	- Inline some single-instance functions
	- Remove TRUE/FALSE define, use 1/0 instead
	- Replace memset of mem-mapped regs w/loop of writel's

 arch/arm/cpu/armv7/start.S                 |   12 +
 arch/arm/cpu/armv7/tegra2/Makefile         |    2 +-
 arch/arm/cpu/armv7/tegra2/ap20.c           |  366 ++++++++++++++++++++++++++++
 arch/arm/cpu/armv7/tegra2/ap20.h           |  105 ++++++++
 arch/arm/cpu/armv7/tegra2/lowlevel_init.S  |   70 ++++++
 arch/arm/include/asm/arch-tegra2/clk_rst.h |   27 ++
 arch/arm/include/asm/arch-tegra2/pmc.h     |    8 +
 arch/arm/include/asm/arch-tegra2/scu.h     |   43 ++++
 arch/arm/include/asm/arch-tegra2/tegra2.h  |    5 +
 board/nvidia/common/board.c                |   10 +
 board/nvidia/common/board.h                |   29 +++
 include/configs/harmony.h                  |    1 +
 include/configs/seaboard.h                 |    3 +-
 include/configs/tegra2-common.h            |    2 +
 14 files changed, 681 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/cpu/armv7/tegra2/ap20.c
 create mode 100644 arch/arm/cpu/armv7/tegra2/ap20.h
 create mode 100644 arch/arm/include/asm/arch-tegra2/scu.h
 create mode 100644 board/nvidia/common/board.h

Comments

Peter Tyser March 25, 2011, 4:02 p.m. UTC | #1
Hi Tom,
Things look pretty good.  Minor comments/questions below.

<snip>

> +/*
> + * TBD: Move cold_boot() to assembly file.
> + * Values/offsets of the table vars make this difficult.
> + */
> +
> +void cold_boot(void)
> +{
> +	asm volatile(
> +		"msr	cpsr_c, #0xD3	\n"
> +		/*
> +		* Check current processor: CPU or AVP?
> +		* If CPU, go to CPU boot code, else continue on AVP path.
> +		*/
> +		"mov	r0, %0		\n"
> +		"ldrb	r2, [r0, %1]	\n"
> +		/* are we the CPU? */
> +		"cmp	r2, %2		\n"
> +		"mov	sp, %3		\n"
> +		/*  yep, we are the CPU */
> +		"bxeq	%4		\n"
> +
> +		/* AVP initialization follows this path */
> +		"mov	sp, %5		\n"
> +		/* Init and start CPU */
> +		"b	startup_cpu	\n"
> +		:
> +		: "i"(NV_PA_PG_UP_BASE),
> +		"i"(PG_UP_TAG_0),
> +		"r"(proc_tag),
> +		"r"(cpu_boot_stack),
> +		"r"(_armboot_start),
> +		"r"(avp_boot_stack)
> +		: "r0", "r2", "cc", "lr"
> +	);
> +}

What errors did you encounter when this was in the assembly file?  It'd
be nice to put it there now.  Likely it will never get fixed if it
isn't implemented correctly off the bat.  If you post the errors
perhaps someone on the list can provide insight.

<snip>

> +.globl startup_cpu
> +startup_cpu:
> +	@ Initialize the AVP, clocks, and memory controller
> +	@ SDRAM is guaranteed to be on at this point
> +
> +	ldr     r0, =cold_boot			@ R0 = reset vector for CPU
> +	bl      start_cpu			@ start the CPU
> +
> +	@ Transfer control to the AVP code */
> +	bl      halt_avp
> +
> +	@ Should never get here
> +_loop_forever2:
> +	b	_loop_forever2
> +
> +.globl cache_configure
> +cache_configure:
> +	stmdb r13!,{r14}
> +@	/* invalidate instruction cache */

It looks like there's a combination of comment forms @, @ */, and @ /*
*/.  Is there a reason not to use the normal /* */ universally?

Best,
Peter
Tom Warren March 25, 2011, 4:16 p.m. UTC | #2
Peter,

On Fri, Mar 25, 2011 at 9:02 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
> Hi Tom,
> Things look pretty good.  Minor comments/questions below.
>
> <snip>
>
>> +/*
>> + * TBD: Move cold_boot() to assembly file.
>> + * Values/offsets of the table vars make this difficult.
>> + */
>> +
>> +void cold_boot(void)
>> +{
>> +     asm volatile(
>> +             "msr    cpsr_c, #0xD3   \n"
>> +             /*
>> +             * Check current processor: CPU or AVP?
>> +             * If CPU, go to CPU boot code, else continue on AVP path.
>> +             */
>> +             "mov    r0, %0          \n"
>> +             "ldrb   r2, [r0, %1]    \n"
>> +             /* are we the CPU? */
>> +             "cmp    r2, %2          \n"
>> +             "mov    sp, %3          \n"
>> +             /*  yep, we are the CPU */
>> +             "bxeq   %4              \n"
>> +
>> +             /* AVP initialization follows this path */
>> +             "mov    sp, %5          \n"
>> +             /* Init and start CPU */
>> +             "b      startup_cpu     \n"
>> +             :
>> +             : "i"(NV_PA_PG_UP_BASE),
>> +             "i"(PG_UP_TAG_0),
>> +             "r"(proc_tag),
>> +             "r"(cpu_boot_stack),
>> +             "r"(_armboot_start),
>> +             "r"(avp_boot_stack)
>> +             : "r0", "r2", "cc", "lr"
>> +     );
>> +}
>
> What errors did you encounter when this was in the assembly file?  It'd
> be nice to put it there now.  Likely it will never get fixed if it
> doesn't implemented correctly off the bat.  If you post the errors
> perhaps someone on the list can provide insight.
I didn't capture a log of the errors when I was trying to put the
cold_boot code into lowlevel_init.S. But I saw fixup errors and
undefined constant errors, all related to the #defines (NV_PG_UP_BASE,
avp/cpu_boot_stack, etc.) and how the compiler/assembler references
indirect and relative constants.

Note that this code works perfectly as-is, so there's no pressing need
to move it to assembly now, except for a cosmetic/procedural one. I'd
rather get this accepted into mainline, so I can move on to the
eMMC/SPI/USB drivers so people can use the code to boot an OS on our
(many) Tegra2 boards coming to market RSN.

If some ARM / gcc assembly wizard wants to attempt moving this code to
a .S file, I welcome the help - I may even attack it at a later date,
when I've got more bandwidth. But it isn't a priority for me right
now, unless someone on the list adamantly opposes the code as-is. But
I'd expect anyone with that strong an opinion about it to be able to fix
it, or at least attempt it and see why I decided to defer moving it to
assembly for now.

>
> <snip>
>
>> +.globl startup_cpu
>> +startup_cpu:
>> +     @ Initialize the AVP, clocks, and memory controller
>> +     @ SDRAM is guaranteed to be on at this point
>> +
>> +     ldr     r0, =cold_boot                  @ R0 = reset vector for CPU
>> +     bl      start_cpu                       @ start the CPU
>> +
>> +     @ Transfer control to the AVP code */
>> +     bl      halt_avp
>> +
>> +     @ Should never get here
>> +_loop_forever2:
>> +     b       _loop_forever2
>> +
>> +.globl cache_configure
>> +cache_configure:
>> +     stmdb r13!,{r14}
>> +@    /* invalidate instruction cache */
>
> It looks like there's a combination of comment forms @, @ */, and @ /*
> */.  Is there a reason not to use the normal /* */ universally?
No, just dross left over from moving the inline assembly from .c to
.S. The rest of lowlevel_init.S uses @ for comments, so I tried to
stick with that. I'll fix it, thanks.

>
> Best,
> Peter
>
>
Thanks,
Tom
Peter Tyser March 25, 2011, 5:22 p.m. UTC | #3
On Fri, 2011-03-25 at 09:16 -0700, Tom Warren wrote:
> Peter,
> 
> On Fri, Mar 25, 2011 at 9:02 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
> > Hi Tom,
> > Things look pretty good.  Minor comments/questions below.
> >
> > <snip>
> >
> >> +/*
> >> + * TBD: Move cold_boot() to assembly file.
> >> + * Values/offsets of the table vars make this difficult.
> >> + */
> >> +
> >> +void cold_boot(void)
> >> +{
> >> +     asm volatile(
> >> +             "msr    cpsr_c, #0xD3   \n"
> >> +             /*
> >> +             * Check current processor: CPU or AVP?
> >> +             * If CPU, go to CPU boot code, else continue on AVP path.
> >> +             */
> >> +             "mov    r0, %0          \n"
> >> +             "ldrb   r2, [r0, %1]    \n"
> >> +             /* are we the CPU? */
> >> +             "cmp    r2, %2          \n"
> >> +             "mov    sp, %3          \n"
> >> +             /*  yep, we are the CPU */
> >> +             "bxeq   %4              \n"
> >> +
> >> +             /* AVP initialization follows this path */
> >> +             "mov    sp, %5          \n"
> >> +             /* Init and start CPU */
> >> +             "b      startup_cpu     \n"
> >> +             :
> >> +             : "i"(NV_PA_PG_UP_BASE),
> >> +             "i"(PG_UP_TAG_0),
> >> +             "r"(proc_tag),
> >> +             "r"(cpu_boot_stack),
> >> +             "r"(_armboot_start),
> >> +             "r"(avp_boot_stack)
> >> +             : "r0", "r2", "cc", "lr"
> >> +     );
> >> +}
> >
> > What errors did you encounter when this was in the assembly file?  It'd
> > be nice to put it there now.  Likely it will never get fixed if it
> > doesn't implemented correctly off the bat.  If you post the errors
> > perhaps someone on the list can provide insight.
> I didn't capture a log of the errors when I was trying to put the
> cold_boot code into lowlevel_init.S. But I saw fixup errors and
> undefined constant errors, all related to the #defines (NV_PG_UP_BASE,
> avp/cpu_boot_stack, etc.) and how the compiler/assembler references
> indirect and relative constants.
> 
> Note that this code works perfectly as-is, so there's no pressing need
> to move it to assembly now, except for a cosmetic/procedural one. I'd
> rather get this accepted into mainline, so I can move on to the
> eMMC/SPI/USB drivers so people can use the code to boot an OS on our
> (many) Tegra2 boards coming to market RSN.
> 
> If some ARM / gcc assembly wizard wants to attempt moving this code to
> a .S file, I welcome the help - I may even attack it at a later date,
> when I've got more bandwidth. But it isn't a priority for me right
> now, unless someone on the list adamantly opposes the code as-s. But
> I'd expect anyone with that strong an opinion about to be able to fix
> it, or at least attempt it and see why I decided to defer moving it to
> assembly for now.

I understand your perspective, but why not spend the extra 30 minutes
and do it the right way?  Passing the buck to someone else who cares
about maintaining high quality code isn't the right thing to do in my
opinion.  This patch isn't going to make it into the upcoming release,
so it won't gate the other eMMC/SPI/USB drivers you want to add.  The
bar to get code into an open source project is generally higher than "it
works" - it has to adhere to the project's design principles and
guidelines.  U-Boot already needs cleanup as is without adding more
cruft.  Solving this small issue now results in cleaner code, less
headache down the road, and shouldn't take long.  As usual, I'm not the
maintainer, so it's just my $0.02.

Best,
Peter
Tom Warren March 25, 2011, 6:05 p.m. UTC | #4
Peter,

On Fri, Mar 25, 2011 at 10:22 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
> On Fri, 2011-03-25 at 09:16 -0700, Tom Warren wrote:
>> Peter,
>>
>> On Fri, Mar 25, 2011 at 9:02 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
>> > Hi Tom,
>> > Things look pretty good.  Minor comments/questions below.
>> >
>> > <snip>
>> >
>> >> +/*
>> >> + * TBD: Move cold_boot() to assembly file.
>> >> + * Values/offsets of the table vars make this difficult.
>> >> + */
>> >> +
>> >> +void cold_boot(void)
>> >> +{
>> >> +     asm volatile(
>> >> +             "msr    cpsr_c, #0xD3   \n"
>> >> +             /*
>> >> +             * Check current processor: CPU or AVP?
>> >> +             * If CPU, go to CPU boot code, else continue on AVP path.
>> >> +             */
>> >> +             "mov    r0, %0          \n"
>> >> +             "ldrb   r2, [r0, %1]    \n"
>> >> +             /* are we the CPU? */
>> >> +             "cmp    r2, %2          \n"
>> >> +             "mov    sp, %3          \n"
>> >> +             /*  yep, we are the CPU */
>> >> +             "bxeq   %4              \n"
>> >> +
>> >> +             /* AVP initialization follows this path */
>> >> +             "mov    sp, %5          \n"
>> >> +             /* Init and start CPU */
>> >> +             "b      startup_cpu     \n"
>> >> +             :
>> >> +             : "i"(NV_PA_PG_UP_BASE),
>> >> +             "i"(PG_UP_TAG_0),
>> >> +             "r"(proc_tag),
>> >> +             "r"(cpu_boot_stack),
>> >> +             "r"(_armboot_start),
>> >> +             "r"(avp_boot_stack)
>> >> +             : "r0", "r2", "cc", "lr"
>> >> +     );
>> >> +}
>> >
>> > What errors did you encounter when this was in the assembly file?  It'd
>> > be nice to put it there now.  Likely it will never get fixed if it
>> > doesn't implemented correctly off the bat.  If you post the errors
>> > perhaps someone on the list can provide insight.
>> I didn't capture a log of the errors when I was trying to put the
>> cold_boot code into lowlevel_init.S. But I saw fixup errors and
>> undefined constant errors, all related to the #defines (NV_PG_UP_BASE,
>> avp/cpu_boot_stack, etc.) and how the compiler/assembler references
>> indirect and relative constants.
>>
>> Note that this code works perfectly as-is, so there's no pressing need
>> to move it to assembly now, except for a cosmetic/procedural one. I'd
>> rather get this accepted into mainline, so I can move on to the
>> eMMC/SPI/USB drivers so people can use the code to boot an OS on our
>> (many) Tegra2 boards coming to market RSN.
>>
>> If some ARM / gcc assembly wizard wants to attempt moving this code to
>> a .S file, I welcome the help - I may even attack it at a later date,
>> when I've got more bandwidth. But it isn't a priority for me right
>> now, unless someone on the list adamantly opposes the code as-s. But
>> I'd expect anyone with that strong an opinion about to be able to fix
>> it, or at least attempt it and see why I decided to defer moving it to
>> assembly for now.
>
> I understand your perspective, but why not spend the extra 30 minutes
> and do it the right way?  Passing the buck to someone else who cares
> about maintaining high quality code isn't the right thing to do in my
> opinion.  This patch isn't going to make it into the upcoming release,
> so it won't gate the other eMMC/SPI/USB drivers you want to add.  The
> bar to get code into open source project generally is higher than "it
> works" - it has to adhere to the project's design principles and
> guidelines.  U-Boot already needs cleanup as is without adding more
> cruft.  Solving this small issue now results in cleaner code, less
> headache down the road, and shouldn't take long.  As usual, I'm not the
> maintainer, so its just my $0.02.
FWIW, I spent _far_ more than 30 minutes on this .. close to a full
day of frustration/banging my head against the wall.  I have other
priorities besides upstreaming Tegra2 U-Boot support, and I can't
justify spending days on this. As I originally stated, I'm no expert
in the intricacies of ARM asm programming - my expertise is in x86
CPUs, and I hit a wall with this port to assembly.

I'm not passing the buck, and while I agree that this code is not the
cleanest I've seen, I don't think I'm pushing low-quality code here.
It's 8 lines of embedded assembly, plus a table. Looking at what the C
compiler produces in assembly, it's twice as long, pushes some of the
tabled values on the stack and then pulls them back into registers,
and is, IMHO, harder to understand.  I could just cut-and-paste the
compiler output into lowlevel_init.S, and add some comments, but is
that really any better?  Is the goal to get clean, understandable
code, or messier, harder to parse code in the right files?

As to adhering to U-Boot's design principles and guidelines, could you
point me to the section on embedded assembly in C files? I don't
remember seeing a specific section on that topic, don't see it under
U-Boot Coding Style, nor in the Linux coding guidelines, and I'd like
to reference it for future use.

For now, I'm going to let this percolate, as I have other fish to fry.

Thanks for your input,

Tom
>
> Best,
> Peter
>
>
Peter Tyser March 25, 2011, 7:56 p.m. UTC | #5
On Fri, 2011-03-25 at 11:05 -0700, Tom Warren wrote:
> Peter,
> 
> On Fri, Mar 25, 2011 at 10:22 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
> > On Fri, 2011-03-25 at 09:16 -0700, Tom Warren wrote:
> >> Peter,
> >>
> >> On Fri, Mar 25, 2011 at 9:02 AM, Peter Tyser <ptyser@xes-inc.com> wrote:
> >> > Hi Tom,
> >> > Things look pretty good.  Minor comments/questions below.
> >> >
> >> > <snip>
> >> >
> >> >> +/*
> >> >> + * TBD: Move cold_boot() to assembly file.
> >> >> + * Values/offsets of the table vars make this difficult.
> >> >> + */
> >> >> +
> >> >> +void cold_boot(void)
> >> >> +{
> >> >> +     asm volatile(
> >> >> +             "msr    cpsr_c, #0xD3   \n"
> >> >> +             /*
> >> >> +             * Check current processor: CPU or AVP?
> >> >> +             * If CPU, go to CPU boot code, else continue on AVP path.
> >> >> +             */
> >> >> +             "mov    r0, %0          \n"
> >> >> +             "ldrb   r2, [r0, %1]    \n"
> >> >> +             /* are we the CPU? */
> >> >> +             "cmp    r2, %2          \n"
> >> >> +             "mov    sp, %3          \n"
> >> >> +             /*  yep, we are the CPU */
> >> >> +             "bxeq   %4              \n"
> >> >> +
> >> >> +             /* AVP initialization follows this path */
> >> >> +             "mov    sp, %5          \n"
> >> >> +             /* Init and start CPU */
> >> >> +             "b      startup_cpu     \n"
> >> >> +             :
> >> >> +             : "i"(NV_PA_PG_UP_BASE),
> >> >> +             "i"(PG_UP_TAG_0),
> >> >> +             "r"(proc_tag),
> >> >> +             "r"(cpu_boot_stack),
> >> >> +             "r"(_armboot_start),
> >> >> +             "r"(avp_boot_stack)
> >> >> +             : "r0", "r2", "cc", "lr"
> >> >> +     );
> >> >> +}
> >> >
> >> > What errors did you encounter when this was in the assembly file?  It'd
> >> > be nice to put it there now.  Likely it will never get fixed if it
> >> > doesn't implemented correctly off the bat.  If you post the errors
> >> > perhaps someone on the list can provide insight.
> >> I didn't capture a log of the errors when I was trying to put the
> >> cold_boot code into lowlevel_init.S. But I saw fixup errors and
> >> undefined constant errors, all related to the #defines (NV_PG_UP_BASE,
> >> avp/cpu_boot_stack, etc.) and how the compiler/assembler references
> >> indirect and relative constants.
> >>
> >> Note that this code works perfectly as-is, so there's no pressing need
> >> to move it to assembly now, except for a cosmetic/procedural one. I'd
> >> rather get this accepted into mainline, so I can move on to the
> >> eMMC/SPI/USB drivers so people can use the code to boot an OS on our
> >> (many) Tegra2 boards coming to market RSN.
> >>
> >> If some ARM / gcc assembly wizard wants to attempt moving this code to
> >> a .S file, I welcome the help - I may even attack it at a later date,
> >> when I've got more bandwidth. But it isn't a priority for me right
> >> now, unless someone on the list adamantly opposes the code as-s. But
> >> I'd expect anyone with that strong an opinion about to be able to fix
> >> it, or at least attempt it and see why I decided to defer moving it to
> >> assembly for now.
> >
> > I understand your perspective, but why not spend the extra 30 minutes
> > and do it the right way?  Passing the buck to someone else who cares
> > about maintaining high quality code isn't the right thing to do in my
> > opinion.  This patch isn't going to make it into the upcoming release,
> > so it won't gate the other eMMC/SPI/USB drivers you want to add.  The
> > bar to get code into open source project generally is higher than "it
> > works" - it has to adhere to the project's design principles and
> > guidelines.  U-Boot already needs cleanup as is without adding more
> > cruft.  Solving this small issue now results in cleaner code, less
> > headache down the road, and shouldn't take long.  As usual, I'm not the
> > maintainer, so its just my $0.02.
> FWIW, I spent _far_ more than 30 minutes on this .. close to a full
> day of frustration/banging my head against the wall.  I have other
> priorities besides upstreaming Tegra2 U-Boot support, and I can't
> justify spending days on this. As I originally stated, I'm no expert
> in the intricacies of ARM asm programming - my expertise is in x86
> CPUs, and hit a wall with this port to assembly.

Understood.  When situations like that happen it's good to ask others for
input rather than using a non-optimal solution.  Then either
- Someone suggests how to fix the code, and the code improves.
- No one helps, which implies your method isn't easy to improve upon,
you gave it a good effort, and so it gets included.

As is, the current patch looks like a non-optimal solution that someone
familiar with ARM asm could have quickly helped with.

> I'm not passing the buck, and while I agree that this code is not the
> cleanest I've seen, I don't think I'm pushing low-quality code here.

Based on the fact that you tried to fix it for a day, it sounds like you
are aware it's a less than ideal implementation, and the "TBD" comment for
the function further implies that this function doesn't belong here, and
someone should fix it down the road.  My guess is that this will likely
never happen unless it gets fixed now.

> It's 8 lines of embedded assembly, plus a table. Looking at what the C
> compiler produces in assembly, it's twice as long, pushes some of the
> tabled values on the stack and then pulls them back into registers,
> and is, IMHO, harder to understand.  I could just cut-and-paste the
> compiler output into lowlevel_init.S, and add some comments, but is
> that really any better?  Is the goal to get clean, understandable
> code, or messier, harder to parse code in the right files?

> As to adhering to U-Boot's design principles and guidelines, could you
> point me to the section on embedded assembly in C files? I don't
> remember seeing a specific section on that topic, don't see it under
> U-Boot Coding Style, nor in the Linux coding guidelines, and I'd like
> to reference it for future use.

There isn't a specific section about inline assembly, similar to how
there isn't documentation on other general C rules like what goes in header
files, when to split up a file into 2 files, why global variables should
be avoided, etc.  The general rule is that inline assembly should only
be used when necessary, in rare cases like low-level control/access of
the CPU, special optimizations in critical paths, etc.

> For now, I'm going to let this percolate, as I have other fish to fry.

Sounds good.  I understand the schedule vs perfection issue - I'm just
voicing my opinions based on how I'd write the code with my infinite
free time, which ultimately doesn't carry a whole lot of weight:)
Regardless of how it works out its great seeing Nvidia push their code
upstream.

Best,
Peter
Alessandro Rubini March 25, 2011, 10:36 p.m. UTC | #6
> FWIW, I spent _far_ more than 30 minutes on this .. close to a full
> day of frustration/banging my head against the wall. [...]

I understand the feeling.

I'm one of those who suggested (but did not require) putting full-asm
functions in .S files for a reason; I personally think the reboot code
may remain as it is, given the effort involved.  There are minor
details that may be better, but since I'm unable to suggest (and at
least compile-test) them for lack of time, I'm not even listing them.

Thank you very much for your efforts in following previous
suggestions.

/alessandro
diff mbox

Patch

diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index cb4f92f..4b36693 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -70,6 +70,18 @@  _end_vect:
 _TEXT_BASE:
 	.word	CONFIG_SYS_TEXT_BASE
 
+#ifdef CONFIG_TEGRA2
+/*
+ * Tegra2 uses 2 separate CPUs - the AVP (ARM7TDMI) and the CPU (dual A9s).
+ * U-Boot runs on the AVP first, setting things up for the CPU (PLLs,
+ * muxes, clocks, clamps, etc.). Then the AVP halts, and expects the CPU
+ * to pick up its reset vector, which points here.
+ */
+.globl _armboot_start
+_armboot_start:
+        .word _start
+#endif
+
 /*
  * These are defined in the board-specific linker script.
  */
diff --git a/arch/arm/cpu/armv7/tegra2/Makefile b/arch/arm/cpu/armv7/tegra2/Makefile
index 687c887..f1ea915 100644
--- a/arch/arm/cpu/armv7/tegra2/Makefile
+++ b/arch/arm/cpu/armv7/tegra2/Makefile
@@ -28,7 +28,7 @@  include $(TOPDIR)/config.mk
 LIB	=  $(obj)lib$(SOC).o
 
 SOBJS	:= lowlevel_init.o
-COBJS	:= board.o sys_info.o timer.o
+COBJS	:= ap20.o board.o sys_info.o timer.o
 
 SRCS	:= $(SOBJS:.o=.S) $(COBJS:.o=.c)
 OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
diff --git a/arch/arm/cpu/armv7/tegra2/ap20.c b/arch/arm/cpu/armv7/tegra2/ap20.c
new file mode 100644
index 0000000..d0ad41f
--- /dev/null
+++ b/arch/arm/cpu/armv7/tegra2/ap20.c
@@ -0,0 +1,366 @@ 
+/*
+* (C) Copyright 2010-2011
+* NVIDIA Corporation <www.nvidia.com>
+*
+* See file CREDITS for list of people who contributed to this
+* project.
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of
+* the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+* MA 02111-1307 USA
+*/
+
+#include "ap20.h"
+#include <asm/io.h>
+#include <asm/arch/tegra2.h>
+#include <asm/arch/clk_rst.h>
+#include <asm/arch/pmc.h>
+#include <asm/arch/pinmux.h>
+#include <asm/arch/scu.h>
+#include <common.h>
+
+u32 s_first_boot = 1;
+
+static void enable_cpu_clock(int enable)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 reg, clk;
+
+	/*
+	 * NOTE:
+	 * Regardless of whether the request is to enable or disable the CPU
+	 * clock, every processor in the CPU complex except the master (CPU 0)
+	 * will have its clock stopped because the AVP only talks to the
+	 * master. The AVP does not know (nor does it need to know) that there
+	 * are multiple processors in the CPU complex.
+	 */
+
+	if (enable) {
+		/* Wait until all clocks are stable */
+		udelay(PLL_STABILIZATION_DELAY);
+
+		writel(CCLK_BURST_POLICY, &clkrst->crc_cclk_brst_pol);
+		writel(SUPER_CCLK_DIVIDER, &clkrst->crc_super_cclk_div);
+	}
+
+	/* Set the main CPU complex clock enable bit (done on every call) */
+	reg = readl(&clkrst->crc_clk_out_enb_l);
+	reg |= CLK_ENB_CPU;
+
+	/*
+	 * Read the register containing the individual CPU clock enables and
+	 * always stop the clock to CPU 1.
+	 */
+	clk = readl(&clkrst->crc_clk_cpu_cmplx);
+	clk |= CPU1_CLK_STP;
+
+	if (enable) {
+		/* Unstop the CPU 0 clock */
+		clk &= ~CPU0_CLK_STP;
+	} else {
+		/* Stop the CPU 0 clock */
+		clk |= CPU0_CLK_STP;
+	}
+
+	writel(clk, &clkrst->crc_clk_cpu_cmplx);
+	writel(reg, &clkrst->crc_clk_out_enb_l);
+}
+
+static int is_cpu_powered(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+
+	return (readl(&pmc->pmc_pwrgate_status) & CPU_PWRED) ? 1 : 0;
+}
+
+static void remove_cpu_io_clamps(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	/* Remove the clamps on the CPU I/O signals */
+	reg = readl(&pmc->pmc_remove_clamping);
+	reg |= CPU_CLMP;
+	writel(reg, &pmc->pmc_remove_clamping);
+
+	/* Give I/O signals time to stabilize */
+	udelay(IO_STABILIZATION_DELAY);
+}
+
+static void powerup_cpu(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	if (!is_cpu_powered()) {
+		/* Toggle the CPU power state (OFF -> ON) */
+		reg = readl(&pmc->pmc_pwrgate_toggle);
+		reg &= PARTID_CP;
+		reg |= START_CP;
+		writel(reg, &pmc->pmc_pwrgate_toggle);
+
+		/* Wait for the power to come up */
+		while (!is_cpu_powered())
+			;			/* Do nothing */
+
+		/*
+		 * Remove the I/O clamps from CPU power partition.
+		 * Recommended only on a Warm boot, if the CPU partition gets
+		 * power gated. Shouldn't cause any harm when called after a
+		 * cold boot according to HW, probably just redundant.
+		 */
+		remove_cpu_io_clamps();
+	}
+}
+
+static void enable_cpu_power_rail(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	reg = readl(&pmc->pmc_cntrl);
+	reg |= CPUPWRREQ_OE;
+	writel(reg, &pmc->pmc_cntrl);
+
+	/*
+	 * The TI PMU65861C needs a 3.75ms delay between enabling
+	 * the power rail and enabling the CPU clock.  This delay
+	 * between SM1EN and SM1 is for switching time + the ramp
+	 * up of the voltage to the CPU (VDD_CPU from PMU).
+	 */
+	udelay(3750);
+}
+
+static void reset_A9_cpu(int reset)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 reg, cpu;
+
+	/*
+	 * NOTE: Regardless of whether the request is to hold the CPU in reset
+	 *       or take it out of reset, every processor in the CPU complex
+	 *       except the master (CPU 0) will be held in reset because the
+	 *       AVP only talks to the master. The AVP does not know that there
+	 *       are multiple processors in the CPU complex.
+	 */
+
+	/* Hold CPU 1 in reset */
+	cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
+	writel(cpu, &clkrst->crc_cpu_cmplx_set);
+
+	reg = readl(&clkrst->crc_rst_dev_l);
+	if (reset) {
+		/* Now place CPU0 into reset */
+		cpu |= SET_DBGRESET0 | SET_DERESET0 | SET_CPURESET0;
+		writel(cpu, &clkrst->crc_cpu_cmplx_set);
+
+		/* Enable master CPU reset */
+		reg |= SWR_CPU_RST;
+	} else {
+		/* Take CPU0 out of reset */
+		cpu = CLR_DBGRESET0 | CLR_DERESET0 | CLR_CPURESET0;
+		writel(cpu, &clkrst->crc_cpu_cmplx_clr);
+
+		/* Disable master CPU reset */
+		reg &= ~SWR_CPU_RST;
+	}
+
+	writel(reg, &clkrst->crc_rst_dev_l);
+}
+
+static void clock_enable_coresight(int enable)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 rst, clk, src;
+
+	rst = readl(&clkrst->crc_rst_dev_u);
+	clk = readl(&clkrst->crc_clk_out_enb_u);
+
+	if (enable) {
+		rst &= ~SWR_CSITE_RST;
+		clk |= CLK_ENB_CSITE;
+	} else {
+		rst |= SWR_CSITE_RST;
+		clk &= ~CLK_ENB_CSITE;
+	}
+
+	writel(clk, &clkrst->crc_clk_out_enb_u);
+	writel(rst, &clkrst->crc_rst_dev_u);
+
+	if (enable) {
+		/*
+		 * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
+		 *  1.5, giving an effective frequency of 144MHz.
+		 * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
+		 *  (bits 7:0), so 00000001b == 1.5 (n/2 + 1)
+		 */
+		src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
+		writel(src, &clkrst->crc_clk_src_csite);
+
+		/* Unlock the CPU CoreSight interfaces */
+		rst = CORESIGHT_UNLOCK;
+		writel(rst, CSITE_CPU_DBG0_LAR);
+		writel(rst, CSITE_CPU_DBG1_LAR);
+	}
+}
+
+void start_cpu(u32 reset_vector)
+{
+	/* Enable VDD_CPU */
+	enable_cpu_power_rail();
+
+	/* Hold the CPUs in reset */
+	reset_A9_cpu(1);
+
+	/* Disable the CPU clock */
+	enable_cpu_clock(0);
+
+	/* Enable CoreSight */
+	clock_enable_coresight(1);
+
+	/*
+	 * Set the entry point for CPU execution from reset,
+	 *  if it's a non-zero value.
+	 */
+	if (reset_vector)
+		writel(reset_vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
+
+	/* Enable the CPU clock */
+	enable_cpu_clock(1);
+
+	/* If the CPU doesn't already have power, power it up */
+	if (!is_cpu_powered())
+		powerup_cpu();
+
+	/* Take the CPU out of reset */
+	reset_A9_cpu(0);
+}
+
+
+void halt_avp(void)
+{
+	for (;;) {
+		writel((HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1
+			| HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP << 29)),
+			FLOW_CTLR_HALT_COP_EVENTS);
+	}
+}
+
+void enable_scu(void)
+{
+	struct scu_ctlr *scu = (struct scu_ctlr *)NV_PA_ARM_PERIPHBASE;
+	u32 reg;
+
+	/* If SCU already setup/enabled, return */
+	if (readl(&scu->scu_ctrl) & SCU_CTRL_ENABLE)
+		return;
+
+	/* Invalidate all ways for all processors */
+	writel(0xFFFF, &scu->scu_inv_all);
+
+	/* Enable SCU - bit 0 */
+	reg = readl(&scu->scu_ctrl);
+	reg |= SCU_CTRL_ENABLE;
+	writel(reg, &scu->scu_ctrl);
+}
+
+void init_pmc_scratch(void)
+{
+	struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	int i;
+
+	/* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
+	for (i = 0; i < 23; i++)
+		writel(0, &pmc->pmc_scratch1+i);
+
+	/* ODMDATA is for kernel use to determine RAM size, LP config, etc. */
+	writel(CONFIG_SYS_BOARD_ODMDATA, &pmc->pmc_scratch20);
+}
+
+void cpu_start(void)
+{
+	struct pmux_tri_ctlr *pmt = (struct pmux_tri_ctlr *)NV_PA_APB_MISC_BASE;
+
+	/* enable JTAG */
+	writel(0xC0, &pmt->pmt_cfg_ctl);
+
+	if (s_first_boot) {
+		/*
+		 * Need to set this before cold-booting,
+		 *  otherwise we'll end up in an infinite loop.
+		 */
+		s_first_boot = 0;
+		cold_boot();
+	}
+}
+
+void tegra2_start(void)
+{
+	if (s_first_boot) {
+		/* Init Debug UART Port (115200 8n1) */
+		uart_init();
+
+		/* Init PMC scratch memory */
+		init_pmc_scratch();
+	}
+
+#ifdef CONFIG_ENABLE_CORTEXA9
+	/* take the mpcore out of reset */
+	cpu_start();
+
+	/* configure cache */
+	cache_configure();
+#endif
+}
+
+extern ulong _armboot_start;
+u32 cpu_boot_stack = CPU_EARLY_BOOT_STACK_LIMIT;
+u32 avp_boot_stack = AVP_EARLY_BOOT_STACK_LIMIT;
+u32 proc_tag = PG_UP_TAG_0_PID_CPU & 0xFF;
+
+/*
+ * TBD: Move cold_boot() to assembly file.
+ * Values/offsets of the table vars make this difficult.
+ */
+
+void cold_boot(void)
+{
+	asm volatile(
+		"msr	cpsr_c, #0xD3	\n"
+		/*
+		* Check current processor: CPU or AVP?
+		* If CPU, go to CPU boot code, else continue on AVP path.
+		*/
+		"mov	r0, %0		\n"
+		"ldrb	r2, [r0, %1]	\n"
+		/* are we the CPU? */
+		"cmp	r2, %2		\n"
+		"mov	sp, %3		\n"
+		/*  yep, we are the CPU */
+		"bxeq	%4		\n"
+
+		/* AVP initialization follows this path */
+		"mov	sp, %5		\n"
+		/* Init and start CPU */
+		"b	startup_cpu	\n"
+		:
+		: "i"(NV_PA_PG_UP_BASE),
+		"i"(PG_UP_TAG_0),
+		"r"(proc_tag),
+		"r"(cpu_boot_stack),
+		"r"(_armboot_start),
+		"r"(avp_boot_stack)
+		: "r0", "r2", "cc", "lr"
+	);
+}
diff --git a/arch/arm/cpu/armv7/tegra2/ap20.h b/arch/arm/cpu/armv7/tegra2/ap20.h
new file mode 100644
index 0000000..e463d68
--- /dev/null
+++ b/arch/arm/cpu/armv7/tegra2/ap20.h
@@ -0,0 +1,105 @@ 
+/*
+ * (C) Copyright 2010-2011
+ * NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+#include <asm/types.h>
+
+/* Stabilization delays, in usec */
+#define PLL_STABILIZATION_DELAY (300)
+#define IO_STABILIZATION_DELAY	(1000)
+
+#define NVBL_PLLP_KHZ	(216000)
+
+#define PLLX_ENABLED		(1 << 30)
+#define CCLK_BURST_POLICY	0x20008888
+#define SUPER_CCLK_DIVIDER	0x80000000
+
+/* Calculate clock fractional divider value from ref and target frequencies */
+#define CLK_DIVIDER(REF, FREQ)  ((((REF) * 2) / (FREQ)) - 2)
+
+/* Calculate clock frequency value from reference and clock divider value */
+#define CLK_FREQUENCY(REF, REG)  (((REF) * 2) / ((REG) + 2))
+
+/* AVP/CPU ID */
+#define PG_UP_TAG_0_PID_CPU	0x55555555	/* CPU aka "a9" aka "mpcore" */
+#define PG_UP_TAG_0             0x0
+
+#define CORESIGHT_UNLOCK	0xC5ACCE55	/* CoreSight Lock Access Register key */
+
+/* AP20-Specific Base Addresses */
+
+/* AP20 Base physical address of SDRAM. */
+#define AP20_BASE_PA_SDRAM      0x00000000
+/* AP20 Base physical address of internal SRAM. */
+#define AP20_BASE_PA_SRAM       0x40000000
+/* AP20 Size of internal SRAM (256KB). */
+#define AP20_BASE_PA_SRAM_SIZE  0x00040000
+/* AP20 Base physical address of flash. */
+#define AP20_BASE_PA_NOR_FLASH  0xD0000000
+/* AP20 Base physical address of boot information table. */
+#define AP20_BASE_PA_BOOT_INFO  AP20_BASE_PA_SRAM
+
+/*
+ * Super-temporary stacks for EXTREMELY early startup. The values chosen for
+ * these addresses must be valid on ALL SOCs because this value is used before
+ * we are able to differentiate between the SOC types.
+ *
+ * NOTE: Since the CPU's stack will eventually be moved from IRAM to SDRAM, its
+ *       stack is placed below the AVP stack. Once the CPU stack has been moved,
+ *       the AVP is free to use the IRAM the CPU stack previously occupied if
+ *       it should need to do so.
+ *
+ * NOTE: In multi-processor CPU complex configurations, each processor will have
+ *       its own stack of size CPU_EARLY_BOOT_STACK_SIZE. CPU 0 will have a
+ *       limit of CPU_EARLY_BOOT_STACK_LIMIT. Each successive CPU will have a
+ *       stack limit that is CPU_EARLY_BOOT_STACK_SIZE less than the previous
+ *       CPU.
+ */
+
+/* Common AVP early boot stack limit */
+#define AVP_EARLY_BOOT_STACK_LIMIT	\
+	(AP20_BASE_PA_SRAM + (AP20_BASE_PA_SRAM_SIZE/2))
+/* Common AVP early boot stack size */
+#define AVP_EARLY_BOOT_STACK_SIZE	0x1000
+/* Common CPU early boot stack limit */
+#define CPU_EARLY_BOOT_STACK_LIMIT	\
+	(AVP_EARLY_BOOT_STACK_LIMIT - AVP_EARLY_BOOT_STACK_SIZE)
+/* Common CPU early boot stack size */
+#define CPU_EARLY_BOOT_STACK_SIZE	0x1000
+
+#define EXCEP_VECTOR_CPU_RESET_VECTOR	(NV_PA_EVP_BASE + 0x100)
+#define CSITE_CPU_DBG0_LAR		(NV_PA_CSITE_BASE + 0x10FB0)
+#define CSITE_CPU_DBG1_LAR		(NV_PA_CSITE_BASE + 0x12FB0)
+
+#define FLOW_CTLR_HALT_COP_EVENTS	(NV_PA_FLOW_BASE + 4)
+#define FLOW_MODE_STOP			2
+#define HALT_COP_EVENT_JTAG		(1 << 28)
+#define HALT_COP_EVENT_IRQ_1		(1 << 11)
+#define HALT_COP_EVENT_FIQ_1		(1 << 9)
+
+/* Prototypes */
+
+void tegra2_start(void);
+void uart_init(void);
+void udelay(unsigned long usec);
+void cold_boot(void);
+void cache_configure(void);
+
diff --git a/arch/arm/cpu/armv7/tegra2/lowlevel_init.S b/arch/arm/cpu/armv7/tegra2/lowlevel_init.S
index 7f15746..8b2a925 100644
--- a/arch/arm/cpu/armv7/tegra2/lowlevel_init.S
+++ b/arch/arm/cpu/armv7/tegra2/lowlevel_init.S
@@ -26,6 +26,7 @@ 
 #include <config.h>
 #include <version.h>
 
+
 _TEXT_BASE:
 	.word	CONFIG_SYS_TEXT_BASE	@ sdram load addr from config file
 
@@ -58,6 +59,75 @@  lowlevel_init:
 
 	mov	pc, lr				@ back to arch calling code
 
+
+.globl startup_cpu
+startup_cpu:
+	@ Initialize the AVP, clocks, and memory controller
+	@ SDRAM is guaranteed to be on at this point
+
+	ldr     r0, =cold_boot			@ R0 = reset vector for CPU
+	bl      start_cpu			@ start the CPU
+
+	@ Transfer control to the AVP code
+	bl      halt_avp
+
+	@ Should never get here
+_loop_forever2:
+	b	_loop_forever2
+
+.globl cache_configure
+cache_configure:
+	stmdb r13!,{r14}	@ save lr: the bl to enable_scu below clobbers it
+@	/* invalidate instruction cache */
+	mov r1, #0
+	mcr p15, 0, r1, c7, c5, 0
+
+@	/* invalidate the i&d tlb entries */
+	mcr p15, 0, r1, c8, c5, 0
+	mcr p15, 0, r1, c8, c6, 0
+
+@	/* enable instruction cache */
+	mrc  p15, 0, r1, c1, c0, 0
+	orr  r1, r1, #(1<<12)
+	mcr  p15, 0, r1, c1, c0, 0
+
+	bl enable_scu
+
+@	/* enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg */
+	mrc p15, 0, r0, c1, c0, 1
+	orr r0, r0, #0x41
+	mcr p15, 0, r0, c1, c0, 1
+
+@	/* Now invalidate the Dcache, line by line in each of the 4 ways */
+	mov r0, #0
+@	/* 256 cache lines per way, index counts down from 255 to 0 */
+	mov r1, #256
+
+invalidate_loop:
+	add r1, r1, #-1
+	mov r0, r1, lsl #5
+@	/* invalidate d-cache using line (way0) */
+	mcr p15, 0, r0, c7, c6, 2
+
+	orr r2, r0, #(1<<30)
+@	/* invalidate d-cache using line (way1) */
+	mcr p15, 0, r2, c7, c6, 2
+
+	orr r2, r0, #(2<<30)
+@	/* invalidate d-cache using line (way2) */
+	mcr p15, 0, r2, c7, c6, 2
+
+	orr r2, r0, #(3<<30)
+@	/* invalidate d-cache using line (way3) */
+	mcr p15, 0, r2, c7, c6, 2
+	cmp r1, #0
+	bne invalidate_loop
+
+@	/* FIXME: should have ap20's L2 disabled too */
+invalidate_done:
+	ldmia r13!,{pc}		@ return by popping the saved lr into pc
+
+
 	@ the literal pools origin
 	.ltorg
 
diff --git a/arch/arm/include/asm/arch-tegra2/clk_rst.h b/arch/arm/include/asm/arch-tegra2/clk_rst.h
index 6d573bf..d67a5d7 100644
--- a/arch/arm/include/asm/arch-tegra2/clk_rst.h
+++ b/arch/arm/include/asm/arch-tegra2/clk_rst.h
@@ -149,6 +149,9 @@  struct clk_rst_ctlr {
 	uint crc_clk_src_csite;		/*_CSITE_0,		0x1D4 */
 	uint crc_reserved19[9];		/*			0x1D8-1F8 */
 	uint crc_clk_src_osc;		/*_OSC_0,		0x1FC */
+	uint crc_reserved20[80];	/*			0x200-33C */
+	uint crc_cpu_cmplx_set;		/* _CPU_CMPLX_SET_0,	0x340 */
+	uint crc_cpu_cmplx_clr;		/* _CPU_CMPLX_CLR_0,	0x344 */
 };
 
 #define PLL_BYPASS		(1 << 31)
@@ -162,4 +165,28 @@  struct clk_rst_ctlr {
 #define SWR_UARTA_RST		(1 << 6)
 #define CLK_ENB_UARTA		(1 << 6)
 
+#define SWR_CPU_RST		(1 << 0)
+#define CLK_ENB_CPU		(1 << 0)
+#define SWR_CSITE_RST		(1 << 9)
+#define CLK_ENB_CSITE		(1 << 9)
+
+#define SET_CPURESET0		(1 << 0)
+#define SET_DERESET0		(1 << 4)
+#define SET_DBGRESET0		(1 << 12)
+
+#define SET_CPURESET1		(1 << 1)
+#define SET_DERESET1		(1 << 5)
+#define SET_DBGRESET1		(1 << 13)
+
+#define CLR_CPURESET0		(1 << 0)
+#define CLR_DERESET0		(1 << 4)
+#define CLR_DBGRESET0		(1 << 12)
+
+#define CLR_CPURESET1		(1 << 1)
+#define CLR_DERESET1		(1 << 5)
+#define CLR_DBGRESET1		(1 << 13)
+
+#define CPU0_CLK_STP		(1 << 8)
+#define CPU1_CLK_STP		(1 << 9)
+
 #endif	/* CLK_RST_H */
diff --git a/arch/arm/include/asm/arch-tegra2/pmc.h b/arch/arm/include/asm/arch-tegra2/pmc.h
index 7ec9eeb..b1d47cd 100644
--- a/arch/arm/include/asm/arch-tegra2/pmc.h
+++ b/arch/arm/include/asm/arch-tegra2/pmc.h
@@ -121,4 +121,12 @@  struct pmc_ctlr {
 	uint pmc_gate;			/* _GATE_0, offset 15C */
 };
 
+#define CPU_PWRED	1
+#define CPU_CLMP	1
+
+#define PARTID_CP	0xFFFFFFF8
+#define START_CP	(1 << 8)
+
+#define CPUPWRREQ_OE	(1 << 16)
+
 #endif	/* PMC_H */
diff --git a/arch/arm/include/asm/arch-tegra2/scu.h b/arch/arm/include/asm/arch-tegra2/scu.h
new file mode 100644
index 0000000..787ded0
--- /dev/null
+++ b/arch/arm/include/asm/arch-tegra2/scu.h
@@ -0,0 +1,43 @@ 
+/*
+ *  (C) Copyright 2010,2011
+ *  NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _SCU_H_
+#define _SCU_H_
+
+/* ARM Snoop Control Unit (SCU) registers */
+struct scu_ctlr {
+	uint scu_ctrl;		/* SCU Control Register, offset 00 */
+	uint scu_cfg;		/* SCU Config Register, offset 04 */
+	uint scu_cpu_pwr_stat;	/* SCU CPU Power Status Register, offset 08 */
+	uint scu_inv_all;	/* SCU Invalidate All Register, offset 0C */
+	uint scu_reserved0[12];	/* reserved, offset 10-3C */
+	uint scu_filt_start;	/* SCU Filtering Start Address Reg, offset 40 */
+	uint scu_filt_end;	/* SCU Filtering End Address Reg, offset 44 */
+	uint scu_reserved1[2];	/* reserved, offset 48-4C */
+	uint scu_acc_ctl;	/* SCU Access Control Register, offset 50 */
+	uint scu_ns_acc_ctl;	/* SCU Non-secure Access Cntrl Reg, offset 54 */
+};
+
+#define SCU_CTRL_ENABLE		(1 << 0)
+
+#endif	/* _SCU_H_ */
diff --git a/arch/arm/include/asm/arch-tegra2/tegra2.h b/arch/arm/include/asm/arch-tegra2/tegra2.h
index 9001b68..5813cd9 100644
--- a/arch/arm/include/asm/arch-tegra2/tegra2.h
+++ b/arch/arm/include/asm/arch-tegra2/tegra2.h
@@ -25,8 +25,12 @@ 
 #define _TEGRA2_H_
 
 #define NV_PA_SDRAM_BASE	0x00000000
+#define NV_PA_ARM_PERIPHBASE	0x50040000
+#define NV_PA_PG_UP_BASE	0x60000000
 #define NV_PA_TMRUS_BASE	0x60005010
 #define NV_PA_CLK_RST_BASE	0x60006000
+#define NV_PA_FLOW_BASE		0x60007000
+#define NV_PA_EVP_BASE		0x6000F000
 #define NV_PA_APB_MISC_BASE	0x70000000
 #define NV_PA_APB_UARTA_BASE	(NV_PA_APB_MISC_BASE + 0x6000)
 #define NV_PA_APB_UARTB_BASE	(NV_PA_APB_MISC_BASE + 0x6040)
@@ -34,6 +38,7 @@ 
 #define NV_PA_APB_UARTD_BASE	(NV_PA_APB_MISC_BASE + 0x6300)
 #define NV_PA_APB_UARTE_BASE	(NV_PA_APB_MISC_BASE + 0x6400)
 #define NV_PA_PMC_BASE		0x7000E400
+#define NV_PA_CSITE_BASE	0x70040000
 
 #define TEGRA2_SDRC_CS0		NV_PA_SDRAM_BASE
 #define LOW_LEVEL_SRAM_STACK	0x4000FFFC
diff --git a/board/nvidia/common/board.c b/board/nvidia/common/board.c
index b2c412c..078547b 100644
--- a/board/nvidia/common/board.c
+++ b/board/nvidia/common/board.c
@@ -30,6 +30,7 @@ 
 #include <asm/arch/clk_rst.h>
 #include <asm/arch/pinmux.h>
 #include <asm/arch/uart.h>
+#include "board.h"
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -37,6 +38,15 @@  const struct tegra2_sysinfo sysinfo = {
 	CONFIG_TEGRA2_BOARD_STRING
 };
 
+#ifdef CONFIG_BOARD_EARLY_INIT_F
+int board_early_init_f(void)
+{
+	debug("Board Early Init\n");
+	tegra2_start();
+	return 0;
+}
+#endif	/* CONFIG_BOARD_EARLY_INIT_F */
+
 /*
  * Routine: timer_init
  * Description: init the timestamp and lastinc value
diff --git a/board/nvidia/common/board.h b/board/nvidia/common/board.h
new file mode 100644
index 0000000..47c7885
--- /dev/null
+++ b/board/nvidia/common/board.h
@@ -0,0 +1,29 @@ 
+/*
+ *  (C) Copyright 2010,2011
+ *  NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _BOARD_H_
+#define _BOARD_H_
+
+void tegra2_start(void);
+
+#endif	/* _BOARD_H_ */
diff --git a/include/configs/harmony.h b/include/configs/harmony.h
index d004f31..34bd899 100644
--- a/include/configs/harmony.h
+++ b/include/configs/harmony.h
@@ -46,4 +46,5 @@ 
 #define CONFIG_MACH_TYPE		MACH_TYPE_HARMONY
 #define CONFIG_SYS_BOARD_ODMDATA	0x300d8011 /* lp1, 1GB */
 
+#define CONFIG_BOARD_EARLY_INIT_F
 #endif /* __CONFIG_H */
diff --git a/include/configs/seaboard.h b/include/configs/seaboard.h
index fd87560..06ce3e2 100644
--- a/include/configs/seaboard.h
+++ b/include/configs/seaboard.h
@@ -37,7 +37,8 @@ 
 #define CONFIG_TEGRA2_ENABLE_UARTD
 #define CONFIG_SYS_NS16550_COM1		NV_PA_APB_UARTD_BASE
 
-#define CONFIG_MACH_TYPE		MACH_TYPE_TEGRA_SEABOARD
+#define CONFIG_MACH_TYPE		MACH_TYPE_SEABOARD
 #define CONFIG_SYS_BOARD_ODMDATA	0x300d8011 /* lp1, 1GB */
 
+#define CONFIG_BOARD_EARLY_INIT_F
 #endif /* __CONFIG_H */
diff --git a/include/configs/tegra2-common.h b/include/configs/tegra2-common.h
index 4f4374a..2924325 100644
--- a/include/configs/tegra2-common.h
+++ b/include/configs/tegra2-common.h
@@ -33,6 +33,8 @@ 
 #define CONFIG_MACH_TEGRA_GENERIC	/* which is a Tegra generic machine */
 #define CONFIG_L2_OFF			/* No L2 cache */
 
+#define CONFIG_ENABLE_CORTEXA9		/* enable CPU (A9 complex) */
+
 #include <asm/arch/tegra2.h>		/* get chip and board defs */
 
 /*