arm: Tegra2: add support for A9 CPU init

Signed-off-by: Tom Warren <twarren@nvidia.com>
diff --git a/arch/arm/cpu/armv7/start.S b/arch/arm/cpu/armv7/start.S
index 6387b8b..2929fc7 100644
--- a/arch/arm/cpu/armv7/start.S
+++ b/arch/arm/cpu/armv7/start.S
@@ -70,6 +70,18 @@
 _TEXT_BASE:
 	.word	CONFIG_SYS_TEXT_BASE
 
+#ifdef CONFIG_TEGRA2
+/*
+ * Tegra2 uses 2 separate CPUs - the AVP (ARM7TDMI) and the CPU (dual A9s).
+ * U-Boot runs on the AVP first, setting things up for the CPU (PLLs,
+ * muxes, clocks, clamps, etc.). Then the AVP halts, and expects the CPU
+ * to pick up its reset vector, which points here.
+ */
+.globl _armboot_start
+_armboot_start:
+        .word _start
+#endif
+
 /*
  * These are defined in the board-specific linker script.
  */
diff --git a/arch/arm/cpu/armv7/tegra2/Makefile b/arch/arm/cpu/armv7/tegra2/Makefile
index 687c887..f1ea915 100644
--- a/arch/arm/cpu/armv7/tegra2/Makefile
+++ b/arch/arm/cpu/armv7/tegra2/Makefile
@@ -28,7 +28,7 @@
 LIB	=  $(obj)lib$(SOC).o
 
 SOBJS	:= lowlevel_init.o
-COBJS	:= board.o sys_info.o timer.o
+COBJS	:= ap20.o board.o sys_info.o timer.o
 
 SRCS	:= $(SOBJS:.o=.S) $(COBJS:.o=.c)
 OBJS	:= $(addprefix $(obj),$(COBJS) $(SOBJS))
diff --git a/arch/arm/cpu/armv7/tegra2/ap20.c b/arch/arm/cpu/armv7/tegra2/ap20.c
new file mode 100644
index 0000000..d3e6797
--- /dev/null
+++ b/arch/arm/cpu/armv7/tegra2/ap20.c
@@ -0,0 +1,329 @@
+/*
+* (C) Copyright 2010-2011
+* NVIDIA Corporation <www.nvidia.com>
+*
+* See file CREDITS for list of people who contributed to this
+* project.
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of
+* the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+* MA 02111-1307 USA
+*/
+
+#include "ap20.h"
+#include <asm/io.h>
+#include <asm/arch/tegra2.h>
+#include <asm/arch/clk_rst.h>
+#include <asm/arch/pmc.h>
+#include <asm/arch/pinmux.h>
+#include <asm/arch/scu.h>
+#include <common.h>
+
+u32 s_first_boot = 1;
+
+static void enable_cpu_clock(int enable)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 reg, clk;
+
+	/*
+	 * NOTE:
+	 * Regardless of whether the request is to enable or disable the CPU
+	 * clock, every processor in the CPU complex except the master (CPU 0)
+	 * will have it's clock stopped because the AVP only talks to the
+	 * master. The AVP does not know (nor does it need to know) that there
+	 * are multiple processors in the CPU complex.
+	 */
+
+	if (enable) {
+		/* Wait until all clocks are stable */
+		udelay(PLL_STABILIZATION_DELAY);
+
+		writel(CCLK_BURST_POLICY, &clkrst->crc_cclk_brst_pol);
+		writel(SUPER_CCLK_DIVIDER, &clkrst->crc_super_cclk_div);
+	}
+
+	/* Fetch the register containing the main CPU complex clock enable */
+	reg = readl(&clkrst->crc_clk_out_enb_l);
+	reg |= CLK_ENB_CPU;
+
+	/*
+	 * Read the register containing the individual CPU clock enables and
+	 * always stop the clock to CPU 1.
+	 */
+	clk = readl(&clkrst->crc_clk_cpu_cmplx);
+	clk |= CPU1_CLK_STP;
+
+	if (enable) {
+		/* Unstop the CPU clock */
+		clk &= ~CPU0_CLK_STP;
+	} else {
+		/* Stop the CPU clock */
+		clk |= CPU0_CLK_STP;
+	}
+
+	writel(clk, &clkrst->crc_clk_cpu_cmplx);
+	writel(reg, &clkrst->crc_clk_out_enb_l);
+}
+
+static int is_cpu_powered(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+
+	return (readl(&pmc->pmc_pwrgate_status) & CPU_PWRED) ? 1 : 0;
+}
+
+static void remove_cpu_io_clamps(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	/* Remove the clamps on the CPU I/O signals */
+	reg = readl(&pmc->pmc_remove_clamping);
+	reg |= CPU_CLMP;
+	writel(reg, &pmc->pmc_remove_clamping);
+
+	/* Give I/O signals time to stabilize */
+	udelay(IO_STABILIZATION_DELAY);
+}
+
+static void powerup_cpu(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+	int timeout = IO_STABILIZATION_DELAY;
+
+	if (!is_cpu_powered()) {
+		/* Toggle the CPU power state (OFF -> ON) */
+		reg = readl(&pmc->pmc_pwrgate_toggle);
+		reg &= PARTID_CP;
+		reg |= START_CP;
+		writel(reg, &pmc->pmc_pwrgate_toggle);
+
+		/* Wait for the power to come up */
+		while (!is_cpu_powered()) {
+			if (timeout-- == 0)
+				printf("CPU failed to power up!\n");
+			else
+				udelay(10);
+		}
+
+		/*
+		 * Remove the I/O clamps from CPU power partition.
+		 * Recommended only on a Warm boot, if the CPU partition gets
+		 * power gated. Shouldn't cause any harm when called after a
+		 * cold boot according to HW, probably just redundant.
+		 */
+		remove_cpu_io_clamps();
+	}
+}
+
+static void enable_cpu_power_rail(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	reg = readl(&pmc->pmc_cntrl);
+	reg |= CPUPWRREQ_OE;
+	writel(reg, &pmc->pmc_cntrl);
+
+	/*
+	 * The TI PMU65861C needs a 3.75ms delay between enabling
+	 * the power rail and enabling the CPU clock.  This delay
+	 * between SM1EN and SM1 is for switching time + the ramp
+	 * up of the voltage to the CPU (VDD_CPU from PMU).
+	 */
+	udelay(3750);
+}
+
+static void reset_A9_cpu(int reset)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 reg, cpu;
+
+	/*
+	* NOTE:  Regardless of whether the request is to hold the CPU in reset
+	*        or take it out of reset, every processor in the CPU complex
+	*        except the master (CPU 0) will be held in reset because the
+	*        AVP only talks to the master. The AVP does not know that there
+	*        are multiple processors in the CPU complex.
+	*/
+
+	/* Hold CPU 1 in reset */
+	cpu = SET_DBGRESET1 | SET_DERESET1 | SET_CPURESET1;
+	writel(cpu, &clkrst->crc_cpu_cmplx_set);
+
+	reg = readl(&clkrst->crc_rst_dev_l);
+	if (reset) {
+		/* Now place CPU0 into reset */
+		cpu |= SET_DBGRESET0 | SET_DERESET0 | SET_CPURESET0;
+		writel(cpu, &clkrst->crc_cpu_cmplx_set);
+
+		/* Enable master CPU reset */
+		reg |= SWR_CPU_RST;
+	} else {
+		/* Take CPU0 out of reset */
+		cpu = CLR_DBGRESET0 | CLR_DERESET0 | CLR_CPURESET0;
+		writel(cpu, &clkrst->crc_cpu_cmplx_clr);
+
+		/* Disable master CPU reset */
+		reg &= ~SWR_CPU_RST;
+	}
+
+	writel(reg, &clkrst->crc_rst_dev_l);
+}
+
+static void clock_enable_coresight(int enable)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 rst, clk, src;
+
+	rst = readl(&clkrst->crc_rst_dev_u);
+	clk = readl(&clkrst->crc_clk_out_enb_u);
+
+	if (enable) {
+		rst &= ~SWR_CSITE_RST;
+		clk |= CLK_ENB_CSITE;
+	} else {
+		rst |= SWR_CSITE_RST;
+		clk &= ~CLK_ENB_CSITE;
+	}
+
+	writel(clk, &clkrst->crc_clk_out_enb_u);
+	writel(rst, &clkrst->crc_rst_dev_u);
+
+	if (enable) {
+		/*
+		 * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
+		 *  1.5, giving an effective frequency of 144MHz.
+		 * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
+		 *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
+		 */
+		src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
+		writel(src, &clkrst->crc_clk_src_csite);
+
+		/* Unlock the CPU CoreSight interfaces */
+		rst = 0xC5ACCE55;
+		writel(rst, CSITE_CPU_DBG0_LAR);
+		writel(rst, CSITE_CPU_DBG1_LAR);
+	}
+}
+
+void start_cpu(u32 reset_vector)
+{
+	/* Enable VDD_CPU */
+	enable_cpu_power_rail();
+
+	/* Hold the CPUs in reset */
+	reset_A9_cpu(1);
+
+	/* Disable the CPU clock */
+	enable_cpu_clock(0);
+
+	/* Enable CoreSight */
+	clock_enable_coresight(1);
+
+	/*
+	 * Set the entry point for CPU execution from reset,
+	 *  if it's a non-zero value.
+	 */
+	if (reset_vector)
+		writel(reset_vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
+
+	/* Enable the CPU clock */
+	enable_cpu_clock(1);
+
+	/* If the CPU doesn't already have power, power it up */
+	powerup_cpu();
+
+	/* Take the CPU out of reset */
+	reset_A9_cpu(0);
+}
+
+
+void halt_avp(void)
+{
+	for (;;) {
+		writel((HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
+			| HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29)),
+			FLOW_CTLR_HALT_COP_EVENTS);
+	}
+}
+
+void enable_scu(void)
+{
+	struct scu_ctlr *scu = (struct scu_ctlr *)NV_PA_ARM_PERIPHBASE;
+	u32 reg;
+
+	/* If SCU already setup/enabled, return */
+	if (readl(&scu->scu_ctrl) & SCU_CTRL_ENABLE)
+		return;
+
+	/* Invalidate all ways for all processors */
+	writel(0xFFFF, &scu->scu_inv_all);
+
+	/* Enable SCU - bit 0 */
+	reg = readl(&scu->scu_ctrl);
+	reg |= SCU_CTRL_ENABLE;
+	writel(reg, &scu->scu_ctrl);
+}
+
+void init_pmc_scratch(void)
+{
+	struct pmc_ctlr *const pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	int i;
+
+	/* SCRATCH0 is initialized by the boot ROM and shouldn't be cleared */
+	for (i = 0; i < 23; i++)
+		writel(0, &pmc->pmc_scratch1+i);
+
+	/* ODMDATA is for kernel use to determine RAM size, LP config, etc. */
+	writel(CONFIG_SYS_BOARD_ODMDATA, &pmc->pmc_scratch20);
+}
+
+void cpu_start(void)
+{
+	struct pmux_tri_ctlr *pmt = (struct pmux_tri_ctlr *)NV_PA_APB_MISC_BASE;
+
+	/* enable JTAG */
+	writel(0xC0, &pmt->pmt_cfg_ctl);
+
+	if (s_first_boot) {
+		/*
+		 * Need to set this before cold-booting,
+		 *  otherwise we'll end up in an infinite loop.
+		 */
+		s_first_boot = 0;
+		cold_boot();
+	}
+}
+
+void tegra2_start()
+{
+	if (s_first_boot) {
+		/* Init Debug UART Port (115200 8n1) */
+		uart_init();
+
+		/* Init PMC scratch memory */
+		init_pmc_scratch();
+	}
+
+#ifdef CONFIG_ENABLE_CORTEXA9
+	/* take the mpcore out of reset */
+	cpu_start();
+
+	/* configure cache */
+	cache_configure();
+#endif
+}
diff --git a/arch/arm/cpu/armv7/tegra2/ap20.h b/arch/arm/cpu/armv7/tegra2/ap20.h
new file mode 100644
index 0000000..49fe340
--- /dev/null
+++ b/arch/arm/cpu/armv7/tegra2/ap20.h
@@ -0,0 +1,104 @@
+/*
+ * (C) Copyright 2010-2011
+ * NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+#include <asm/types.h>
+
+/* Stabilization delays, in usec */
+#define PLL_STABILIZATION_DELAY (300)
+#define IO_STABILIZATION_DELAY	(1000)
+
+#define NVBL_PLLP_KHZ	(216000)
+
+#define PLLX_ENABLED		(1 << 30)
+#define CCLK_BURST_POLICY	0x20008888
+#define SUPER_CCLK_DIVIDER	0x80000000
+
+/* Calculate clock fractional divider value from ref and target frequencies */
+#define CLK_DIVIDER(REF, FREQ)  ((((REF) * 2) / FREQ) - 2)
+
+/* Calculate clock frequency value from reference and clock divider value */
+#define CLK_FREQUENCY(REF, REG)  (((REF) * 2) / (REG + 2))
+
+/* AVP/CPU ID */
+#define PG_UP_TAG_0_PID_CPU	0x55555555	/* CPU aka "a9" aka "mpcore" */
+#define PG_UP_TAG_0             0x0
+
+#define CORESIGHT_UNLOCK	0xC5ACCE55;
+
+/* AP20-Specific Base Addresses */
+
+/* AP20 Base physical address of SDRAM. */
+#define AP20_BASE_PA_SDRAM      0x00000000
+/* AP20 Base physical address of internal SRAM. */
+#define AP20_BASE_PA_SRAM       0x40000000
+/* AP20 Size of internal SRAM (256KB). */
+#define AP20_BASE_PA_SRAM_SIZE  0x00040000
+/* AP20 Base physical address of flash. */
+#define AP20_BASE_PA_NOR_FLASH  0xD0000000
+/* AP20 Base physical address of boot information table. */
+#define AP20_BASE_PA_BOOT_INFO  AP20_BASE_PA_SRAM
+
+/*
+ * Super-temporary stacks for EXTREMELY early startup. The values chosen for
+ * these addresses must be valid on ALL SOCs because this value is used before
+ * we are able to differentiate between the SOC types.
+ *
+ * NOTE: The since CPU's stack will eventually be moved from IRAM to SDRAM, its
+ *       stack is placed below the AVP stack. Once the CPU stack has been moved,
+ *       the AVP is free to use the IRAM the CPU stack previously occupied if
+ *       it should need to do so.
+ *
+ * NOTE: In multi-processor CPU complex configurations, each processor will have
+ *       its own stack of size CPU_EARLY_BOOT_STACK_SIZE. CPU 0 will have a
+ *       limit of CPU_EARLY_BOOT_STACK_LIMIT. Each successive CPU will have a
+ *       stack limit that is CPU_EARLY_BOOT_STACK_SIZE less then the previous
+ *       CPU.
+ */
+
+/* Common AVP early boot stack limit */
+#define AVP_EARLY_BOOT_STACK_LIMIT	\
+	(AP20_BASE_PA_SRAM + (AP20_BASE_PA_SRAM_SIZE/2))
+/* Common AVP early boot stack size */
+#define AVP_EARLY_BOOT_STACK_SIZE	0x1000
+/* Common CPU early boot stack limit */
+#define CPU_EARLY_BOOT_STACK_LIMIT	\
+	(AVP_EARLY_BOOT_STACK_LIMIT - AVP_EARLY_BOOT_STACK_SIZE)
+/* Common CPU early boot stack size */
+#define CPU_EARLY_BOOT_STACK_SIZE	0x1000
+
+#define EXCEP_VECTOR_CPU_RESET_VECTOR	(NV_PA_EVP_BASE + 0x100)
+#define CSITE_CPU_DBG0_LAR		(NV_PA_CSITE_BASE + 0x10FB0)
+#define CSITE_CPU_DBG1_LAR		(NV_PA_CSITE_BASE + 0x12FB0)
+
+#define FLOW_CTLR_HALT_COP_EVENTS	(NV_PA_FLOW_BASE + 4)
+#define FLOW_MODE_STOP			2
+#define HALT_COP_EVENT_JTAG		(1 << 28)
+#define HALT_COP_EVENT_IRQ_1		(1 << 11)
+#define HALT_COP_EVENT_FIQ_1		(1 << 9)
+
+/* Prototypes */
+
+void tegra2_start(void);
+void uart_init(void);
+void udelay(unsigned long);
+void cold_boot(void);
+void cache_configure(void);
diff --git a/arch/arm/cpu/armv7/tegra2/lowlevel_init.S b/arch/arm/cpu/armv7/tegra2/lowlevel_init.S
index 7f15746..f24a2ff 100644
--- a/arch/arm/cpu/armv7/tegra2/lowlevel_init.S
+++ b/arch/arm/cpu/armv7/tegra2/lowlevel_init.S
@@ -26,6 +26,7 @@
 #include <config.h>
 #include <version.h>
 
+
 _TEXT_BASE:
 	.word	CONFIG_SYS_TEXT_BASE	@ sdram load addr from config file
 
@@ -58,8 +59,101 @@
 
 	mov	pc, lr				@ back to arch calling code
 
+
+.globl startup_cpu
+startup_cpu:
+	@ Initialize the AVP, clocks, and memory controller
+	@ SDRAM is guaranteed to be on at this point
+
+	ldr     r0, =cold_boot			@ R0 = reset vector for CPU
+	bl      start_cpu			@ start the CPU
+
+	@ Transfer control to the AVP code
+	bl      halt_avp
+
+	@ Should never get here
+_loop_forever2:
+	b	_loop_forever2
+
+.globl cache_configure
+cache_configure:
+	stmdb	r13!,{r14}
+	@ invalidate instruction cache
+	mov	r1, #0
+	mcr	p15, 0, r1, c7, c5, 0
+
+	@ invalidate the i&d tlb entries
+	mcr	p15, 0, r1, c8, c5, 0
+	mcr	p15, 0, r1, c8, c6, 0
+
+	@ enable instruction cache
+	mrc	p15, 0, r1, c1, c0, 0
+	orr	r1, r1, #(1<<12)
+	mcr	p15, 0, r1, c1, c0, 0
+
+	bl	enable_scu
+
+	@ enable SMP mode and FW for CPU0, by writing to Auxiliary Ctl reg
+	mrc	p15, 0, r0, c1, c0, 1
+	orr	r0, r0, #0x41
+	mcr	p15, 0, r0, c1, c0, 1
+
+	@ Now flush the Dcache
+	mov	r0, #0
+	@ 256 cache lines
+	mov	r1, #256
+
+invalidate_loop:
+	add	r1, r1, #-1
+	mov	r0, r1, lsl #5
+	@ invalidate d-cache using line (way0)
+	mcr	p15, 0, r0, c7, c6, 2
+
+	orr	r2, r0, #(1<<30)
+	@ invalidate d-cache using line (way1)
+	mcr	p15, 0, r2, c7, c6, 2
+
+	orr	r2, r0, #(2<<30)
+	@ invalidate d-cache using line (way2)
+	mcr	p15, 0, r2, c7, c6, 2
+
+	orr	r2, r0, #(3<<30)
+	@ invalidate d-cache using line (way3)
+	mcr	p15, 0, r2, c7, c6, 2
+	cmp	r1, #0
+	bne	invalidate_loop
+
+	@ FIXME: should have ap20's L2 disabled too?
+invalidate_done:
+	ldmia	r13!,{pc}
+
+.globl cold_boot
+cold_boot:
+	msr	cpsr_c, #0xD3
+	@ Check current processor: CPU or AVP?
+	@  If CPU, go to CPU boot code, else continue on AVP path
+
+	ldr	r0, =NV_PA_PG_UP_BASE
+	ldr	r1, [r0]
+	ldr	r2, =PG_UP_TAG_AVP
+
+	@ are we the CPU?
+	ldr	sp, CPU_STACK
+	cmp	r1, r2
+	@ yep, we are the CPU
+	bne	_armboot_start
+
+	@ AVP initialization follows this path
+	ldr	sp, AVP_STACK
+	@ Init AVP and start CPU
+	b	startup_cpu
+
 	@ the literal pools origin
 	.ltorg
 
 SRAM_STACK:
 	.word LOW_LEVEL_SRAM_STACK
+AVP_STACK:
+	.word EARLY_AVP_STACK
+CPU_STACK:
+	.word EARLY_CPU_STACK
diff --git a/arch/arm/include/asm/arch-tegra2/clk_rst.h b/arch/arm/include/asm/arch-tegra2/clk_rst.h
index 6d573bf..d67a5d7 100644
--- a/arch/arm/include/asm/arch-tegra2/clk_rst.h
+++ b/arch/arm/include/asm/arch-tegra2/clk_rst.h
@@ -149,6 +149,9 @@
 	uint crc_clk_src_csite;		/*_CSITE_0,		0x1D4 */
 	uint crc_reserved19[9];		/*			0x1D8-1F8 */
 	uint crc_clk_src_osc;		/*_OSC_0,		0x1FC */
+	uint crc_reserved20[80];	/*			0x200-33C */
+	uint crc_cpu_cmplx_set;		/* _CPU_CMPLX_SET_0,	0x340 */
+	uint crc_cpu_cmplx_clr;		/* _CPU_CMPLX_CLR_0,	0x344 */
 };
 
 #define PLL_BYPASS		(1 << 31)
@@ -162,4 +165,28 @@
 #define SWR_UARTA_RST		(1 << 6)
 #define CLK_ENB_UARTA		(1 << 6)
 
+#define SWR_CPU_RST		(1 << 0)
+#define CLK_ENB_CPU		(1 << 0)
+#define SWR_CSITE_RST		(1 << 9)
+#define CLK_ENB_CSITE		(1 << 9)
+
+#define SET_CPURESET0		(1 << 0)
+#define SET_DERESET0		(1 << 4)
+#define SET_DBGRESET0		(1 << 12)
+
+#define SET_CPURESET1		(1 << 1)
+#define SET_DERESET1		(1 << 5)
+#define SET_DBGRESET1		(1 << 13)
+
+#define CLR_CPURESET0		(1 << 0)
+#define CLR_DERESET0		(1 << 4)
+#define CLR_DBGRESET0		(1 << 12)
+
+#define CLR_CPURESET1		(1 << 1)
+#define CLR_DERESET1		(1 << 5)
+#define CLR_DBGRESET1		(1 << 13)
+
+#define CPU0_CLK_STP		(1 << 8)
+#define CPU1_CLK_STP		(1 << 9)
+
 #endif	/* CLK_RST_H */
diff --git a/arch/arm/include/asm/arch-tegra2/pmc.h b/arch/arm/include/asm/arch-tegra2/pmc.h
index 7ec9eeb..b1d47cd 100644
--- a/arch/arm/include/asm/arch-tegra2/pmc.h
+++ b/arch/arm/include/asm/arch-tegra2/pmc.h
@@ -121,4 +121,12 @@
 	uint pmc_gate;			/* _GATE_0, offset 15C */
 };
 
+#define CPU_PWRED	1
+#define CPU_CLMP	1
+
+#define PARTID_CP	0xFFFFFFF8
+#define START_CP	(1 << 8)
+
+#define CPUPWRREQ_OE	(1 << 16)
+
 #endif	/* PMC_H */
diff --git a/arch/arm/include/asm/arch-tegra2/scu.h b/arch/arm/include/asm/arch-tegra2/scu.h
new file mode 100644
index 0000000..787ded0
--- /dev/null
+++ b/arch/arm/include/asm/arch-tegra2/scu.h
@@ -0,0 +1,43 @@
+/*
+ *  (C) Copyright 2010,2011
+ *  NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _SCU_H_
+#define _SCU_H_
+
+/* ARM Snoop Control Unit (SCU) registers */
+struct scu_ctlr {
+	uint scu_ctrl;		/* SCU Control Register, offset 00 */
+	uint scu_cfg;		/* SCU Config Register, offset 04 */
+	uint scu_cpu_pwr_stat;	/* SCU CPU Power Status Register, offset 08 */
+	uint scu_inv_all;	/* SCU Invalidate All Register, offset 0C */
+	uint scu_reserved0[12];	/* reserved, offset 10-3C */
+	uint scu_filt_start;	/* SCU Filtering Start Address Reg, offset 40 */
+	uint scu_filt_end;	/* SCU Filtering End Address Reg, offset 44 */
+	uint scu_reserved1[2];	/* reserved, offset 48-4C */
+	uint scu_acc_ctl;	/* SCU Access Control Register, offset 50 */
+	uint scu_ns_acc_ctl;	/* SCU Non-secure Access Cntrl Reg, offset 54 */
+};
+
+#define SCU_CTRL_ENABLE		(1 << 0)
+
+#endif	/* SCU_H */
diff --git a/arch/arm/include/asm/arch-tegra2/tegra2.h b/arch/arm/include/asm/arch-tegra2/tegra2.h
index 9001b68..7b0f5cc 100644
--- a/arch/arm/include/asm/arch-tegra2/tegra2.h
+++ b/arch/arm/include/asm/arch-tegra2/tegra2.h
@@ -25,8 +25,12 @@
 #define _TEGRA2_H_
 
 #define NV_PA_SDRAM_BASE	0x00000000
+#define NV_PA_ARM_PERIPHBASE	0x50040000
+#define NV_PA_PG_UP_BASE	0x60000000
 #define NV_PA_TMRUS_BASE	0x60005010
 #define NV_PA_CLK_RST_BASE	0x60006000
+#define NV_PA_FLOW_BASE		0x60007000
+#define NV_PA_EVP_BASE		0x6000F000
 #define NV_PA_APB_MISC_BASE	0x70000000
 #define NV_PA_APB_UARTA_BASE	(NV_PA_APB_MISC_BASE + 0x6000)
 #define NV_PA_APB_UARTB_BASE	(NV_PA_APB_MISC_BASE + 0x6040)
@@ -34,9 +38,13 @@
 #define NV_PA_APB_UARTD_BASE	(NV_PA_APB_MISC_BASE + 0x6300)
 #define NV_PA_APB_UARTE_BASE	(NV_PA_APB_MISC_BASE + 0x6400)
 #define NV_PA_PMC_BASE		0x7000E400
+#define NV_PA_CSITE_BASE	0x70040000
 
 #define TEGRA2_SDRC_CS0		NV_PA_SDRAM_BASE
 #define LOW_LEVEL_SRAM_STACK	0x4000FFFC
+#define EARLY_AVP_STACK		(NV_PA_SDRAM_BASE + 0x20000)
+#define EARLY_CPU_STACK		(EARLY_AVP_STACK - 4096)
+#define PG_UP_TAG_AVP		0xAAAAAAAA
 
 #ifndef __ASSEMBLY__
 struct timerus {
diff --git a/board/nvidia/common/board.c b/board/nvidia/common/board.c
index 369cb22..5edd70f 100644
--- a/board/nvidia/common/board.c
+++ b/board/nvidia/common/board.c
@@ -30,6 +30,7 @@
 #include <asm/arch/clk_rst.h>
 #include <asm/arch/pinmux.h>
 #include <asm/arch/uart.h>
+#include "board.h"
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -37,6 +38,15 @@
 	CONFIG_TEGRA2_BOARD_STRING
 };
 
+#ifdef CONFIG_BOARD_EARLY_INIT_F
+int board_early_init_f(void)
+{
+	debug("Board Early Init\n");
+	tegra2_start();
+	return 0;
+}
+#endif	/* EARLY_INIT */
+
 /*
  * Routine: timer_init
  * Description: init the timestamp and lastinc value
diff --git a/board/nvidia/common/board.h b/board/nvidia/common/board.h
new file mode 100644
index 0000000..47c7885
--- /dev/null
+++ b/board/nvidia/common/board.h
@@ -0,0 +1,29 @@
+/*
+ *  (C) Copyright 2010,2011
+ *  NVIDIA Corporation <www.nvidia.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef _BOARD_H_
+#define _BOARD_H_
+
+void tegra2_start(void);
+
+#endif	/* BOARD_H */
diff --git a/include/configs/harmony.h b/include/configs/harmony.h
index d004f31..34bd899 100644
--- a/include/configs/harmony.h
+++ b/include/configs/harmony.h
@@ -46,4 +46,5 @@
 #define CONFIG_MACH_TYPE		MACH_TYPE_HARMONY
 #define CONFIG_SYS_BOARD_ODMDATA	0x300d8011 /* lp1, 1GB */
 
+#define CONFIG_BOARD_EARLY_INIT_F
 #endif /* __CONFIG_H */
diff --git a/include/configs/seaboard.h b/include/configs/seaboard.h
index 59eef56..06ce3e2 100644
--- a/include/configs/seaboard.h
+++ b/include/configs/seaboard.h
@@ -40,4 +40,5 @@
 #define CONFIG_MACH_TYPE		MACH_TYPE_SEABOARD
 #define CONFIG_SYS_BOARD_ODMDATA	0x300d8011 /* lp1, 1GB */
 
+#define CONFIG_BOARD_EARLY_INIT_F
 #endif /* __CONFIG_H */
diff --git a/include/configs/tegra2-common.h b/include/configs/tegra2-common.h
index 4f4374a..2924325 100644
--- a/include/configs/tegra2-common.h
+++ b/include/configs/tegra2-common.h
@@ -33,6 +33,8 @@
 #define CONFIG_MACH_TEGRA_GENERIC	/* which is a Tegra generic machine */
 #define CONFIG_L2_OFF			/* No L2 cache */
 
+#define CONFIG_ENABLE_CORTEXA9		/* enable CPU (A9 complex) */
+
 #include <asm/arch/tegra2.h>		/* get chip and board defs */
 
 /*