Tegra30: Add AVP (arm720t) files

This provides SPL support for T30 boards - AVP early init, plus
CPU (A9) init/jump to main U-Boot.

Some changes were made to Tegra20 cpu.c to move common routines
into tegra-common/cpu.c and reduce code duplication.

Signed-off-by: Tom Warren <twarren@nvidia.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Simon Glass <sjg@chromium.org>
diff --git a/arch/arm/cpu/arm720t/tegra-common/Makefile b/arch/arm/cpu/arm720t/tegra-common/Makefile
index febd2e3..6cbc6ad 100644
--- a/arch/arm/cpu/arm720t/tegra-common/Makefile
+++ b/arch/arm/cpu/arm720t/tegra-common/Makefile
@@ -28,6 +28,7 @@
 LIB	= $(obj)libtegra-common.o
 
 COBJS-$(CONFIG_SPL_BUILD) += spl.o
+COBJS-y	+= cpu.o
 
 SRCS	:= $(COBJS-y:.o=.c)
 OBJS	:= $(addprefix $(obj),$(COBJS-y))
diff --git a/arch/arm/cpu/arm720t/tegra-common/cpu.c b/arch/arm/cpu/arm720t/tegra-common/cpu.c
new file mode 100644
index 0000000..693d584
--- /dev/null
+++ b/arch/arm/cpu/arm720t/tegra-common/cpu.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2010-2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/gp_padctrl.h>
+#include <asm/arch/pinmux.h>
+#include <asm/arch/tegra.h>
+#include <asm/arch-tegra/clk_rst.h>
+#include <asm/arch-tegra/pmc.h>
+#include <asm/arch-tegra/scu.h>
+#include "cpu.h"
+
+enum tegra_family_t {
+	TEGRA_FAMILY_T2x,
+	TEGRA_FAMILY_T3x,
+};
+
+
+enum tegra_family_t get_family(void)
+{
+	u32 reg, chip_id;
+
+	reg = readl(NV_PA_APB_MISC_BASE + GP_HIDREV);
+
+	chip_id = reg >> 8;
+	chip_id &= 0xff;
+	debug("  tegra_get_family: chip_id = %x\n", chip_id);
+	if (chip_id == 0x30)
+		return TEGRA_FAMILY_T3x;
+	else
+		return TEGRA_FAMILY_T2x;
+}
+
+int get_num_cpus(void)
+{
+	return get_family() == TEGRA_FAMILY_T3x ? 4 : 2;
+}
+
+/*
+ * Timing tables for each SOC for all four oscillator options.
+ */
+struct clk_pll_table tegra_pll_x_table[TEGRA_SOC_CNT][CLOCK_OSC_FREQ_COUNT] = {
+	/* T20: 1 GHz */
+	{{ 1000, 13, 0, 12},	/* OSC 13M */
+	 { 625,  12, 0, 8},	/* OSC 19.2M */
+	 { 1000, 12, 0, 12},	/* OSC 12M */
+	 { 1000, 26, 0, 12},	/* OSC 26M */
+	},
+
+	/* T25: 1.2 GHz */
+	{{ 923, 10, 0, 12},
+	 { 750, 12, 0, 8},
+	 { 600,  6, 0, 12},
+	 { 600, 13, 0, 12},
+	},
+
+	/* T30: 1.4 GHz */
+	{{ 862, 8, 0, 8},
+	 { 583, 8, 0, 4},
+	 { 700, 6, 0, 8},
+	 { 700, 13, 0, 8},
+	},
+
+	/* TEGRA_SOC2_SLOW: 312 MHz */
+	{{ 312, 13, 0, 12},	/* OSC 13M */
+	 { 260, 16, 0, 8},	/* OSC 19.2M */
+	 { 312, 12, 0, 12},	/* OSC 12M */
+	 { 312, 26, 0, 12},	/* OSC 26M */
+	},
+};
+
+void adjust_pllp_out_freqs(void)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	struct clk_pll *pll = &clkrst->crc_pll[CLOCK_ID_PERIPH];
+	u32 reg;
+
+	/* Set T30 PLLP_OUT1, 2, 3 & 4 freqs to 9.6, 48, 102 & 204MHz */
+	reg = readl(&pll->pll_out[0]);	/* OUTA, contains OUT2 / OUT1 */
+	reg |= (IN_408_OUT_48_DIVISOR << PLLP_OUT2_RATIO) | PLLP_OUT2_OVR
+		| (IN_408_OUT_9_6_DIVISOR << PLLP_OUT1_RATIO) | PLLP_OUT1_OVR;
+	writel(reg, &pll->pll_out[0]);
+
+	reg = readl(&pll->pll_out[1]);   /* OUTB, contains OUT4 / OUT3 */
+	reg |= (IN_408_OUT_204_DIVISOR << PLLP_OUT4_RATIO) | PLLP_OUT4_OVR
+		| (IN_408_OUT_102_DIVISOR << PLLP_OUT3_RATIO) | PLLP_OUT3_OVR;
+	writel(reg, &pll->pll_out[1]);
+}
+
+int pllx_set_rate(struct clk_pll_simple *pll , u32 divn, u32 divm,
+		u32 divp, u32 cpcon)
+{
+	u32 reg;
+
+	/* If PLLX is already enabled, just return */
+	if (readl(&pll->pll_base) & PLL_ENABLE_MASK) {
+		debug("pllx_set_rate: PLLX already enabled, returning\n");
+		return 0;
+	}
+
+	debug(" pllx_set_rate entry\n");
+
+	/* Set BYPASS, m, n and p to PLLX_BASE */
+	reg = PLL_BYPASS_MASK | (divm << PLL_DIVM_SHIFT);
+	reg |= ((divn << PLL_DIVN_SHIFT) | (divp << PLL_DIVP_SHIFT));
+	writel(reg, &pll->pll_base);
+
+	/* Set cpcon to PLLX_MISC */
+	reg = (cpcon << PLL_CPCON_SHIFT);
+
+	/* Set dccon to PLLX_MISC if freq > 600MHz */
+	if (divn > 600)
+		reg |= (1 << PLL_DCCON_SHIFT);
+	writel(reg, &pll->pll_misc);
+
+	/* Enable PLLX */
+	reg = readl(&pll->pll_base);
+	reg |= PLL_ENABLE_MASK;
+
+	/* Disable BYPASS */
+	reg &= ~PLL_BYPASS_MASK;
+	writel(reg, &pll->pll_base);
+
+	/* Set lock_enable to PLLX_MISC */
+	reg = readl(&pll->pll_misc);
+	reg |= PLL_LOCK_ENABLE_MASK;
+	writel(reg, &pll->pll_misc);
+
+	return 0;
+}
+
+void init_pllx(void)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	struct clk_pll_simple *pll = &clkrst->crc_pll_simple[SIMPLE_PLLX];
+	int chip_type;
+	enum clock_osc_freq osc;
+	struct clk_pll_table *sel;
+
+	debug("init_pllx entry\n");
+
+	/* get chip type */
+	chip_type = tegra_get_chip_type();
+	debug(" init_pllx: chip_type = %d\n", chip_type);
+
+	/* get osc freq */
+	osc = clock_get_osc_freq();
+	debug("  init_pllx: osc = %d\n", osc);
+
+	/* set pllx */
+	sel = &tegra_pll_x_table[chip_type][osc];
+	pllx_set_rate(pll, sel->n, sel->m, sel->p, sel->cpcon);
+
+	/* adjust PLLP_out1-4 on T30 */
+	if (chip_type == TEGRA_SOC_T30) {
+		debug("  init_pllx: adjusting PLLP out freqs\n");
+		adjust_pllp_out_freqs();
+	}
+}
+
+void enable_cpu_clock(int enable)
+{
+	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	u32 clk;
+
+	/*
+	 * NOTE:
+	 * Regardless of whether the request is to enable or disable the CPU
+	 * clock, every processor in the CPU complex except the master (CPU 0)
+	 * will have it's clock stopped because the AVP only talks to the
+	 * master.
+	 */
+
+	if (enable) {
+		/* Initialize PLLX */
+		init_pllx();
+
+		/* Wait until all clocks are stable */
+		udelay(PLL_STABILIZATION_DELAY);
+
+		writel(CCLK_BURST_POLICY, &clkrst->crc_cclk_brst_pol);
+		writel(SUPER_CCLK_DIVIDER, &clkrst->crc_super_cclk_div);
+	}
+
+	/*
+	 * Read the register containing the individual CPU clock enables and
+	 * always stop the clocks to CPUs > 0.
+	 */
+	clk = readl(&clkrst->crc_clk_cpu_cmplx);
+	clk |= 1 << CPU1_CLK_STP_SHIFT;
+#if defined(CONFIG_TEGRA30)
+	clk |= 1 << CPU2_CLK_STP_SHIFT;
+	clk |= 1 << CPU3_CLK_STP_SHIFT;
+#endif
+	/* Stop/Unstop the CPU clock */
+	clk &= ~CPU0_CLK_STP_MASK;
+	clk |= !enable << CPU0_CLK_STP_SHIFT;
+	writel(clk, &clkrst->crc_clk_cpu_cmplx);
+
+	clock_enable(PERIPH_ID_CPU);
+}
+
+static int is_cpu_powered(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+
+	return (readl(&pmc->pmc_pwrgate_status) & CPU_PWRED) ? 1 : 0;
+}
+
+static void remove_cpu_io_clamps(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	/* Remove the clamps on the CPU I/O signals */
+	reg = readl(&pmc->pmc_remove_clamping);
+	reg |= CPU_CLMP;
+	writel(reg, &pmc->pmc_remove_clamping);
+
+	/* Give I/O signals time to stabilize */
+	udelay(IO_STABILIZATION_DELAY);
+}
+
+void powerup_cpu(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+	int timeout = IO_STABILIZATION_DELAY;
+
+	if (!is_cpu_powered()) {
+		/* Toggle the CPU power state (OFF -> ON) */
+		reg = readl(&pmc->pmc_pwrgate_toggle);
+		reg &= PARTID_CP;
+		reg |= START_CP;
+		writel(reg, &pmc->pmc_pwrgate_toggle);
+
+		/* Wait for the power to come up */
+		while (!is_cpu_powered()) {
+			if (timeout-- == 0)
+				printf("CPU failed to power up!\n");
+			else
+				udelay(10);
+		}
+
+		/*
+		 * Remove the I/O clamps from CPU power partition.
+		 * Recommended only on a Warm boot, if the CPU partition gets
+		 * power gated. Shouldn't cause any harm when called after a
+		 * cold boot according to HW, probably just redundant.
+		 */
+		remove_cpu_io_clamps();
+	}
+}
+
+void reset_A9_cpu(int reset)
+{
+	/*
+	* NOTE:  Regardless of whether the request is to hold the CPU in reset
+	*        or take it out of reset, every processor in the CPU complex
+	*        except the master (CPU 0) will be held in reset because the
+	*        AVP only talks to the master. The AVP does not know that there
+	*        are multiple processors in the CPU complex.
+	*/
+	int mask = crc_rst_cpu | crc_rst_de | crc_rst_debug;
+	int num_cpus = get_num_cpus();
+	int cpu;
+
+	debug("reset_a9_cpu entry\n");
+	/* Hold CPUs 1 onwards in reset, and CPU 0 if asked */
+	for (cpu = 1; cpu < num_cpus; cpu++)
+		reset_cmplx_set_enable(cpu, mask, 1);
+	reset_cmplx_set_enable(0, mask, reset);
+
+	/* Enable/Disable master CPU reset */
+	reset_set_enable(PERIPH_ID_CPU, reset);
+}
+
+void clock_enable_coresight(int enable)
+{
+	u32 rst, src;
+
+	debug("clock_enable_coresight entry\n");
+	clock_set_enable(PERIPH_ID_CORESIGHT, enable);
+	reset_set_enable(PERIPH_ID_CORESIGHT, !enable);
+
+	if (enable) {
+		/*
+		 * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
+		 *  1.5, giving an effective frequency of 144MHz.
+		 * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
+		 *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
+		 *
+		 * Clock divider request for 204MHz would setup CSITE clock as
+		 * 144MHz for PLLP base 216MHz and 204MHz for PLLP base 408MHz
+		 */
+		if (tegra_get_chip_type() == TEGRA_SOC_T30)
+			src = CLK_DIVIDER(NVBL_PLLP_KHZ, 204000);
+		else
+			src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
+		clock_ll_set_source_divisor(PERIPH_ID_CSI, 0, src);
+
+		/* Unlock the CPU CoreSight interfaces */
+		rst = CORESIGHT_UNLOCK;
+		writel(rst, CSITE_CPU_DBG0_LAR);
+		writel(rst, CSITE_CPU_DBG1_LAR);
+#if defined(CONFIG_TEGRA30)
+		writel(rst, CSITE_CPU_DBG2_LAR);
+		writel(rst, CSITE_CPU_DBG3_LAR);
+#endif
+	}
+}
+
+void halt_avp(void)
+{
+	for (;;) {
+		writel((HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
+			| HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29)),
+			FLOW_CTLR_HALT_COP_EVENTS);
+	}
+}
diff --git a/arch/arm/cpu/arm720t/tegra-common/cpu.h b/arch/arm/cpu/arm720t/tegra-common/cpu.h
index 6804cd7..3e2ea3a 100644
--- a/arch/arm/cpu/arm720t/tegra-common/cpu.h
+++ b/arch/arm/cpu/arm720t/tegra-common/cpu.h
@@ -26,7 +26,11 @@
 #define PLL_STABILIZATION_DELAY (300)
 #define IO_STABILIZATION_DELAY	(1000)
 
+#if defined(CONFIG_TEGRA30)
+#define NVBL_PLLP_KHZ	(408000)
+#else	/* Tegra20 */
 #define NVBL_PLLP_KHZ	(216000)
+#endif
 
 #define PLLX_ENABLED		(1 << 30)
 #define CCLK_BURST_POLICY	0x20008888
@@ -44,50 +48,11 @@
 
 #define CORESIGHT_UNLOCK	0xC5ACCE55;
 
-/* AP20-Specific Base Addresses */
-
-/* AP20 Base physical address of SDRAM. */
-#define AP20_BASE_PA_SDRAM      0x00000000
-/* AP20 Base physical address of internal SRAM. */
-#define AP20_BASE_PA_SRAM       0x40000000
-/* AP20 Size of internal SRAM (256KB). */
-#define AP20_BASE_PA_SRAM_SIZE  0x00040000
-/* AP20 Base physical address of flash. */
-#define AP20_BASE_PA_NOR_FLASH  0xD0000000
-/* AP20 Base physical address of boot information table. */
-#define AP20_BASE_PA_BOOT_INFO  AP20_BASE_PA_SRAM
-
-/*
- * Super-temporary stacks for EXTREMELY early startup. The values chosen for
- * these addresses must be valid on ALL SOCs because this value is used before
- * we are able to differentiate between the SOC types.
- *
- * NOTE: The since CPU's stack will eventually be moved from IRAM to SDRAM, its
- *       stack is placed below the AVP stack. Once the CPU stack has been moved,
- *       the AVP is free to use the IRAM the CPU stack previously occupied if
- *       it should need to do so.
- *
- * NOTE: In multi-processor CPU complex configurations, each processor will have
- *       its own stack of size CPU_EARLY_BOOT_STACK_SIZE. CPU 0 will have a
- *       limit of CPU_EARLY_BOOT_STACK_LIMIT. Each successive CPU will have a
- *       stack limit that is CPU_EARLY_BOOT_STACK_SIZE less then the previous
- *       CPU.
- */
-
-/* Common AVP early boot stack limit */
-#define AVP_EARLY_BOOT_STACK_LIMIT	\
-	(AP20_BASE_PA_SRAM + (AP20_BASE_PA_SRAM_SIZE/2))
-/* Common AVP early boot stack size */
-#define AVP_EARLY_BOOT_STACK_SIZE	0x1000
-/* Common CPU early boot stack limit */
-#define CPU_EARLY_BOOT_STACK_LIMIT	\
-	(AVP_EARLY_BOOT_STACK_LIMIT - AVP_EARLY_BOOT_STACK_SIZE)
-/* Common CPU early boot stack size */
-#define CPU_EARLY_BOOT_STACK_SIZE	0x1000
-
 #define EXCEP_VECTOR_CPU_RESET_VECTOR	(NV_PA_EVP_BASE + 0x100)
 #define CSITE_CPU_DBG0_LAR		(NV_PA_CSITE_BASE + 0x10FB0)
 #define CSITE_CPU_DBG1_LAR		(NV_PA_CSITE_BASE + 0x12FB0)
+#define CSITE_CPU_DBG2_LAR		(NV_PA_CSITE_BASE + 0x14FB0)
+#define CSITE_CPU_DBG3_LAR		(NV_PA_CSITE_BASE + 0x16FB0)
 
 #define FLOW_CTLR_HALT_COP_EVENTS	(NV_PA_FLOW_BASE + 4)
 #define FLOW_MODE_STOP			2
@@ -95,6 +60,23 @@
 #define HALT_COP_EVENT_IRQ_1		(1 << 11)
 #define HALT_COP_EVENT_FIQ_1		(1 << 9)
 
-void start_cpu(u32 reset_vector);
-int ap20_cpu_is_cortexa9(void);
+#define FLOW_MODE_NONE		0
+
+#define SIMPLE_PLLX     (CLOCK_ID_XCPU - CLOCK_ID_FIRST_SIMPLE)
+
+struct clk_pll_table {
+	u16	n;
+	u16	m;
+	u8	p;
+	u8	cpcon;
+};
+
+void clock_enable_coresight(int enable);
+void enable_cpu_clock(int enable);
 void halt_avp(void)  __attribute__ ((noreturn));
+void init_pllx(void);
+void powerup_cpu(void);
+void reset_A9_cpu(int reset);
+void start_cpu(u32 reset_vector);
+int tegra_get_chip_type(void);
+void adjust_pllp_out_freqs(void);
diff --git a/arch/arm/cpu/arm720t/tegra-common/spl.c b/arch/arm/cpu/arm720t/tegra-common/spl.c
index c280ab7..a9a1c39 100644
--- a/arch/arm/cpu/arm720t/tegra-common/spl.c
+++ b/arch/arm/cpu/arm720t/tegra-common/spl.c
@@ -23,7 +23,6 @@
  * MA 02111-1307 USA
  */
 #include <common.h>
-#include "cpu.h"
 #include <spl.h>
 
 #include <asm/io.h>
@@ -32,7 +31,7 @@
 #include <asm/arch/tegra.h>
 #include <asm/arch-tegra/board.h>
 #include <asm/arch/spl.h>
-
+#include "cpu.h"
 
 void spl_board_init(void)
 {
diff --git a/arch/arm/cpu/arm720t/tegra20/cpu.c b/arch/arm/cpu/arm720t/tegra20/cpu.c
index ef7f375..2533899 100644
--- a/arch/arm/cpu/arm720t/tegra20/cpu.c
+++ b/arch/arm/cpu/arm720t/tegra20/cpu.c
@@ -1,160 +1,25 @@
 /*
-* (C) Copyright 2010-2011
-* NVIDIA Corporation <www.nvidia.com>
-*
-* See file CREDITS for list of people who contributed to this
-* project.
-*
-* This program is free software; you can redistribute it and/or
-* modify it under the terms of the GNU General Public License as
-* published by the Free Software Foundation; either version 2 of
-* the License, or (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
-* MA 02111-1307 USA
-*/
+ * Copyright (c) 2010-2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
 
 #include <common.h>
 #include <asm/io.h>
-#include <asm/arch/clock.h>
-#include <asm/arch/pinmux.h>
 #include <asm/arch/tegra.h>
-#include <asm/arch-tegra/clk_rst.h>
 #include <asm/arch-tegra/pmc.h>
-#include <asm/arch-tegra/scu.h>
 #include "../tegra-common/cpu.h"
 
-/* Returns 1 if the current CPU executing is a Cortex-A9, else 0 */
-int ap20_cpu_is_cortexa9(void)
-{
-	u32 id = readb(NV_PA_PG_UP_BASE + PG_UP_TAG_0);
-	return id == (PG_UP_TAG_0_PID_CPU & 0xff);
-}
-
-void init_pllx(void)
-{
-	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
-	struct clk_pll *pll = &clkrst->crc_pll[CLOCK_ID_XCPU];
-	u32 reg;
-
-	/* If PLLX is already enabled, just return */
-	if (readl(&pll->pll_base) & PLL_ENABLE_MASK)
-		return;
-
-	/* Set PLLX_MISC */
-	writel(1 << PLL_CPCON_SHIFT, &pll->pll_misc);
-
-	/* Use 12MHz clock here */
-	reg = PLL_BYPASS_MASK | (12 << PLL_DIVM_SHIFT);
-	reg |= 1000 << PLL_DIVN_SHIFT;
-	writel(reg, &pll->pll_base);
-
-	reg |= PLL_ENABLE_MASK;
-	writel(reg, &pll->pll_base);
-
-	reg &= ~PLL_BYPASS_MASK;
-	writel(reg, &pll->pll_base);
-}
-
-static void enable_cpu_clock(int enable)
-{
-	struct clk_rst_ctlr *clkrst = (struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
-	u32 clk;
-
-	/*
-	 * NOTE:
-	 * Regardless of whether the request is to enable or disable the CPU
-	 * clock, every processor in the CPU complex except the master (CPU 0)
-	 * will have it's clock stopped because the AVP only talks to the
-	 * master. The AVP does not know (nor does it need to know) that there
-	 * are multiple processors in the CPU complex.
-	 */
-
-	if (enable) {
-		/* Initialize PLLX */
-		init_pllx();
-
-		/* Wait until all clocks are stable */
-		udelay(PLL_STABILIZATION_DELAY);
-
-		writel(CCLK_BURST_POLICY, &clkrst->crc_cclk_brst_pol);
-		writel(SUPER_CCLK_DIVIDER, &clkrst->crc_super_cclk_div);
-	}
-
-	/*
-	 * Read the register containing the individual CPU clock enables and
-	 * always stop the clock to CPU 1.
-	 */
-	clk = readl(&clkrst->crc_clk_cpu_cmplx);
-	clk |= 1 << CPU1_CLK_STP_SHIFT;
-
-	/* Stop/Unstop the CPU clock */
-	clk &= ~CPU0_CLK_STP_MASK;
-	clk |= !enable << CPU0_CLK_STP_SHIFT;
-	writel(clk, &clkrst->crc_clk_cpu_cmplx);
-
-	clock_enable(PERIPH_ID_CPU);
-}
-
-static int is_cpu_powered(void)
-{
-	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
-
-	return (readl(&pmc->pmc_pwrgate_status) & CPU_PWRED) ? 1 : 0;
-}
-
-static void remove_cpu_io_clamps(void)
-{
-	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
-	u32 reg;
-
-	/* Remove the clamps on the CPU I/O signals */
-	reg = readl(&pmc->pmc_remove_clamping);
-	reg |= CPU_CLMP;
-	writel(reg, &pmc->pmc_remove_clamping);
-
-	/* Give I/O signals time to stabilize */
-	udelay(IO_STABILIZATION_DELAY);
-}
-
-static void powerup_cpu(void)
-{
-	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
-	u32 reg;
-	int timeout = IO_STABILIZATION_DELAY;
-
-	if (!is_cpu_powered()) {
-		/* Toggle the CPU power state (OFF -> ON) */
-		reg = readl(&pmc->pmc_pwrgate_toggle);
-		reg &= PARTID_CP;
-		reg |= START_CP;
-		writel(reg, &pmc->pmc_pwrgate_toggle);
-
-		/* Wait for the power to come up */
-		while (!is_cpu_powered()) {
-			if (timeout-- == 0)
-				printf("CPU failed to power up!\n");
-			else
-				udelay(10);
-		}
-
-		/*
-		 * Remove the I/O clamps from CPU power partition.
-		 * Recommended only on a Warm boot, if the CPU partition gets
-		 * power gated. Shouldn't cause any harm when called after a
-		 * cold boot according to HW, probably just redundant.
-		 */
-		remove_cpu_io_clamps();
-	}
-}
-
 static void enable_cpu_power_rail(void)
 {
 	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
@@ -173,49 +38,6 @@
 	udelay(3750);
 }
 
-static void reset_A9_cpu(int reset)
-{
-	/*
-	* NOTE:  Regardless of whether the request is to hold the CPU in reset
-	*        or take it out of reset, every processor in the CPU complex
-	*        except the master (CPU 0) will be held in reset because the
-	*        AVP only talks to the master. The AVP does not know that there
-	*        are multiple processors in the CPU complex.
-	*/
-
-	/* Hold CPU 1 in reset, and CPU 0 if asked */
-	reset_cmplx_set_enable(1, crc_rst_cpu | crc_rst_de | crc_rst_debug, 1);
-	reset_cmplx_set_enable(0, crc_rst_cpu | crc_rst_de | crc_rst_debug,
-			       reset);
-
-	/* Enable/Disable master CPU reset */
-	reset_set_enable(PERIPH_ID_CPU, reset);
-}
-
-static void clock_enable_coresight(int enable)
-{
-	u32 rst, src;
-
-	clock_set_enable(PERIPH_ID_CORESIGHT, enable);
-	reset_set_enable(PERIPH_ID_CORESIGHT, !enable);
-
-	if (enable) {
-		/*
-		 * Put CoreSight on PLLP_OUT0 (216 MHz) and divide it down by
-		 *  1.5, giving an effective frequency of 144MHz.
-		 * Set PLLP_OUT0 [bits31:30 = 00], and use a 7.1 divisor
-		 *  (bits 7:0), so 00000001b == 1.5 (n+1 + .5)
-		 */
-		src = CLK_DIVIDER(NVBL_PLLP_KHZ, 144000);
-		clock_ll_set_source_divisor(PERIPH_ID_CSI, 0, src);
-
-		/* Unlock the CPU CoreSight interfaces */
-		rst = 0xC5ACCE55;
-		writel(rst, CSITE_CPU_DBG0_LAR);
-		writel(rst, CSITE_CPU_DBG1_LAR);
-	}
-}
-
 void start_cpu(u32 reset_vector)
 {
 	/* Enable VDD_CPU */
@@ -246,13 +68,3 @@
 	/* Take the CPU out of reset */
 	reset_A9_cpu(0);
 }
-
-
-void halt_avp(void)
-{
-	for (;;) {
-		writel((HALT_COP_EVENT_JTAG | HALT_COP_EVENT_IRQ_1 \
-			| HALT_COP_EVENT_FIQ_1 | (FLOW_MODE_STOP<<29)),
-			FLOW_CTLR_HALT_COP_EVENTS);
-	}
-}
diff --git a/arch/arm/cpu/arm720t/tegra30/Makefile b/arch/arm/cpu/arm720t/tegra30/Makefile
new file mode 100644
index 0000000..bd96997
--- /dev/null
+++ b/arch/arm/cpu/arm720t/tegra30/Makefile
@@ -0,0 +1,41 @@
+#
+# Copyright (c) 2010-2012, NVIDIA CORPORATION.  All rights reserved.
+#
+# (C) Copyright 2000-2008
+# Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+include $(TOPDIR)/config.mk
+
+LIB	= $(obj)lib$(SOC).o
+
+COBJS-y	+= cpu.o
+
+SRCS	:= $(COBJS-y:.o=.c)
+OBJS	:= $(addprefix $(obj),$(COBJS-y))
+
+all:	$(obj).depend $(LIB)
+
+$(LIB):	$(OBJS)
+	$(call cmd_link_o_target, $(OBJS))
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/arch/arm/cpu/arm720t/tegra30/config.mk b/arch/arm/cpu/arm720t/tegra30/config.mk
new file mode 100644
index 0000000..2388c56
--- /dev/null
+++ b/arch/arm/cpu/arm720t/tegra30/config.mk
@@ -0,0 +1,19 @@
+#
+# Copyright (c) 2010-2012, NVIDIA CORPORATION.  All rights reserved.
+#
+# (C) Copyright 2002
+# Gary Jennejohn, DENX Software Engineering, <garyj@denx.de>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+USE_PRIVATE_LIBGCC = yes
diff --git a/arch/arm/cpu/arm720t/tegra30/cpu.c b/arch/arm/cpu/arm720t/tegra30/cpu.c
new file mode 100644
index 0000000..dedcdd9
--- /dev/null
+++ b/arch/arm/cpu/arm720t/tegra30/cpu.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2010-2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/flow.h>
+#include <asm/arch/tegra.h>
+#include <asm/arch-tegra/clk_rst.h>
+#include <asm/arch-tegra/pmc.h>
+#include <asm/arch-tegra/tegra_i2c.h>
+#include "../tegra-common/cpu.h"
+
+/* Tegra30-specific CPU init code */
+void tegra_i2c_ll_write_addr(uint addr, uint config)
+{
+	struct i2c_ctlr *reg = (struct i2c_ctlr *)TEGRA_DVC_BASE;
+
+	writel(addr, &reg->cmd_addr0);
+	writel(config, &reg->cnfg);
+}
+
+void tegra_i2c_ll_write_data(uint data, uint config)
+{
+	struct i2c_ctlr *reg = (struct i2c_ctlr *)TEGRA_DVC_BASE;
+
+	writel(data, &reg->cmd_data1);
+	writel(config, &reg->cnfg);
+}
+
+#define TPS65911_I2C_ADDR		0x5A
+#define TPS65911_VDDCTRL_OP_REG		0x28
+#define TPS65911_VDDCTRL_SR_REG		0x27
+#define TPS65911_VDDCTRL_OP_DATA	(0x2300 | TPS65911_VDDCTRL_OP_REG)
+#define TPS65911_VDDCTRL_SR_DATA	(0x0100 | TPS65911_VDDCTRL_SR_REG)
+#define I2C_SEND_2_BYTES		0x0A02
+
+static void enable_cpu_power_rail(void)
+{
+	struct pmc_ctlr *pmc = (struct pmc_ctlr *)NV_PA_PMC_BASE;
+	u32 reg;
+
+	debug("enable_cpu_power_rail entry\n");
+	reg = readl(&pmc->pmc_cntrl);
+	reg |= CPUPWRREQ_OE;
+	writel(reg, &pmc->pmc_cntrl);
+
+	/*
+	 * Bring up CPU VDD via the TPS65911x PMIC on the DVC I2C bus.
+	 * First set VDD to 1.4V, then enable the VDD regulator.
+	 */
+	tegra_i2c_ll_write_addr(TPS65911_I2C_ADDR, 2);
+	tegra_i2c_ll_write_data(TPS65911_VDDCTRL_OP_DATA, I2C_SEND_2_BYTES);
+	udelay(1000);
+	tegra_i2c_ll_write_data(TPS65911_VDDCTRL_SR_DATA, I2C_SEND_2_BYTES);
+	udelay(10 * 1000);
+}
+
+/**
+ * The T30 requires some special clock initialization, including setting up
+ * the dvc i2c, turning on mselect and selecting the G CPU cluster
+ */
+void t30_init_clocks(void)
+{
+	struct clk_rst_ctlr *clkrst =
+			(struct clk_rst_ctlr *)NV_PA_CLK_RST_BASE;
+	struct flow_ctlr *flow = (struct flow_ctlr *)NV_PA_FLOW_BASE;
+	u32 val;
+
+	debug("t30_init_clocks entry\n");
+	/* Set active CPU cluster to G */
+	clrbits_le32(flow->cluster_control, 1 << 0);
+
+	/*
+	 * Switch system clock to PLLP_OUT4 (108 MHz), AVP will now run
+	 * at 108 MHz. This is glitch free as only the source is changed, no
+	 * special precaution needed.
+	 */
+	val = (SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_FIQ_SOURCE_SHIFT) |
+		(SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_IRQ_SOURCE_SHIFT) |
+		(SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_RUN_SOURCE_SHIFT) |
+		(SCLK_SOURCE_PLLP_OUT4 << SCLK_SWAKEUP_IDLE_SOURCE_SHIFT) |
+		(SCLK_SYS_STATE_RUN << SCLK_SYS_STATE_SHIFT);
+	writel(val, &clkrst->crc_sclk_brst_pol);
+
+	writel(SUPER_SCLK_ENB_MASK, &clkrst->crc_super_sclk_div);
+
+	val = (0 << CLK_SYS_RATE_HCLK_DISABLE_SHIFT) |
+		(1 << CLK_SYS_RATE_AHB_RATE_SHIFT) |
+		(0 << CLK_SYS_RATE_PCLK_DISABLE_SHIFT) |
+		(0 << CLK_SYS_RATE_APB_RATE_SHIFT);
+	writel(val, &clkrst->crc_clk_sys_rate);
+
+	/* Put i2c, mselect in reset and enable clocks */
+	reset_set_enable(PERIPH_ID_DVC_I2C, 1);
+	clock_set_enable(PERIPH_ID_DVC_I2C, 1);
+	reset_set_enable(PERIPH_ID_MSELECT, 1);
+	clock_set_enable(PERIPH_ID_MSELECT, 1);
+
+	/* Switch MSELECT clock to PLLP (00) */
+	clock_ll_set_source(PERIPH_ID_MSELECT, 0);
+
+	/*
+	 * Our high-level clock routines are not available prior to
+	 * relocation. We use the low-level functions which require a
+	 * hard-coded divisor. Use CLK_M with divide by (n + 1 = 17)
+	 */
+	clock_ll_set_source_divisor(PERIPH_ID_DVC_I2C, 3, 16);
+
+	/*
+	 * Give clocks time to stabilize, then take i2c and mselect out of
+	 * reset
+	 */
+	udelay(1000);
+	reset_set_enable(PERIPH_ID_DVC_I2C, 0);
+	reset_set_enable(PERIPH_ID_MSELECT, 0);
+}
+
+static void set_cpu_running(int run)
+{
+	struct flow_ctlr *flow = (struct flow_ctlr *)NV_PA_FLOW_BASE;
+
+	debug("set_cpu_running entry, run = %d\n", run);
+	writel(run ? FLOW_MODE_NONE : FLOW_MODE_STOP, &flow->halt_cpu_events);
+}
+
+void start_cpu(u32 reset_vector)
+{
+	debug("start_cpu entry, reset_vector = %x\n", reset_vector);
+	t30_init_clocks();
+
+	/* Enable VDD_CPU */
+	enable_cpu_power_rail();
+
+	set_cpu_running(0);
+
+	/* Hold the CPUs in reset */
+	reset_A9_cpu(1);
+
+	/* Disable the CPU clock */
+	enable_cpu_clock(0);
+
+	/* Enable CoreSight */
+	clock_enable_coresight(1);
+
+	/*
+	 * Set the entry point for CPU execution from reset,
+	 *  if it's a non-zero value.
+	 */
+	if (reset_vector)
+		writel(reset_vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
+
+	/* Enable the CPU clock */
+	enable_cpu_clock(1);
+
+	/* If the CPU doesn't already have power, power it up */
+	powerup_cpu();
+
+	/* Take the CPU out of reset */
+	reset_A9_cpu(0);
+
+	set_cpu_running(1);
+}