ARMv8/FSL_LSCH3: Add FSL_LSCH3 SoC

Freescale LayerScape with Chassis Generation 3 is a set of SoCs with
ARMv8 cores and 3rd generation of Chassis. We use different MMU setup
to support memory map and cache attribute for these SoCs. MMU and cache
are enabled very early to bootst performance, especially for early
development on emulators. After u-boot relocates to DDR, a new MMU
table with QBMan cache access is created in DDR. SMMU pagesize is set
in SMMU_sACR register. Both DDR3 and DDR4 are supported.

Signed-off-by: York Sun <yorksun@freescale.com>
Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
Signed-off-by: Arnab Basu <arnab.basu@freescale.com>
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index af3c494..9dbcdf2 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -73,12 +73,17 @@
 	__asm_invalidate_dcache_all();
 }
 
+void __weak flush_l3_cache(void)
+{
+}
+
 /*
  * Performs a clean & invalidation of the entire data cache at all levels
  */
 void flush_dcache_all(void)
 {
 	__asm_flush_dcache_all();
+	flush_l3_cache();
 }
 
 /*
@@ -211,7 +216,7 @@
  * Enable dCache & iCache, whether cache is actually enabled
  * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
  */
-void enable_caches(void)
+void __weak enable_caches(void)
 {
 	icache_enable();
 	dcache_enable();
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
new file mode 100644
index 0000000..9249537
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
@@ -0,0 +1,9 @@
+#
+# Copyright 2014, Freescale Semiconductor
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+
+obj-y += cpu.o
+obj-y += lowlevel.o
+obj-y += speed.o
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/README b/arch/arm/cpu/armv8/fsl-lsch3/README
new file mode 100644
index 0000000..de34a91
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/README
@@ -0,0 +1,10 @@
+#
+# Copyright 2014 Freescale Semiconductor
+#
+# SPDX-License-Identifier:      GPL-2.0+
+#
+
+Freescale LayerScape with Chassis Generation 3
+
+This architecture supports Freescale ARMv8 SoCs with Chassis generation 3,
+for example LS2100A.
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
new file mode 100644
index 0000000..46965f0
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/armv8/mmu.h>
+#include <asm/io.h>
+#include <asm/arch-fsl-lsch3/immap_lsch3.h>
+#include "cpu.h"
+#include "speed.h"
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+/*
+ * To start MMU before DDR is available, we create MMU table in SRAM.
+ * The base address of SRAM is CONFIG_SYS_FSL_OCRAM_BASE. We use three
+ * levels of translation tables here to cover 40-bit address space.
+ * We use 4KB granule size, with 40 bits physical address, T0SZ=24
+ * Level 0 IA[39], table address @0
+ * Level 1 IA[31:30], table address @01000, 0x2000
+ * Level 2 IA[29:21], table address @0x3000
+ */
+
+#define SECTION_SHIFT_L0	39UL
+#define SECTION_SHIFT_L1	30UL
+#define SECTION_SHIFT_L2	21UL
+#define BLOCK_SIZE_L0		0x8000000000UL
+#define BLOCK_SIZE_L1		(1 << SECTION_SHIFT_L1)
+#define BLOCK_SIZE_L2		(1 << SECTION_SHIFT_L2)
+#define CONFIG_SYS_IFC_BASE	0x30000000
+#define CONFIG_SYS_IFC_SIZE	0x10000000
+#define CONFIG_SYS_IFC_BASE2	0x500000000
+#define CONFIG_SYS_IFC_SIZE2	0x100000000
+#define TCR_EL2_PS_40BIT	(2 << 16)
+#define LSCH3_VA_BITS		(40)
+#define LSCH3_TCR	(TCR_TG0_4K		| \
+			TCR_EL2_PS_40BIT	| \
+			TCR_SHARED_NON		| \
+			TCR_ORGN_NC		| \
+			TCR_IRGN_NC		| \
+			TCR_T0SZ(LSCH3_VA_BITS))
+
+/*
+ * Final MMU
+ * Let's start from the same layout as early MMU and modify as needed.
+ * IFC regions will be cache-inhibit.
+ */
+#define FINAL_QBMAN_CACHED_MEM	0x818000000UL
+#define FINAL_QBMAN_CACHED_SIZE	0x4000000
+
+
+static inline void early_mmu_setup(void)
+{
+	int el;
+	u64 i;
+	u64 section_l1t0, section_l1t1, section_l2;
+	u64 *level0_table = (u64 *)CONFIG_SYS_FSL_OCRAM_BASE;
+	u64 *level1_table_0 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x1000);
+	u64 *level1_table_1 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x2000);
+	u64 *level2_table = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x3000);
+
+
+	level0_table[0] =
+		(u64)level1_table_0 | PMD_TYPE_TABLE;
+	level0_table[1] =
+		(u64)level1_table_1 | PMD_TYPE_TABLE;
+
+	/*
+	 * set level 1 table 0 to cache_inhibit, covering 0 to 512GB
+	 * set level 1 table 1 to cache enabled, covering 512GB to 1TB
+	 * set level 2 table to cache-inhibit, covering 0 to 1GB
+	 */
+	section_l1t0 = 0;
+	section_l1t1 = BLOCK_SIZE_L0;
+	section_l2 = 0;
+	for (i = 0; i < 512; i++) {
+		set_pgtable_section(level1_table_0, i, section_l1t0,
+				    MT_DEVICE_NGNRNE);
+		set_pgtable_section(level1_table_1, i, section_l1t1,
+				    MT_NORMAL);
+		set_pgtable_section(level2_table, i, section_l2,
+				    MT_DEVICE_NGNRNE);
+		section_l1t0 += BLOCK_SIZE_L1;
+		section_l1t1 += BLOCK_SIZE_L1;
+		section_l2 += BLOCK_SIZE_L2;
+	}
+
+	level1_table_0[0] =
+		(u64)level2_table | PMD_TYPE_TABLE;
+	level1_table_0[1] =
+		0x40000000 | PMD_SECT_AF | PMD_TYPE_SECT |
+		PMD_ATTRINDX(MT_DEVICE_NGNRNE);
+	level1_table_0[2] =
+		0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
+		PMD_ATTRINDX(MT_NORMAL);
+	level1_table_0[3] =
+		0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
+		PMD_ATTRINDX(MT_NORMAL);
+
+	/* Rewrite table to enable cache */
+	set_pgtable_section(level2_table,
+			    CONFIG_SYS_FSL_OCRAM_BASE >> SECTION_SHIFT_L2,
+			    CONFIG_SYS_FSL_OCRAM_BASE,
+			    MT_NORMAL);
+	for (i = CONFIG_SYS_IFC_BASE >> SECTION_SHIFT_L2;
+	     i < (CONFIG_SYS_IFC_BASE + CONFIG_SYS_IFC_SIZE)
+	     >> SECTION_SHIFT_L2; i++) {
+		section_l2 = i << SECTION_SHIFT_L2;
+		set_pgtable_section(level2_table, i,
+				    section_l2, MT_NORMAL);
+	}
+
+	el = current_el();
+	set_ttbr_tcr_mair(el, (u64)level0_table, LSCH3_TCR, MEMORY_ATTRIBUTES);
+	set_sctlr(get_sctlr() | CR_M);
+}
+
+/*
+ * This final tale looks similar to early table, but different in detail.
+ * These tables are in regular memory. Cache on IFC is disabled. One sub table
+ * is added to enable cache for QBMan.
+ */
+static inline void final_mmu_setup(void)
+{
+	int el;
+	u64 i, tbl_base, tbl_limit, section_base;
+	u64 section_l1t0, section_l1t1, section_l2;
+	u64 *level0_table = (u64 *)gd->arch.tlb_addr;
+	u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + 0x1000);
+	u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + 0x2000);
+	u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + 0x3000);
+	u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + 0x4000);
+
+
+	level0_table[0] =
+		(u64)level1_table_0 | PMD_TYPE_TABLE;
+	level0_table[1] =
+		(u64)level1_table_1 | PMD_TYPE_TABLE;
+
+	/*
+	 * set level 1 table 0 to cache_inhibit, covering 0 to 512GB
+	 * set level 1 table 1 to cache enabled, covering 512GB to 1TB
+	 * set level 2 table 0 to cache-inhibit, covering 0 to 1GB
+	 */
+	section_l1t0 = 0;
+	section_l1t1 = BLOCK_SIZE_L0;
+	section_l2 = 0;
+	for (i = 0; i < 512; i++) {
+		set_pgtable_section(level1_table_0, i, section_l1t0,
+				    MT_DEVICE_NGNRNE);
+		set_pgtable_section(level1_table_1, i, section_l1t1,
+				    MT_NORMAL);
+		set_pgtable_section(level2_table_0, i, section_l2,
+				    MT_DEVICE_NGNRNE);
+		section_l1t0 += BLOCK_SIZE_L1;
+		section_l1t1 += BLOCK_SIZE_L1;
+		section_l2 += BLOCK_SIZE_L2;
+	}
+
+	level1_table_0[0] =
+		(u64)level2_table_0 | PMD_TYPE_TABLE;
+	level1_table_0[2] =
+		0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
+		PMD_ATTRINDX(MT_NORMAL);
+	level1_table_0[3] =
+		0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
+		PMD_ATTRINDX(MT_NORMAL);
+
+	/* Rewrite table to enable cache */
+	set_pgtable_section(level2_table_0,
+			    CONFIG_SYS_FSL_OCRAM_BASE >> SECTION_SHIFT_L2,
+			    CONFIG_SYS_FSL_OCRAM_BASE,
+			    MT_NORMAL);
+
+	/*
+	 * Fill in other part of tables if cache is needed
+	 * If finer granularity than 1GB is needed, sub table
+	 * should be created.
+	 */
+	section_base = FINAL_QBMAN_CACHED_MEM & ~(BLOCK_SIZE_L1 - 1);
+	i = section_base >> SECTION_SHIFT_L1;
+	level1_table_0[i] = (u64)level2_table_1 | PMD_TYPE_TABLE;
+	section_l2 = section_base;
+	for (i = 0; i < 512; i++) {
+		set_pgtable_section(level2_table_1, i, section_l2,
+				    MT_DEVICE_NGNRNE);
+		section_l2 += BLOCK_SIZE_L2;
+	}
+	tbl_base = FINAL_QBMAN_CACHED_MEM & (BLOCK_SIZE_L1 - 1);
+	tbl_limit = (FINAL_QBMAN_CACHED_MEM + FINAL_QBMAN_CACHED_SIZE) &
+		    (BLOCK_SIZE_L1 - 1);
+	for (i = tbl_base >> SECTION_SHIFT_L2;
+	     i < tbl_limit >> SECTION_SHIFT_L2; i++) {
+		section_l2 = section_base + (i << SECTION_SHIFT_L2);
+		set_pgtable_section(level2_table_1, i,
+				    section_l2, MT_NORMAL);
+	}
+
+	/* flush new MMU table */
+	flush_dcache_range(gd->arch.tlb_addr,
+			   gd->arch.tlb_addr +  gd->arch.tlb_size);
+
+	/* point TTBR to the new table */
+	el = current_el();
+	asm volatile("dsb sy");
+	if (el == 1) {
+		asm volatile("msr ttbr0_el1, %0"
+			     : : "r" ((u64)level0_table) : "memory");
+	} else if (el == 2) {
+		asm volatile("msr ttbr0_el2, %0"
+			     : : "r" ((u64)level0_table) : "memory");
+	} else if (el == 3) {
+		asm volatile("msr ttbr0_el3, %0"
+			     : : "r" ((u64)level0_table) : "memory");
+	} else {
+		hang();
+	}
+	asm volatile("isb");
+
+	/*
+	 * MMU is already enabled, just need to invalidate TLB to load the
+	 * new table. The new table is compatible with the current table, if
+	 * MMU somehow walks through the new table before invalidation TLB,
+	 * it still works. So we don't need to turn off MMU here.
+	 */
+}
+
+int arch_cpu_init(void)
+{
+	icache_enable();
+	__asm_invalidate_dcache_all();
+	__asm_invalidate_tlb_all();
+	early_mmu_setup();
+	set_sctlr(get_sctlr() | CR_C);
+	return 0;
+}
+
+/*
+ * flush_l3_cache
+ * Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power
+ * state, by writing to HP-F P-state request register.
+ * Fixme: This function should moved to a common file if other SoCs also use
+ * the same Dickens.
+ */
+#define HNF0_PSTATE_REQ 0x04200010
+#define HNF1_PSTATE_REQ 0x04210010
+#define HNF2_PSTATE_REQ 0x04220010
+#define HNF3_PSTATE_REQ 0x04230010
+#define HNF4_PSTATE_REQ 0x04240010
+#define HNF5_PSTATE_REQ 0x04250010
+#define HNF6_PSTATE_REQ 0x04260010
+#define HNF7_PSTATE_REQ 0x04270010
+#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC)
+#define HNFPSTAT_FAM	0x3
+#define HNFPSTAT_SFONLY 0x01
+
+static void hnf_pstate_req(u64 *ptr, u64 state)
+{
+	int timeout = 1000;
+	out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3));
+	ptr++;
+	/* checking if the transition is completed */
+	while (timeout > 0) {
+		if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3))
+			break;
+		udelay(100);
+		timeout--;
+	}
+}
+
+void flush_l3_cache(void)
+{
+	hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY);
+	hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM);
+	hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM);
+	hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM);
+	hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM);
+	hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM);
+	hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM);
+	hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM);
+	hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM);
+}
+
+/*
+ * This function is called from lib/board.c.
+ * It recreates MMU table in main memory. MMU and d-cache are enabled earlier.
+ * There is no need to disable d-cache for this operation.
+ */
+void enable_caches(void)
+{
+	final_mmu_setup();
+	__asm_invalidate_tlb_all();
+}
+#endif
+
+static inline u32 initiator_type(u32 cluster, int init_id)
+{
+	struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
+	u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
+	u32 type = in_le32(&gur->tp_ityp[idx]);
+
+	if (type & TP_ITYP_AV)
+		return type;
+
+	return 0;
+}
+
+u32 cpu_mask(void)
+{
+	struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
+	int i = 0, count = 0;
+	u32 cluster, type, mask = 0;
+
+	do {
+		int j;
+		cluster = in_le32(&gur->tp_cluster[i].lower);
+		for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
+			type = initiator_type(cluster, j);
+			if (type) {
+				if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
+					mask |= 1 << count;
+				count++;
+			}
+		}
+		i++;
+	} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
+
+	return mask;
+}
+
+/*
+ * Return the number of cores on this SOC.
+ */
+int cpu_numcores(void)
+{
+	return hweight32(cpu_mask());
+}
+
+int fsl_qoriq_core_to_cluster(unsigned int core)
+{
+	struct ccsr_gur __iomem *gur =
+		(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
+	int i = 0, count = 0;
+	u32 cluster;
+
+	do {
+		int j;
+		cluster = in_le32(&gur->tp_cluster[i].lower);
+		for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
+			if (initiator_type(cluster, j)) {
+				if (count == core)
+					return i;
+				count++;
+			}
+		}
+		i++;
+	} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
+
+	return -1;      /* cannot identify the cluster */
+}
+
+u32 fsl_qoriq_core_to_type(unsigned int core)
+{
+	struct ccsr_gur __iomem *gur =
+		(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
+	int i = 0, count = 0;
+	u32 cluster, type;
+
+	do {
+		int j;
+		cluster = in_le32(&gur->tp_cluster[i].lower);
+		for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
+			type = initiator_type(cluster, j);
+			if (type) {
+				if (count == core)
+					return type;
+				count++;
+			}
+		}
+		i++;
+	} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
+
+	return -1;      /* cannot identify the cluster */
+}
+
+#ifdef CONFIG_DISPLAY_CPUINFO
+int print_cpuinfo(void)
+{
+	struct sys_info sysinfo;
+	char buf[32];
+	unsigned int i, core;
+	u32 type;
+
+	get_sys_info(&sysinfo);
+	puts("Clock Configuration:");
+	for_each_cpu(i, core, cpu_numcores(), cpu_mask()) {
+		if (!(i % 3))
+			puts("\n       ");
+		type = TP_ITYP_VER(fsl_qoriq_core_to_type(core));
+		printf("CPU%d(%s):%-4s MHz  ", core,
+		       type == TY_ITYP_VER_A7 ? "A7 " :
+		       (type == TY_ITYP_VER_A53 ? "A53" :
+			(type == TY_ITYP_VER_A57 ? "A57" : "   ")),
+		       strmhz(buf, sysinfo.freq_processor[core]));
+	}
+	printf("\n       Bus:      %-4s MHz  ",
+	       strmhz(buf, sysinfo.freq_systembus));
+	printf("DDR:      %-4s MHz", strmhz(buf, sysinfo.freq_ddrbus));
+	puts("\n");
+
+	return 0;
+}
+#endif
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
new file mode 100644
index 0000000..28544d7
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
@@ -0,0 +1,7 @@
+/*
+ * Copyright 2014, Freescale Semiconductor
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+int fsl_qoriq_core_to_cluster(unsigned int core);
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
new file mode 100644
index 0000000..ad32b6c
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
@@ -0,0 +1,65 @@
+/*
+ * (C) Copyright 2014 Freescale Semiconductor
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ *
+ * Extracted from armv8/start.S
+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+
+ENTRY(lowlevel_init)
+	mov	x29, lr			/* Save LR */
+
+	/* Set the SMMU page size in the sACR register */
+	ldr	x1, =SMMU_BASE
+	ldr	w0, [x1, #0x10]
+	orr	w0, w0, #1 << 16  /* set sACR.pagesize to indicate 64K page */
+	str	w0, [x1, #0x10]
+
+	/* Initialize GIC Secure Bank Status */
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
+	branch_if_slave x0, 1f
+	ldr	x0, =GICD_BASE
+	bl	gic_init_secure
+1:
+#ifdef CONFIG_GICV3
+	ldr	x0, =GICR_BASE
+	bl	gic_init_secure_percpu
+#elif defined(CONFIG_GICV2)
+	ldr	x0, =GICD_BASE
+	ldr	x1, =GICC_BASE
+	bl	gic_init_secure_percpu
+#endif
+#endif
+
+	branch_if_master x0, x1, 1f
+
+	/*
+	 * Slave should wait for master clearing spin table.
+	 * This sync prevent salves observing incorrect
+	 * value of spin table and jumping to wrong place.
+	 */
+#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
+#ifdef CONFIG_GICV2
+	ldr	x0, =GICC_BASE
+#endif
+	bl	gic_wait_for_interrupt
+#endif
+
+	/*
+	 * All processors will enter EL2 and optionally EL1.
+	 */
+	bl	armv8_switch_to_el2
+#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
+	bl	armv8_switch_to_el1
+#endif
+	b	2f
+
+1:
+2:
+	mov	lr, x29			/* Restore LR */
+	ret
+ENDPROC(lowlevel_init)
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.c b/arch/arm/cpu/armv8/fsl-lsch3/speed.c
new file mode 100644
index 0000000..dc4a34b
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2014, Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ *
+ * Derived from arch/power/cpu/mpc85xx/speed.c
+ */
+
+#include <common.h>
+#include <linux/compiler.h>
+#include <fsl_ifc.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/arch-fsl-lsch3/immap_lsch3.h>
+#include <asm/arch/clock.h>
+#include "cpu.h"
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#ifndef CONFIG_SYS_FSL_NUM_CC_PLLS
+#define CONFIG_SYS_FSL_NUM_CC_PLLS	6
+#endif
+
+
+void get_sys_info(struct sys_info *sys_info)
+{
+	struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
+#ifdef CONFIG_FSL_IFC
+	struct fsl_ifc *ifc_regs = (void *)CONFIG_SYS_IFC_ADDR;
+	u32 ccr;
+#endif
+	struct ccsr_clk_cluster_group __iomem *clk_grp[2] = {
+		(void *)(CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR),
+		(void *)(CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR)
+	};
+	struct ccsr_clk_ctrl __iomem *clk_ctrl =
+		(void *)(CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR);
+	unsigned int cpu;
+	const u8 core_cplx_pll[16] = {
+		[0] = 0,	/* CC1 PPL / 1 */
+		[1] = 0,	/* CC1 PPL / 2 */
+		[2] = 0,	/* CC1 PPL / 4 */
+		[4] = 1,	/* CC2 PPL / 1 */
+		[5] = 1,	/* CC2 PPL / 2 */
+		[6] = 1,	/* CC2 PPL / 4 */
+		[8] = 2,	/* CC3 PPL / 1 */
+		[9] = 2,	/* CC3 PPL / 2 */
+		[10] = 2,	/* CC3 PPL / 4 */
+		[12] = 3,	/* CC4 PPL / 1 */
+		[13] = 3,	/* CC4 PPL / 2 */
+		[14] = 3,	/* CC4 PPL / 4 */
+	};
+
+	const u8 core_cplx_pll_div[16] = {
+		[0] = 1,	/* CC1 PPL / 1 */
+		[1] = 2,	/* CC1 PPL / 2 */
+		[2] = 4,	/* CC1 PPL / 4 */
+		[4] = 1,	/* CC2 PPL / 1 */
+		[5] = 2,	/* CC2 PPL / 2 */
+		[6] = 4,	/* CC2 PPL / 4 */
+		[8] = 1,	/* CC3 PPL / 1 */
+		[9] = 2,	/* CC3 PPL / 2 */
+		[10] = 4,	/* CC3 PPL / 4 */
+		[12] = 1,	/* CC4 PPL / 1 */
+		[13] = 2,	/* CC4 PPL / 2 */
+		[14] = 4,	/* CC4 PPL / 4 */
+	};
+
+	uint i, cluster;
+	uint freq_c_pll[CONFIG_SYS_FSL_NUM_CC_PLLS];
+	uint ratio[CONFIG_SYS_FSL_NUM_CC_PLLS];
+	unsigned long sysclk = CONFIG_SYS_CLK_FREQ;
+	int cc_group[12] = CONFIG_SYS_FSL_CLUSTER_CLOCKS;
+	u32 c_pll_sel, cplx_pll;
+	void *offset;
+
+	sys_info->freq_systembus = sysclk;
+#ifdef CONFIG_DDR_CLK_FREQ
+	sys_info->freq_ddrbus = CONFIG_DDR_CLK_FREQ;
+#else
+	sys_info->freq_ddrbus = sysclk;
+#endif
+
+	sys_info->freq_systembus *= (in_le32(&gur->rcwsr[0]) >>
+			FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT) &
+			FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK;
+	sys_info->freq_ddrbus *= (in_le32(&gur->rcwsr[0]) >>
+			FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT) &
+			FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK;
+
+	for (i = 0; i < CONFIG_SYS_FSL_NUM_CC_PLLS; i++) {
+		/*
+		 * fixme: prefer to combine the following into one line, but
+		 * cannot pass compiling without warning about in_le32.
+		 */
+		offset = (void *)((size_t)clk_grp[i/3] +
+			 offsetof(struct ccsr_clk_cluster_group,
+				  pllngsr[i%3].gsr));
+		ratio[i] = (in_le32(offset) >> 1) & 0x3f;
+		if (ratio[i] > 4)
+			freq_c_pll[i] = sysclk * ratio[i];
+		else
+			freq_c_pll[i] = sys_info->freq_systembus * ratio[i];
+	}
+
+	for_each_cpu(i, cpu, cpu_numcores(), cpu_mask()) {
+		cluster = fsl_qoriq_core_to_cluster(cpu);
+		c_pll_sel = (in_le32(&clk_ctrl->clkcncsr[cluster].csr) >> 27)
+			    & 0xf;
+		cplx_pll = core_cplx_pll[c_pll_sel];
+		cplx_pll += cc_group[cluster] - 1;
+		sys_info->freq_processor[cpu] =
+			freq_c_pll[cplx_pll] / core_cplx_pll_div[c_pll_sel];
+	}
+
+#if defined(CONFIG_FSL_IFC)
+	ccr = in_le32(&ifc_regs->ifc_ccr);
+	ccr = ((ccr & IFC_CCR_CLK_DIV_MASK) >> IFC_CCR_CLK_DIV_SHIFT) + 1;
+
+	sys_info->freq_localbus = sys_info->freq_systembus / ccr;
+#endif
+}
+
+
+int get_clocks(void)
+{
+	struct sys_info sys_info;
+	get_sys_info(&sys_info);
+	gd->cpu_clk = sys_info.freq_processor[0];
+	gd->bus_clk = sys_info.freq_systembus;
+	gd->mem_clk = sys_info.freq_ddrbus;
+
+#if defined(CONFIG_FSL_ESDHC)
+	gd->arch.sdhc_clk = gd->bus_clk / 2;
+#endif /* defined(CONFIG_FSL_ESDHC) */
+
+	if (gd->cpu_clk != 0)
+		return 0;
+	else
+		return 1;
+}
+
+/********************************************
+ * get_bus_freq
+ * return system bus freq in Hz
+ *********************************************/
+ulong get_bus_freq(ulong dummy)
+{
+	if (!gd->bus_clk)
+		get_clocks();
+
+	return gd->bus_clk;
+}
+
+/********************************************
+ * get_ddr_freq
+ * return ddr bus freq in Hz
+ *********************************************/
+ulong get_ddr_freq(ulong dummy)
+{
+	if (!gd->mem_clk)
+		get_clocks();
+
+	return gd->mem_clk;
+}
+
+unsigned int mxc_get_clock(enum mxc_clock clk)
+{
+	switch (clk) {
+	case MXC_I2C_CLK:
+		return get_bus_freq(0) / 2;
+	default:
+		printf("Unsupported clock\n");
+	}
+	return 0;
+}
diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.h b/arch/arm/cpu/armv8/fsl-lsch3/speed.h
new file mode 100644
index 0000000..15af5b9
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.h
@@ -0,0 +1,7 @@
+/*
+ * Copyright 2014, Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+void get_sys_info(struct sys_info *sys_info);
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/clock.h b/arch/arm/include/asm/arch-fsl-lsch3/clock.h
new file mode 100644
index 0000000..831af0b
--- /dev/null
+++ b/arch/arm/include/asm/arch-fsl-lsch3/clock.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ *
+ */
+
+#ifndef __ASM_ARCH_FSL_LSCH3_CLOCK_H_
+#define __ASM_ARCH_FSL_LSCH3_CLOCK_H_
+
+#include <common.h>
+
+enum mxc_clock {
+	MXC_ARM_CLK = 0,
+	MXC_BUS_CLK,
+	MXC_UART_CLK,
+	MXC_ESDHC_CLK,
+	MXC_I2C_CLK,
+};
+
+unsigned int mxc_get_clock(enum mxc_clock clk);
+
+#endif /* __ASM_ARCH_FSL_LSCH3_CLOCK_H_ */
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/config.h b/arch/arm/include/asm/arch-fsl-lsch3/config.h
new file mode 100644
index 0000000..c987a19
--- /dev/null
+++ b/arch/arm/include/asm/arch-fsl-lsch3/config.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2014, Freescale Semiconductor
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _ASM_ARMV8_FSL_LSCH3_CONFIG_
+#define _ASM_ARMV8_FSL_LSCH3_CONFIG_
+
+#include <fsl_ddrc_version.h>
+
+#define CONFIG_SYS_FSL_OCRAM_BASE	0x18000000	/* initial RAM */
+/* Link Definitions */
+#define CONFIG_SYS_INIT_SP_ADDR		(CONFIG_SYS_FSL_OCRAM_BASE + 0xfff0)
+
+#define CONFIG_SYS_IMMR				0x01000000
+#define CONFIG_SYS_FSL_DDR_ADDR			(CONFIG_SYS_IMMR + 0x00080000)
+#define CONFIG_SYS_FSL_DDR2_ADDR		(CONFIG_SYS_IMMR + 0x00090000)
+#define CONFIG_SYS_FSL_GUTS_ADDR		(CONFIG_SYS_IMMR + 0x00E00000)
+#define CONFIG_SYS_FSL_PMU_ADDR			(CONFIG_SYS_IMMR + 0x00E30000)
+#define CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR	(CONFIG_SYS_IMMR + 0x00300000)
+#define CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR	(CONFIG_SYS_IMMR + 0x00310000)
+#define CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR	(CONFIG_SYS_IMMR + 0x00370000)
+#define CONFIG_SYS_IFC_ADDR			(CONFIG_SYS_IMMR + 0x01240000)
+#define CONFIG_SYS_NS16550_COM1			(CONFIG_SYS_IMMR + 0x011C0500)
+#define CONFIG_SYS_NS16550_COM2			(CONFIG_SYS_IMMR + 0x011C0600)
+#define CONFIG_SYS_FSL_TIMER_ADDR		0x023d0000
+#define CONFIG_SYS_FSL_PMU_CLTBENR		(CONFIG_SYS_FSL_PMU_ADDR + \
+						 0x18A0)
+
+#define I2C1_BASE_ADDR				(CONFIG_SYS_IMMR + 0x01000000)
+#define I2C2_BASE_ADDR				(CONFIG_SYS_IMMR + 0x01010000)
+#define I2C3_BASE_ADDR				(CONFIG_SYS_IMMR + 0x01020000)
+#define I2C4_BASE_ADDR				(CONFIG_SYS_IMMR + 0x01030000)
+
+/* Generic Interrupt Controller Definitions */
+#define GICD_BASE		0x06000000
+#define GICR_BASE		0x06100000
+
+/* SMMU Defintions */
+#define SMMU_BASE		0x05000000 /* GR0 Base */
+
+/* DDR */
+#define CONFIG_SYS_FSL_DDR_LE
+#define CONFIG_VERY_BIG_RAM
+#define CONFIG_SYS_FSL_DDRC_ARM_GEN3	/* Enable Freescale ARM DDR3 driver */
+#define CONFIG_SYS_FSL_DDR		/* Freescale DDR driver */
+#define CONFIG_SYS_LS2_DDR_BLOCK1_SIZE	((phys_size_t)2 << 30)
+#define CONFIG_MAX_MEM_MAPPED		CONFIG_SYS_LS2_DDR_BLOCK1_SIZE
+#define CONFIG_SYS_FSL_DDR_VER		FSL_DDR_VER_5_0
+
+
+/* IFC */
+#define CONFIG_SYS_FSL_IFC_LE
+
+#ifdef CONFIG_LS2100A
+#define CONFIG_MAX_CPUS				16
+#define CONFIG_SYS_FSL_IFC_BANK_COUNT		8
+#define CONFIG_NUM_DDR_CONTROLLERS		2
+#define CONFIG_SYS_FSL_CLUSTER_CLOCKS		{ 1, 1, 4, 4 }
+#else
+#error SoC not defined
+#endif
+
+#endif /* _ASM_ARMV8_FSL_LSCH3_CONFIG_ */
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/gpio.h b/arch/arm/include/asm/arch-fsl-lsch3/gpio.h
new file mode 100644
index 0000000..f23a78c
--- /dev/null
+++ b/arch/arm/include/asm/arch-fsl-lsch3/gpio.h
@@ -0,0 +1,9 @@
+/*
+ * Copyright 2014, Freescale Semiconductor
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _ASM_ARMV8_FSL_LSCH3_GPIO_H_
+#define _ASM_ARMV8_FSL_LSCH3_GPIO_H_
+#endif	/* _ASM_ARMV8_FSL_LSCH3_GPIO_H_ */
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h
new file mode 100644
index 0000000..18e66bd
--- /dev/null
+++ b/arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h
@@ -0,0 +1,116 @@
+/*
+ * LayerScape Internal Memory Map
+ *
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef __ARCH_FSL_LSCH3_IMMAP_H
+#define __ARCH_FSL_LSCH3_IMMAP_H_
+
+/* This is chassis generation 3 */
+
+struct sys_info {
+	unsigned long freq_processor[CONFIG_MAX_CPUS];
+	unsigned long freq_systembus;
+	unsigned long freq_ddrbus;
+	unsigned long freq_localbus;
+	unsigned long freq_qe;
+#ifdef CONFIG_SYS_DPAA_FMAN
+	unsigned long freq_fman[CONFIG_SYS_NUM_FMAN];
+#endif
+#ifdef CONFIG_SYS_DPAA_QBMAN
+	unsigned long freq_qman;
+#endif
+#ifdef CONFIG_SYS_DPAA_PME
+	unsigned long freq_pme;
+#endif
+};
+
+/* Global Utilities Block */
+struct ccsr_gur {
+	u32	porsr1;		/* POR status 1 */
+	u32	porsr2;		/* POR status 2 */
+	u8	res_008[0x20-0x8];
+	u32	gpporcr1;	/* General-purpose POR configuration */
+	u32	gpporcr2;	/* General-purpose POR configuration 2 */
+	u32	dcfg_fusesr;	/* Fuse status register */
+	u32	gpporcr3;
+	u32	gpporcr4;
+	u8	res_034[0x70-0x34];
+	u32	devdisr;	/* Device disable control */
+	u32	devdisr2;	/* Device disable control 2 */
+	u32	devdisr3;	/* Device disable control 3 */
+	u32	devdisr4;	/* Device disable control 4 */
+	u32	devdisr5;	/* Device disable control 5 */
+	u32	devdisr6;	/* Device disable control 6 */
+	u32	devdisr7;	/* Device disable control 7 */
+	u8	res_08c[0x90-0x8c];
+	u32	coredisru;	/* uppper portion for support of 64 cores */
+	u32	coredisrl;	/* lower portion for support of 64 cores */
+	u8	res_098[0xa0-0x98];
+	u32	pvr;		/* Processor version */
+	u32	svr;		/* System version */
+	u32	mvr;		/* Manufacturing version */
+	u8	res_0ac[0x100-0xac];
+	u32	rcwsr[32];	/* Reset control word status */
+
+#define FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT	2
+#define FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK	0x1f
+#define FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT	10
+#define FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK	0x3f
+	u8	res_180[0x200-0x180];
+	u32	scratchrw[32];	/* Scratch Read/Write */
+	u8	res_280[0x300-0x280];
+	u32	scratchw1r[4];	/* Scratch Read (Write once) */
+	u8	res_310[0x400-0x310];
+	u32	bootlocptrl;	/* Boot location pointer low-order addr */
+	u32	bootlocptrh;	/* Boot location pointer high-order addr */
+	u8	res_408[0x500-0x408];
+	u8	res_500[0x740-0x500];	/* add more registers when needed */
+	u32	tp_ityp[64];	/* Topology Initiator Type Register */
+	struct {
+		u32	upper;
+		u32	lower;
+	} tp_cluster[3];	/* Core Cluster n Topology Register */
+	u8	res_858[0x1000-0x858];
+};
+
+#define TP_ITYP_AV		0x00000001	/* Initiator available */
+#define TP_ITYP_TYPE(x)	(((x) & 0x6) >> 1)	/* Initiator Type */
+#define TP_ITYP_TYPE_ARM	0x0
+#define TP_ITYP_TYPE_PPC	0x1		/* PowerPC */
+#define TP_ITYP_TYPE_OTHER	0x2		/* StarCore DSP */
+#define TP_ITYP_TYPE_HA		0x3		/* HW Accelerator */
+#define TP_ITYP_THDS(x)	(((x) & 0x18) >> 3)	/* # threads */
+#define TP_ITYP_VER(x)	(((x) & 0xe0) >> 5)	/* Initiator Version */
+#define TY_ITYP_VER_A7		0x1
+#define TY_ITYP_VER_A53		0x2
+#define TY_ITYP_VER_A57		0x3
+
+#define TP_CLUSTER_EOC		0x80000000	/* end of clusters */
+#define TP_CLUSTER_INIT_MASK	0x0000003f	/* initiator mask */
+#define TP_INIT_PER_CLUSTER     4
+
+struct ccsr_clk_cluster_group {
+	struct {
+		u8	res_00[0x10];
+		u32	csr;
+		u8	res_14[0x20-0x14];
+	} hwncsr[3];
+	u8	res_60[0x80-0x60];
+	struct {
+		u32	gsr;
+		u8	res_84[0xa0-0x84];
+	} pllngsr[3];
+	u8	res_e0[0x100-0xe0];
+};
+
+struct ccsr_clk_ctrl {
+	struct {
+		u32 csr;	/* core cluster n clock control status */
+		u8  res_04[0x20-0x04];
+	} clkcncsr[8];
+};
+#endif /* __ARCH_FSL_LSCH3_IMMAP_H */
diff --git a/arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h b/arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h
new file mode 100644
index 0000000..8f00535
--- /dev/null
+++ b/arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ *
+ */
+
+#ifndef __ASM_ARCH_FSL_LSCH3_IMX_REGS_H_
+#define __ASM_ARCH_FSL_LSCH3_IMX_REGS_H_
+
+#define I2C_QUIRK_REG	/* enable 8-bit driver */
+
+#endif /* __ASM_ARCH_FSL_LSCH3_IMX_REGS_H_ */
diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
index 2a20a77..d3433da 100644
--- a/arch/arm/include/asm/config.h
+++ b/arch/arm/include/asm/config.h
@@ -17,4 +17,8 @@
 #define CONFIG_STATIC_RELA
 #endif
 
+#ifdef CONFIG_FSL_LSCH3
+#include <asm/arch-fsl-lsch3/config.h>
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 74ee9a4..d51ba66 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -78,6 +78,8 @@
 void wait_for_wakeup(void);
 void smp_kick_all_cpus(void);
 
+void flush_l3_cache(void);
+
 #endif	/* __ASSEMBLY__ */
 
 #else /* CONFIG_ARM64 */
diff --git a/drivers/i2c/mxc_i2c.c b/drivers/i2c/mxc_i2c.c
index 48468d7..c14797c 100644
--- a/drivers/i2c/mxc_i2c.c
+++ b/drivers/i2c/mxc_i2c.c
@@ -429,6 +429,11 @@
 	(void *)I2C3_BASE_ADDR
 #elif defined(CONFIG_VF610)
 	(void *)I2C0_BASE_ADDR
+#elif defined(CONFIG_FSL_LSCH3)
+	(void *)I2C1_BASE_ADDR,
+	(void *)I2C2_BASE_ADDR,
+	(void *)I2C3_BASE_ADDR,
+	(void *)I2C4_BASE_ADDR
 #else
 #error "architecture not supported"
 #endif
diff --git a/include/common.h b/include/common.h
index cc74633..2e5a6d3 100644
--- a/include/common.h
+++ b/include/common.h
@@ -687,9 +687,6 @@
 ulong	get_bus_freq  (ulong);
 int get_serial_clock(void);
 
-#if defined(CONFIG_MPC83xx) || defined(CONFIG_MPC85xx)
-ulong get_ddr_freq(ulong);
-#endif
 #if defined(CONFIG_MPC85xx)
 typedef MPC85xx_SYS_INFO sys_info_t;
 void	get_sys_info  ( sys_info_t * );
@@ -705,6 +702,8 @@
 {
 	return get_bus_freq(dummy);
 }
+#else
+ulong get_ddr_freq(ulong);
 #endif
 
 #if defined(CONFIG_4xx)