Merge branch 'master' of git://git.denx.de/u-boot-samsung
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
index d846236..ab8c089 100644
--- a/arch/arm/cpu/armv8/cache.S
+++ b/arch/arm/cpu/armv8/cache.S
@@ -112,7 +112,7 @@
 
 ENTRY(__asm_invalidate_dcache_all)
 	mov	x16, lr
-	mov	x0, #0xffff
+	mov	x0, #0x1
 	bl	__asm_dcache_all
 	mov	lr, x16
 	ret
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index 6bde1cf..b1ea822 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -59,15 +59,15 @@
 	el = current_el();
 	if (el == 1) {
 		set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
-				  TCR_FLAGS | TCR_EL1_IPS_BITS,
+				  TCR_EL1_RSVD | TCR_FLAGS | TCR_EL1_IPS_BITS,
 				  MEMORY_ATTRIBUTES);
 	} else if (el == 2) {
 		set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
-				  TCR_FLAGS | TCR_EL2_IPS_BITS,
+				  TCR_EL2_RSVD | TCR_FLAGS | TCR_EL2_IPS_BITS,
 				  MEMORY_ATTRIBUTES);
 	} else {
 		set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
-				  TCR_FLAGS | TCR_EL3_IPS_BITS,
+				  TCR_EL3_RSVD | TCR_FLAGS | TCR_EL3_IPS_BITS,
 				  MEMORY_ATTRIBUTES);
 	}
 	/* enable the mmu */
diff --git a/arch/arm/include/asm/arch-mx6/iomux.h b/arch/arm/include/asm/arch-mx6/iomux.h
index 9b3a91f..907cb40 100644
--- a/arch/arm/include/asm/arch-mx6/iomux.h
+++ b/arch/arm/include/asm/arch-mx6/iomux.h
@@ -18,6 +18,8 @@
 #define IOMUXC_GPR1_REF_SSP_EN			(1 << 16)
 #define IOMUXC_GPR1_TEST_POWERDOWN		(1 << 18)
 
+#define IOMUXC_GPR1_PCIE_SW_RST		(1 << 29)
+
 /*
  * IOMUXC_GPR5 bit fields
  */
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
index 0c928d4..587ee39 100644
--- a/arch/arm/include/asm/armv8/mmu.h
+++ b/arch/arm/include/asm/armv8/mmu.h
@@ -103,13 +103,17 @@
 #define TCR_EL2_IPS_BITS	(3 << 16)	/* 42 bits physical address */
 #define TCR_EL3_IPS_BITS	(3 << 16)	/* 42 bits physical address */
 
-/* PTWs cacheable, inner/outer WBWA and non-shareable */
+/* PTWs cacheable, inner/outer WBWA and inner shareable */
 #define TCR_FLAGS		(TCR_TG0_64K |		\
-				TCR_SHARED_NON |	\
+				TCR_SHARED_INNER |	\
 				TCR_ORGN_WBWA |		\
 				TCR_IRGN_WBWA |		\
 				TCR_T0SZ(VA_BITS))
 
+#define TCR_EL1_RSVD		(1 << 31)
+#define TCR_EL2_RSVD		(1 << 31 | 1 << 23)
+#define TCR_EL3_RSVD		(1 << 31 | 1 << 23)
+
 #ifndef __ASSEMBLY__
 
 void set_pgtable_section(u64 *page_table, u64 index,
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 868ea54..cfc7834 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -194,7 +194,7 @@
 static inline unsigned int get_cr(void)
 {
 	unsigned int val;
-	asm("mrc p15, 0, %0, c1, c0, 0	@ get CR" : "=r" (val) : : "cc");
+	asm volatile("mrc p15, 0, %0, c1, c0, 0	@ get CR" : "=r" (val) : : "cc");
 	return val;
 }
 
diff --git a/arch/arm/lib/crt0.S b/arch/arm/lib/crt0.S
index afd4f10..4c3a94a 100644
--- a/arch/arm/lib/crt0.S
+++ b/arch/arm/lib/crt0.S
@@ -25,7 +25,8 @@
  *    the GD ('global data') structure, both located in some readily
  *    available RAM (SRAM, locked cache...). In this context, VARIABLE
  *    global data, initialized or not (BSS), are UNAVAILABLE; only
- *    CONSTANT initialized data are available.
+ *    CONSTANT initialized data are available. GD should be zeroed
+ *    before board_init_f() is called.
  *
  * 2. Call board_init_f(). This function prepares the hardware for
  *    execution from system RAM (DRAM, DDR...) As system RAM may not
@@ -34,24 +35,29 @@
  *    data include the relocation destination, the future stack, and
  *    the future GD location.
  *
- * (the following applies only to non-SPL builds)
- *
  * 3. Set up intermediate environment where the stack and GD are the
  *    ones allocated by board_init_f() in system RAM, but BSS and
  *    initialized non-const data are still not available.
  *
+ * 4a.For U-Boot proper (not SPL), call relocate_code(). This function
+ *    relocates U-Boot from its current location into the relocation
+ *    destination computed by board_init_f().
+ *
- * 4. Call relocate_code(). This function relocates U-Boot from its
- *    current location into the relocation destination computed by
- *    board_init_f().
+ * 4b.For SPL, board_init_f() just returns (to crt0). There is no
+ *    code relocation in SPL.
  *
  * 5. Set up final environment for calling board_init_r(). This
  *    environment has BSS (initialized to 0), initialized non-const
  *    data (initialized to their intended value), and stack in system
- *    RAM. GD has retained values set by board_init_f(). Some CPUs
- *    have some work left to do at this point regarding memory, so
- *    call c_runtime_cpu_setup.
+ *    RAM (for SPL moving the stack and GD into RAM is optional - see
+ *    CONFIG_SPL_STACK_R). GD has retained values set by board_init_f().
+ *
+ * 6. For U-Boot proper (not SPL), some CPUs have some work left to do
+ *    at this point regarding memory, so call c_runtime_cpu_setup.
+ *
+ * 7. Branch to board_init_r().
  *
- * 6. Branch to board_init_r().
+ * For more information see 'Board Initialisation Flow' in README.
  */
 
 /*
diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
index 98a906e..8b34e04 100644
--- a/arch/arm/lib/crt0_64.S
+++ b/arch/arm/lib/crt0_64.S
@@ -27,7 +27,8 @@
  *    the GD ('global data') structure, both located in some readily
  *    available RAM (SRAM, locked cache...). In this context, VARIABLE
  *    global data, initialized or not (BSS), are UNAVAILABLE; only
- *    CONSTANT initialized data are available.
+ *    CONSTANT initialized data are available. GD should be zeroed
+ *    before board_init_f() is called.
  *
  * 2. Call board_init_f(). This function prepares the hardware for
  *    execution from system RAM (DRAM, DDR...) As system RAM may not
@@ -36,24 +37,31 @@
  *    data include the relocation destination, the future stack, and
  *    the future GD location.
  *
- * (the following applies only to non-SPL builds)
- *
  * 3. Set up intermediate environment where the stack and GD are the
  *    ones allocated by board_init_f() in system RAM, but BSS and
  *    initialized non-const data are still not available.
  *
- * 4. Call relocate_code(). This function relocates U-Boot from its
- *    current location into the relocation destination computed by
- *    board_init_f().
+ * 4a.For U-Boot proper (not SPL), call relocate_code(). This function
+ *    relocates U-Boot from its current location into the relocation
+ *    destination computed by board_init_f().
+ *
+ * 4b.For SPL, board_init_f() just returns (to crt0). There is no
+ *    code relocation in SPL.
  *
  * 5. Set up final environment for calling board_init_r(). This
  *    environment has BSS (initialized to 0), initialized non-const
  *    data (initialized to their intended value), and stack in system
- *    RAM. GD has retained values set by board_init_f(). Some CPUs
- *    have some work left to do at this point regarding memory, so
- *    call c_runtime_cpu_setup.
+ *    RAM (for SPL moving the stack and GD into RAM is optional - see
+ *    CONFIG_SPL_STACK_R). GD has retained values set by board_init_f().
+ *
+ * TODO: For SPL, implement stack relocation on AArch64.
  *
- * 6. Branch to board_init_r().
+ * 6. For U-Boot proper (not SPL), some CPUs have some work left to do
+ *    at this point regarding memory, so call c_runtime_cpu_setup.
+ *
+ * 7. Branch to board_init_r().
+ *
+ * For more information see 'Board Initialisation Flow' in README.
  */
 
 ENTRY(_main)
@@ -106,6 +114,8 @@
  */
 	bl	c_runtime_cpu_setup		/* still call old routine */
 
+/* TODO: For SPL, call spl_relocate_stack_gd() to alloc stack relocation */
+
 /*
  * Clear BSS section
  */
diff --git a/arch/arm/lib/gic_64.S b/arch/arm/lib/gic_64.S
index a3e18f7..62d0022 100644
--- a/arch/arm/lib/gic_64.S
+++ b/arch/arm/lib/gic_64.S
@@ -46,11 +46,19 @@
 	ldr	w9, [x0, GICD_TYPER]
 	and	w10, w9, #0x1f		/* ITLinesNumber */
 	cbz	w10, 1f			/* No SPIs */
-	add	x11, x0, (GICD_IGROUPRn + 4)
+	add	x11, x0, GICD_IGROUPRn
 	mov	w9, #~0			/* Config SPIs as Grp1 */
+	str	w9, [x11], #0x4
 0:	str	w9, [x11], #0x4
 	sub	w10, w10, #0x1
 	cbnz	w10, 0b
+
+	ldr	x1, =GICC_BASE		/* GICC_CTLR */
+	mov	w0, #3			/* EnableGrp0 | EnableGrp1 */
+	str	w0, [x1]
+
+	mov	w0, #1 << 7		/* allow NS access to GICC_PMR */
+	str	w0, [x1, #4]		/* GICC_PMR */
 #endif
 1:
 	ret
diff --git a/board/toradex/colibri_vf/colibri_vf.c b/board/toradex/colibri_vf/colibri_vf.c
index 39bf0ac..a6d1c5b 100644
--- a/board/toradex/colibri_vf/colibri_vf.c
+++ b/board/toradex/colibri_vf/colibri_vf.c
@@ -119,7 +119,7 @@
 		.trcd_int          = 6,
 		.tras_lockout      = 0,
 		.tdal              = 12,
-		.bstlen            = 0,
+		.bstlen            = 3,
 		.tdll              = 512,
 		.trp_ab            = 6,
 		.tref              = 3120,
diff --git a/doc/README.uniphier b/doc/README.uniphier
index 6ba0320..57b947b 100644
--- a/doc/README.uniphier
+++ b/doc/README.uniphier
@@ -129,10 +129,10 @@
 
  BKSZ    Description              RAM slot            Peripherals
  --------------------------------------------------------------------
- 0b00   15MB RAM / 1MB Peri    00000000-0effffff    0f000000-0fffffff
- 0b01   31MB RAM / 1MB Peri    00000000-1effffff    1f000000-1fffffff
- 0b10   64MB RAM / 1MB Peri    00000000-3effffff    3f000000-3fffffff
- 0b11  127MB RAM / 1MB Peri    00000000-7effffff    7f000000-7fffffff
+ 0b00   15MB RAM / 1MB Peri    00000000-00efffff    00f00000-00ffffff
+ 0b01   31MB RAM / 1MB Peri    00000000-01efffff    01f00000-01ffffff
+ 0b10   63MB RAM / 1MB Peri    00000000-03efffff    03f00000-03ffffff
+ 0b11  127MB RAM / 1MB Peri    00000000-07efffff    07f00000-07ffffff
 
 Set BSKZ[1:0] to 0b01 for U-Boot.
 This mode is the most handy because EA[24] is always supported by the save pin
diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 5c11ac9..06266f3 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009-2014 Freescale Semiconductor, Inc. and others
+ * Copyright 2009-2015 Freescale Semiconductor, Inc. and others
  *
  * Description: MPC5125, VF610, MCF54418 and Kinetis K70 Nand driver.
  * Ported to U-Boot by Stefan Agner
@@ -19,9 +19,10 @@
  *
  * Limitations:
  * - Untested on MPC5125 and M54418.
- * - DMA not used.
+ * - DMA and pipelining not used.
  * - 2K pages or less.
- * - Only 2K page w. 64+OOB and hardware ECC.
+ * - HW ECC: Only 2K page with 64+ OOB.
+ * - HW ECC: Only 24 and 32-bit error correction implemented.
  */
 
 #include <common.h>
@@ -53,6 +54,7 @@
 
 #define PAGE_2K				0x0800
 #define OOB_64				0x0040
+#define OOB_MAX				0x0100
 
 /*
  * NFC_CMD2[CODE] values. See section:
@@ -127,32 +129,33 @@
 
 #define NFC_TIMEOUT	(1000)
 
-/* ECC status placed at end of buffers. */
-#define ECC_SRAM_ADDR	((PAGE_2K+256-8) >> 3)
-#define ECC_STATUS_MASK	0x80
-#define ECC_ERR_COUNT	0x3F
-
 /*
- * ECC status is stored at NFC_CFG[ECCADD] +4 for little-endian
- * and +7 for big-endian SOC.
+ * ECC status - seems to consume 8 bytes (double word). The documented
+ * status byte is located in the lowest byte of the second word (which is
+ * the 4th or 7th byte depending on endianness).
+ * Calculate an offset to store the ECC status at the end of the buffer.
  */
-#ifdef CONFIG_VF610
-#define ECC_OFFSET	4
-#else
-#define ECC_OFFSET	7
-#endif
+#define ECC_SRAM_ADDR		(PAGE_2K + OOB_MAX - 8)
+
+#define ECC_STATUS		0x4
+#define ECC_STATUS_MASK		0x80
+#define ECC_STATUS_ERR_COUNT	0x3F
+
+enum vf610_nfc_alt_buf {
+	ALT_BUF_DATA = 0,
+	ALT_BUF_ID = 1,
+	ALT_BUF_STAT = 2,
+	ALT_BUF_ONFI = 3,
+};
 
 struct vf610_nfc {
-	struct mtd_info	  *mtd;
-	struct nand_chip   chip;
-	void __iomem	  *regs;
-	uint               column;
+	struct mtd_info *mtd;
+	struct nand_chip chip;
+	void __iomem *regs;
+	uint buf_offset;
+	int write_sz;
 	/* Status and ID are in alternate locations. */
-	int                alt_buf;
-#define ALT_BUF_ID   1
-#define ALT_BUF_STAT 2
-#define ALT_BUF_ONFI 3
-	struct clk        *clk;
+	enum vf610_nfc_alt_buf alt_buf;
 };
 
 #define mtd_to_nfc(_mtd) \
@@ -170,8 +173,8 @@
 		   48, 49, 50, 51, 52, 53, 54, 55,
 		   56, 57, 58, 59, 60, 61, 62, 63},
 	.oobfree = {
-		{.offset = 8,
-		 .length = 11} }
+		{.offset = 2,
+		 .length = 17} }
 };
 #elif defined(CONFIG_SYS_NAND_VF610_NFC_60_ECC_BYTES)
 #define ECC_HW_MODE ECC_60_BYTE
@@ -226,8 +229,12 @@
 static inline void vf610_nfc_memcpy(void *dst, const void *src, size_t n)
 {
 	/*
-	 * Use this accessor for the interal SRAM buffers. On ARM we can
-	 * treat the SRAM buffer as if its memory, hence use memcpy
+	 * Use this accessor for the internal SRAM buffers. On the ARM
+	 * Freescale Vybrid SoC it's known that the driver can treat
+	 * the SRAM buffer as if it's memory. Other platforms might need
+	 * to treat the buffers differently.
+	 *
+	 * For the time being, use memcpy
 	 */
 	memcpy(dst, src, n);
 }
@@ -242,7 +249,7 @@
 }
 
 /* Wait for complete operation */
-static inline void vf610_nfc_done(struct mtd_info *mtd)
+static void vf610_nfc_done(struct mtd_info *mtd)
 {
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 	uint start;
@@ -260,7 +267,7 @@
 
 	while (!(vf610_nfc_read(mtd, NFC_IRQ_STATUS) & IDLE_IRQ_BIT)) {
 		if (get_timer(start) > NFC_TIMEOUT) {
-			printf("Timeout while waiting for !BUSY.\n");
+			printf("Timeout while waiting for IDLE.\n");
 			return;
 		}
 	}
@@ -273,11 +280,13 @@
 
 	if (col < 4) {
 		flash_id = vf610_nfc_read(mtd, NFC_FLASH_STATUS1);
-		return (flash_id >> (3-col)*8) & 0xff;
+		flash_id >>= (3 - col) * 8;
 	} else {
 		flash_id = vf610_nfc_read(mtd, NFC_FLASH_STATUS2);
-		return flash_id >> 24;
+		flash_id >>= 24;
 	}
+
+	return flash_id & 0xff;
 }
 
 static u8 vf610_nfc_get_status(struct mtd_info *mtd)
@@ -345,26 +354,28 @@
 			      int column, int page)
 {
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
-	int page_sz = nfc->chip.options & NAND_BUSWIDTH_16 ? 1 : 0;
+	int trfr_sz = nfc->chip.options & NAND_BUSWIDTH_16 ? 1 : 0;
 
-	nfc->column = max(column, 0);
-	nfc->alt_buf = 0;
+	nfc->buf_offset = max(column, 0);
+	nfc->alt_buf = ALT_BUF_DATA;
 
 	switch (command) {
 	case NAND_CMD_SEQIN:
 		/* Use valid column/page from preread... */
 		vf610_nfc_addr_cycle(mtd, column, page);
+		nfc->buf_offset = 0;
+
 		/*
 		 * SEQIN => data => PAGEPROG sequence is done by the controller
 		 * hence we do not need to issue the command here...
 		 */
 		return;
 	case NAND_CMD_PAGEPROG:
-		page_sz += mtd->writesize + mtd->oobsize;
-		vf610_nfc_transfer_size(nfc->regs, page_sz);
+		trfr_sz += nfc->write_sz;
+		vf610_nfc_ecc_mode(mtd, ECC_HW_MODE);
+		vf610_nfc_transfer_size(nfc->regs, trfr_sz);
 		vf610_nfc_send_commands(nfc->regs, NAND_CMD_SEQIN,
 					command, PROGRAM_PAGE_CMD_CODE);
-		vf610_nfc_ecc_mode(mtd, ECC_HW_MODE);
 		break;
 
 	case NAND_CMD_RESET:
@@ -373,9 +384,9 @@
 		break;
 
 	case NAND_CMD_READOOB:
-		page_sz += mtd->oobsize;
+		trfr_sz += mtd->oobsize;
 		column = mtd->writesize;
-		vf610_nfc_transfer_size(nfc->regs, page_sz);
+		vf610_nfc_transfer_size(nfc->regs, trfr_sz);
 		vf610_nfc_send_commands(nfc->regs, NAND_CMD_READ0,
 					NAND_CMD_READSTART, READ_PAGE_CMD_CODE);
 		vf610_nfc_addr_cycle(mtd, column, page);
@@ -383,18 +394,18 @@
 		break;
 
 	case NAND_CMD_READ0:
-		page_sz += mtd->writesize + mtd->oobsize;
-		column = 0;
-		vf610_nfc_transfer_size(nfc->regs, page_sz);
+		trfr_sz += mtd->writesize + mtd->oobsize;
+		vf610_nfc_transfer_size(nfc->regs, trfr_sz);
+		vf610_nfc_ecc_mode(mtd, ECC_HW_MODE);
 		vf610_nfc_send_commands(nfc->regs, NAND_CMD_READ0,
 					NAND_CMD_READSTART, READ_PAGE_CMD_CODE);
 		vf610_nfc_addr_cycle(mtd, column, page);
-		vf610_nfc_ecc_mode(mtd, ECC_HW_MODE);
 		break;
 
 	case NAND_CMD_PARAM:
 		nfc->alt_buf = ALT_BUF_ONFI;
-		vf610_nfc_transfer_size(nfc->regs, 768);
+		trfr_sz = 3 * sizeof(struct nand_onfi_params);
+		vf610_nfc_transfer_size(nfc->regs, trfr_sz);
 		vf610_nfc_send_command(nfc->regs, NAND_CMD_PARAM,
 				       READ_ONFI_PARAM_CMD_CODE);
 		vf610_nfc_set_field(mtd, NFC_ROW_ADDR, ROW_ADDR_MASK,
@@ -411,7 +422,7 @@
 
 	case NAND_CMD_READID:
 		nfc->alt_buf = ALT_BUF_ID;
-		nfc->column = 0;
+		nfc->buf_offset = 0;
 		vf610_nfc_transfer_size(nfc->regs, 0);
 		vf610_nfc_send_command(nfc->regs, command, READ_ID_CMD_CODE);
 		vf610_nfc_set_field(mtd, NFC_ROW_ADDR, ROW_ADDR_MASK,
@@ -421,21 +432,22 @@
 	case NAND_CMD_STATUS:
 		nfc->alt_buf = ALT_BUF_STAT;
 		vf610_nfc_transfer_size(nfc->regs, 0);
-		vf610_nfc_send_command(nfc->regs, command,
-				       STATUS_READ_CMD_CODE);
+		vf610_nfc_send_command(nfc->regs, command, STATUS_READ_CMD_CODE);
 		break;
 	default:
 		return;
 	}
 
 	vf610_nfc_done(mtd);
+
+	nfc->write_sz = 0;
 }
 
 /* Read data from NFC buffers */
 static void vf610_nfc_read_buf(struct mtd_info *mtd, u_char *buf, int len)
 {
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
-	uint c = nfc->column;
+	uint c = nfc->buf_offset;
 
 	/* Alternate buffers are only supported through read_byte */
 	if (nfc->alt_buf)
@@ -443,28 +455,30 @@
 
 	vf610_nfc_memcpy(buf, nfc->regs + NFC_MAIN_AREA(0) + c, len);
 
-	nfc->column += len;
+	nfc->buf_offset += len;
 }
 
 /* Write data to NFC buffers */
-static void vf610_nfc_write_buf(struct mtd_info *mtd, const u_char *buf,
+static void vf610_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
 				int len)
 {
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
-	uint c = nfc->column;
+	uint c = nfc->buf_offset;
 	uint l;
 
-	l = min((uint)len, mtd->writesize + mtd->oobsize - c);
-	nfc->column += l;
+	l = min_t(uint, len, mtd->writesize + mtd->oobsize - c);
 	vf610_nfc_memcpy(nfc->regs + NFC_MAIN_AREA(0) + c, buf, l);
+
+	nfc->write_sz += l;
+	nfc->buf_offset += l;
 }
 
 /* Read byte from NFC buffers */
-static u8 vf610_nfc_read_byte(struct mtd_info *mtd)
+static uint8_t vf610_nfc_read_byte(struct mtd_info *mtd)
 {
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 	u8 tmp;
-	uint c = nfc->column;
+	uint c = nfc->buf_offset;
 
 	switch (nfc->alt_buf) {
 	case ALT_BUF_ID:
@@ -473,18 +487,17 @@
 	case ALT_BUF_STAT:
 		tmp = vf610_nfc_get_status(mtd);
 		break;
-	case ALT_BUF_ONFI:
 #ifdef __LITTLE_ENDIAN
+	case ALT_BUF_ONFI:
 		/* Reverse byte since the controller uses big endianness */
-		c = nfc->column ^ 0x3;
-		tmp = *((u8 *)(nfc->regs + NFC_MAIN_AREA(0) + c));
-		break;
+		c = nfc->buf_offset ^ 0x3;
+		/* fall-through */
 #endif
 	default:
 		tmp = *((u8 *)(nfc->regs + NFC_MAIN_AREA(0) + c));
 		break;
 	}
-	nfc->column++;
+	nfc->buf_offset++;
 	return tmp;
 }
 
@@ -492,6 +505,7 @@
 static u16 vf610_nfc_read_word(struct mtd_info *mtd)
 {
 	u16 tmp;
+
 	vf610_nfc_read_buf(mtd, (u_char *)&tmp, sizeof(tmp));
 	return tmp;
 }
@@ -511,12 +525,11 @@
 #ifdef CONFIG_VF610
 	u32 tmp = vf610_nfc_read(mtd, NFC_ROW_ADDR);
 	tmp &= ~(ROW_ADDR_CHIP_SEL_RB_MASK | ROW_ADDR_CHIP_SEL_MASK);
-	tmp |= 1 << ROW_ADDR_CHIP_SEL_RB_SHIFT;
 
-	if (chip == 0)
-		tmp |= 1 << ROW_ADDR_CHIP_SEL_SHIFT;
-	else if (chip == 1)
-		tmp |= 2 << ROW_ADDR_CHIP_SEL_SHIFT;
+	if (chip >= 0) {
+		tmp |= 1 << ROW_ADDR_CHIP_SEL_RB_SHIFT;
+		tmp |= (1 << chip) << ROW_ADDR_CHIP_SEL_SHIFT;
+	}
 
 	vf610_nfc_write(mtd, NFC_ROW_ADDR, tmp);
 #endif
@@ -537,52 +550,61 @@
 	return written_bits;
 }
 
-static inline int vf610_nfc_correct_data(struct mtd_info *mtd, u_char *dat)
+static inline int vf610_nfc_correct_data(struct mtd_info *mtd, uint8_t *dat,
+					 uint8_t *oob, int page)
 {
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+	u32 ecc_status_off = NFC_MAIN_AREA(0) + ECC_SRAM_ADDR + ECC_STATUS;
 	u8 ecc_status;
 	u8 ecc_count;
-	int flip;
+	int flips;
+	int flips_threshold = nfc->chip.ecc.strength / 2;
 
-	ecc_status = __raw_readb(nfc->regs + ECC_SRAM_ADDR * 8 + ECC_OFFSET);
-	ecc_count = ecc_status & ECC_ERR_COUNT;
+	ecc_status = vf610_nfc_read(mtd, ecc_status_off) & 0xff;
+	ecc_count = ecc_status & ECC_STATUS_ERR_COUNT;
+
 	if (!(ecc_status & ECC_STATUS_MASK))
 		return ecc_count;
 
-	/* If 'ecc_count' zero or less then buffer is all 0xff or erased. */
-	flip = count_written_bits(dat, nfc->chip.ecc.size, ecc_count);
+	/* Read OOB without ECC unit enabled */
+	vf610_nfc_command(mtd, NAND_CMD_READOOB, 0, page);
+	vf610_nfc_read_buf(mtd, oob, mtd->oobsize);
 
-	/* ECC failed. */
-	if (flip > ecc_count && flip > (nfc->chip.ecc.strength / 2))
-		return -1;
+	/*
+	 * On an erased page, bit count (including OOB) should be zero or
+	 * at least less than half of the ECC strength.
+	 */
+	flips = count_written_bits(dat, nfc->chip.ecc.size, flips_threshold);
+	flips += count_written_bits(oob, mtd->oobsize, flips_threshold);
+
+	if (unlikely(flips > flips_threshold))
+		return -EINVAL;
 
 	/* Erased page. */
 	memset(dat, 0xff, nfc->chip.ecc.size);
-	return 0;
+	memset(oob, 0xff, mtd->oobsize);
+	return flips;
 }
 
-
 static int vf610_nfc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 				uint8_t *buf, int oob_required, int page)
 {
 	int eccsize = chip->ecc.size;
 	int stat;
-	uint8_t *p = buf;
-
-
-	vf610_nfc_read_buf(mtd, p, eccsize);
 
+	vf610_nfc_read_buf(mtd, buf, eccsize);
 	if (oob_required)
 		vf610_nfc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
 
-	stat = vf610_nfc_correct_data(mtd, p);
+	stat = vf610_nfc_correct_data(mtd, buf, chip->oob_poi, page);
 
-	if (stat < 0)
+	if (stat < 0) {
 		mtd->ecc_stats.failed++;
-	else
+		return 0;
+	} else {
 		mtd->ecc_stats.corrected += stat;
-
-	return 0;
+		return stat;
+	}
 }
 
 /*
@@ -591,10 +613,15 @@
 static int vf610_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 			       const uint8_t *buf, int oob_required)
 {
+	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+
 	vf610_nfc_write_buf(mtd, buf, mtd->writesize);
 	if (oob_required)
 		vf610_nfc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
 
+	/* Always write whole page including OOB due to HW ECC */
+	nfc->write_sz = mtd->writesize + mtd->oobsize;
+
 	return 0;
 }
 
@@ -635,12 +662,6 @@
 	if (cfg.width == 16)
 		chip->options |= NAND_BUSWIDTH_16;
 
-	/* Use 8-bit mode during initialization */
-	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_16BIT);
-
-	/* Disable subpage writes as we do not provide ecc->hwctl */
-	chip->options |= NAND_NO_SUBPAGE_WRITE;
-
 	chip->dev_ready = vf610_nfc_dev_ready;
 	chip->cmdfunc = vf610_nfc_command;
 	chip->read_byte = vf610_nfc_read_byte;
@@ -649,30 +670,22 @@
 	chip->write_buf = vf610_nfc_write_buf;
 	chip->select_chip = vf610_nfc_select_chip;
 
-	/* Bad block options. */
-	if (cfg.flash_bbt)
-		chip->bbt_options = NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB |
-				    NAND_BBT_CREATE;
+	chip->options |= NAND_NO_SUBPAGE_WRITE;
+
+	chip->ecc.size = PAGE_2K;
 
 	/* Set configuration register. */
+	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_16BIT);
 	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_ADDR_AUTO_INCR_BIT);
 	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_BUFNO_AUTO_INCR_BIT);
 	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_BOOT_MODE_BIT);
 	vf610_nfc_clear(mtd, NFC_FLASH_CONFIG, CONFIG_DMA_REQ_BIT);
 	vf610_nfc_set(mtd, NFC_FLASH_CONFIG, CONFIG_FAST_FLASH_BIT);
 
-	/* Enable Idle IRQ */
-	vf610_nfc_set(mtd, NFC_IRQ_STATUS, IDLE_EN_BIT);
-
-	/* PAGE_CNT = 1 */
+	/* Disable virtual pages, only one elementary transfer unit */
 	vf610_nfc_set_field(mtd, NFC_FLASH_CONFIG, CONFIG_PAGE_CNT_MASK,
 			    CONFIG_PAGE_CNT_SHIFT, 1);
 
-	/* Set ECC_STATUS offset */
-	vf610_nfc_set_field(mtd, NFC_FLASH_CONFIG,
-			    CONFIG_ECC_SRAM_ADDR_MASK,
-			    CONFIG_ECC_SRAM_ADDR_SHIFT, ECC_SRAM_ADDR);
-
 	/* first scan to find the device and get the page size */
 	if (nand_scan_ident(mtd, CONFIG_SYS_MAX_NAND_DEVICE, NULL)) {
 		err = -ENXIO;
@@ -682,11 +695,14 @@
 	if (cfg.width == 16)
 		vf610_nfc_set(mtd, NFC_FLASH_CONFIG, CONFIG_16BIT);
 
-	chip->ecc.mode = NAND_ECC_SOFT; /* default */
+	/* Bad block options. */
+	if (cfg.flash_bbt)
+		chip->bbt_options = NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB |
+				    NAND_BBT_CREATE;
 
 	/* Single buffer only, max 256 OOB minus ECC status */
-	if (mtd->writesize + mtd->oobsize > PAGE_2K + 256 - 8) {
-		dev_err(nfc->dev, "Unsupported flash size\n");
+	if (mtd->writesize + mtd->oobsize > PAGE_2K + OOB_MAX - 8) {
+		dev_err(nfc->dev, "Unsupported flash page size\n");
 		err = -ENXIO;
 		goto error;
 	}
@@ -698,6 +714,13 @@
 			goto error;
 		}
 
+		if (chip->ecc.size != mtd->writesize) {
+			dev_err(nfc->dev, "ecc size: %d\n", chip->ecc.size);
+			dev_err(nfc->dev, "Step size needs to be page size\n");
+			err = -ENXIO;
+			goto error;
+		}
+
 		/* Current HW ECC layouts only use 64 bytes of OOB */
 		if (mtd->oobsize > 64)
 			mtd->oobsize = 64;
@@ -718,7 +741,13 @@
 		chip->ecc.bytes = 60;
 #endif
 
-		/* Enable ECC_STATUS */
+		/* Set ECC_STATUS offset */
+		vf610_nfc_set_field(mtd, NFC_FLASH_CONFIG,
+				    CONFIG_ECC_SRAM_ADDR_MASK,
+				    CONFIG_ECC_SRAM_ADDR_SHIFT,
+				    ECC_SRAM_ADDR >> 3);
+
+		/* Enable ECC status in SRAM */
 		vf610_nfc_set(mtd, NFC_FLASH_CONFIG, CONFIG_ECC_SRAM_REQ_BIT);
 	}
 
diff --git a/drivers/pci/pcie_imx.c b/drivers/pci/pcie_imx.c
index 1568f20..f1e189e 100644
--- a/drivers/pci/pcie_imx.c
+++ b/drivers/pci/pcie_imx.c
@@ -19,6 +19,7 @@
 #include <asm/io.h>
 #include <linux/sizes.h>
 #include <errno.h>
+#include <asm/arch/sys_proto.h>
 
 #define PCI_ACCESS_READ  0
 #define PCI_ACCESS_WRITE 1
@@ -430,6 +431,10 @@
 static int imx6_pcie_assert_core_reset(void)
 {
 	struct iomuxc *iomuxc_regs = (struct iomuxc *)IOMUXC_BASE_ADDR;
+
+	if (is_mx6dqp())
+		setbits_le32(&iomuxc_regs->gpr[1], IOMUXC_GPR1_PCIE_SW_RST);
+
 #if defined(CONFIG_MX6SX)
 	struct gpc *gpc_regs = (struct gpc *)GPC_BASE_ADDR;
 
@@ -536,6 +541,9 @@
 
 	enable_pcie_clock();
 
+	if (is_mx6dqp())
+		clrbits_le32(&iomuxc_regs->gpr[1], IOMUXC_GPR1_PCIE_SW_RST);
+
 	/*
 	 * Wait for the clock to settle a bit, when the clock are sourced
 	 * from the CPU, we need about 30 ms to settle.
diff --git a/include/configs/vf610twr.h b/include/configs/vf610twr.h
index 324ba8f..7f4260a 100644
--- a/include/configs/vf610twr.h
+++ b/include/configs/vf610twr.h
@@ -116,20 +116,37 @@
 
 #define CONFIG_BOOTDELAY		3
 
-#define CONFIG_LOADADDR			0x82000000
+#define CONFIG_SYS_LOAD_ADDR		0x82000000
 
 /* We boot from the gfxRAM area of the OCRAM. */
 #define CONFIG_SYS_TEXT_BASE		0x3f408000
 #define CONFIG_BOARD_SIZE_LIMIT		524288
 
+/*
+ * We do have 128MB of memory on the Vybrid Tower board. Leave the last
+ * 16MB alone to avoid conflicts with Cortex-M4 firmwares running from
+ * DDR3. Hence, limit the memory range for image processing to 112MB
+ * using bootm_size. All of the following must be within this range.
+ * We have the default load at 32MB into DDR (for the kernel), FDT at
+ * 64MB and the ramdisk 512KB above that (allowing for hopefully never
+ * seen large trees). This allows a reasonable split between ramdisk
+ * and kernel size, where the ram disk can be a bit larger.
+ */
+#define MEM_LAYOUT_ENV_SETTINGS \
+	"bootm_size=0x07000000\0" \
+	"loadaddr=0x82000000\0" \
+	"kernel_addr_r=0x82000000\0" \
+	"fdt_addr=0x84000000\0" \
+	"fdt_addr_r=0x84000000\0" \
+	"rdaddr=0x84080000\0" \
+	"ramdisk_addr_r=0x84080000\0"
+
 #define CONFIG_EXTRA_ENV_SETTINGS \
+	MEM_LAYOUT_ENV_SETTINGS \
 	"script=boot.scr\0" \
 	"image=zImage\0" \
 	"console=ttyLP1\0" \
-	"fdt_high=0xffffffff\0" \
-	"initrd_high=0xffffffff\0" \
 	"fdt_file=vf610-twr.dtb\0" \
-	"fdt_addr=0x81000000\0" \
 	"boot_fdt=try\0" \
 	"ip_dyn=yes\0" \
 	"mmcdev=" __stringify(CONFIG_SYS_MMC_ENV_DEV) "\0" \
@@ -224,8 +241,6 @@
 #define CONFIG_SYS_MEMTEST_START	0x80010000
 #define CONFIG_SYS_MEMTEST_END		0x87C00000
 
-#define CONFIG_SYS_LOAD_ADDR		CONFIG_LOADADDR
-
 /*
  * Stack sizes
  * The stack sizes are set up in start.S using the settings below
diff --git a/tools/imximage.c b/tools/imximage.c
index 0da48a7..7c21922 100644
--- a/tools/imximage.c
+++ b/tools/imximage.c
@@ -160,54 +160,80 @@
 	}
 }
 
+static struct dcd_v2_cmd *gd_last_cmd;
+
 static void set_dcd_param_v2(struct imx_header *imxhdr, uint32_t dcd_len,
 		int32_t cmd)
 {
 	dcd_v2_t *dcd_v2 = &imxhdr->header.hdr_v2.dcd_table;
+	struct dcd_v2_cmd *d = gd_last_cmd;
+	struct dcd_v2_cmd *d2;
+	int len;
+
+	if (!d)
+		d = &dcd_v2->dcd_cmd;
+	d2 = d;
+	len = be16_to_cpu(d->write_dcd_command.length);
+	if (len > 4)
+		d2 = (struct dcd_v2_cmd *)(((char *)d) + len);
 
 	switch (cmd) {
 	case CMD_WRITE_DATA:
-		dcd_v2->write_dcd_command.tag = DCD_WRITE_DATA_COMMAND_TAG;
-		dcd_v2->write_dcd_command.length = cpu_to_be16(
-				dcd_len * sizeof(dcd_addr_data_t) + 4);
-		dcd_v2->write_dcd_command.param = DCD_WRITE_DATA_PARAM;
+		if ((d->write_dcd_command.tag == DCD_WRITE_DATA_COMMAND_TAG) &&
+		    (d->write_dcd_command.param == DCD_WRITE_DATA_PARAM))
+			break;
+		d = d2;
+		d->write_dcd_command.tag = DCD_WRITE_DATA_COMMAND_TAG;
+		d->write_dcd_command.length = cpu_to_be16(4);
+		d->write_dcd_command.param = DCD_WRITE_DATA_PARAM;
 		break;
 	case CMD_WRITE_CLR_BIT:
-		dcd_v2->write_dcd_command.tag = DCD_WRITE_DATA_COMMAND_TAG;
-		dcd_v2->write_dcd_command.length = cpu_to_be16(
-				dcd_len * sizeof(dcd_addr_data_t) + 4);
-		dcd_v2->write_dcd_command.param = DCD_WRITE_CLR_BIT_PARAM;
+		if ((d->write_dcd_command.tag == DCD_WRITE_DATA_COMMAND_TAG) &&
+		    (d->write_dcd_command.param == DCD_WRITE_CLR_BIT_PARAM))
+			break;
+		d = d2;
+		d->write_dcd_command.tag = DCD_WRITE_DATA_COMMAND_TAG;
+		d->write_dcd_command.length = cpu_to_be16(4);
+		d->write_dcd_command.param = DCD_WRITE_CLR_BIT_PARAM;
 		break;
 	/*
 	 * Check data command only supports one entry,
-	 * so use 0xC = size(address + value + command).
 	 */
 	case CMD_CHECK_BITS_SET:
-		dcd_v2->write_dcd_command.tag = DCD_CHECK_DATA_COMMAND_TAG;
-		dcd_v2->write_dcd_command.length = cpu_to_be16(0xC);
-		dcd_v2->write_dcd_command.param = DCD_CHECK_BITS_SET_PARAM;
+		d = d2;
+		d->write_dcd_command.tag = DCD_CHECK_DATA_COMMAND_TAG;
+		d->write_dcd_command.length = cpu_to_be16(4);
+		d->write_dcd_command.param = DCD_CHECK_BITS_SET_PARAM;
 		break;
 	case CMD_CHECK_BITS_CLR:
-		dcd_v2->write_dcd_command.tag = DCD_CHECK_DATA_COMMAND_TAG;
-		dcd_v2->write_dcd_command.length = cpu_to_be16(0xC);
-		dcd_v2->write_dcd_command.param = DCD_CHECK_BITS_SET_PARAM;
+		d = d2;
+		d->write_dcd_command.tag = DCD_CHECK_DATA_COMMAND_TAG;
+		d->write_dcd_command.length = cpu_to_be16(4);
+		d->write_dcd_command.param = DCD_CHECK_BITS_CLR_PARAM;
 		break;
 	default:
 		break;
 	}
+	gd_last_cmd = d;
 }
 
 static void set_dcd_val_v2(struct imx_header *imxhdr, char *name, int lineno,
 					int fld, uint32_t value, uint32_t off)
 {
-	dcd_v2_t *dcd_v2 = &imxhdr->header.hdr_v2.dcd_table;
+	struct dcd_v2_cmd *d = gd_last_cmd;
+	int len;
+
+	len = be16_to_cpu(d->write_dcd_command.length);
+	off = (len - 4) >> 3;
 
 	switch (fld) {
 	case CFG_REG_ADDRESS:
-		dcd_v2->addr_data[off].addr = cpu_to_be32(value);
+		d->addr_data[off].addr = cpu_to_be32(value);
 		break;
 	case CFG_REG_VALUE:
-		dcd_v2->addr_data[off].value = cpu_to_be32(value);
+		d->addr_data[off].value = cpu_to_be32(value);
+		off++;
+		d->write_dcd_command.length = cpu_to_be16((off << 3) + 4);
 		break;
 	default:
 		break;
@@ -236,12 +262,20 @@
 						char *name, int lineno)
 {
 	dcd_v2_t *dcd_v2 = &imxhdr->header.hdr_v2.dcd_table;
+	struct dcd_v2_cmd *d = gd_last_cmd;
+	int len;
+
+	if (!d)
+		d = &dcd_v2->dcd_cmd;
+	len = be16_to_cpu(d->write_dcd_command.length);
+	if (len > 4)
+		d = (struct dcd_v2_cmd *)(((char *)d) + len);
+
+	len = (char *)d - (char *)&dcd_v2->header;
 
 	dcd_v2->header.tag = DCD_HEADER_TAG;
-	dcd_v2->header.length = cpu_to_be16(
-			dcd_len * sizeof(dcd_addr_data_t) + 8);
+	dcd_v2->header.length = cpu_to_be16(len);
 	dcd_v2->header.version = DCD_VERSION;
-	set_dcd_param_v2(imxhdr, dcd_len, CMD_WRITE_DATA);
 }
 
 static void set_imx_hdr_v1(struct imx_header *imxhdr, uint32_t dcd_len,
@@ -314,6 +348,7 @@
 		max_dcd_entries = MAX_HW_CFG_SIZE_V1;
 		break;
 	case IMXIMAGE_V2:
+		gd_last_cmd = NULL;
 		set_dcd_val = set_dcd_val_v2;
 		set_dcd_param = set_dcd_param_v2;
 		set_dcd_rst = set_dcd_rst_v2;
@@ -361,8 +396,8 @@
 	dcd_v2_t *dcd_v2 = &hdr_v2->dcd_table;
 	uint32_t size, version;
 
-	size = be16_to_cpu(dcd_v2->header.length) - 8;
-	if (size > (MAX_HW_CFG_SIZE_V2 * sizeof(dcd_addr_data_t))) {
+	size = be16_to_cpu(dcd_v2->header.length);
+	if (size > (MAX_HW_CFG_SIZE_V2 * sizeof(dcd_addr_data_t)) + 8) {
 		fprintf(stderr,
 			"Error: Image corrupt DCD size %d exceed maximum %d\n",
 			(uint32_t)(size / sizeof(dcd_addr_data_t)),
diff --git a/tools/imximage.h b/tools/imximage.h
index d41c74f..c7b9b5c 100644
--- a/tools/imximage.h
+++ b/tools/imximage.h
@@ -133,10 +133,14 @@
 	uint8_t param;
 } __attribute__((packed)) write_dcd_command_t;
 
-typedef struct {
-	ivt_header_t header;
+struct dcd_v2_cmd {
 	write_dcd_command_t write_dcd_command;
 	dcd_addr_data_t addr_data[MAX_HW_CFG_SIZE_V2];
+};
+
+typedef struct {
+	ivt_header_t header;
+	struct dcd_v2_cmd dcd_cmd;
 	uint32_t padding[1]; /* end up on an 8-byte boundary */
 } dcd_v2_t;