Merge branch 'master' of git://git.denx.de/u-boot-samsung
diff --git a/Makefile b/Makefile
index ad51e60..9919314 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2015
 PATCHLEVEL = 10
 SUBLEVEL =
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME =
 
 # *DOCUMENTATION*
diff --git a/arch/arm/cpu/arm926ejs/armada100/dram.c b/arch/arm/cpu/arm926ejs/armada100/dram.c
index 8d7c71f..f3b9a66 100644
--- a/arch/arm/cpu/arm926ejs/armada100/dram.c
+++ b/arch/arm/cpu/arm926ejs/armada100/dram.c
@@ -72,7 +72,6 @@
 	}
 }
 
-#ifndef CONFIG_SYS_BOARD_DRAM_INIT
 int dram_init(void)
 {
 	int i;
@@ -113,4 +112,3 @@
 {
 	dram_init();
 }
-#endif /* CONFIG_SYS_BOARD_DRAM_INIT */
diff --git a/arch/arm/cpu/arm926ejs/lpc32xx/cpu.c b/arch/arm/cpu/arm926ejs/lpc32xx/cpu.c
index f757474..bee9318 100644
--- a/arch/arm/cpu/arm926ejs/lpc32xx/cpu.c
+++ b/arch/arm/cpu/arm926ejs/lpc32xx/cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 by Vladimir Zapolskiy <vz@mleia.com>
+ * Copyright (C) 2011-2015 by Vladimir Zapolskiy <vz@mleia.com>
  *
  * SPDX-License-Identifier:	GPL-2.0+
  */
@@ -20,12 +20,23 @@
 	/* Enable watchdog clock */
 	setbits_le32(&clk->timclk_ctrl, CLK_TIMCLK_WATCHDOG);
 
-	/* Reset pulse length is 13005 peripheral clock frames */
-	writel(13000, &wdt->pulse);
+	/* To be compatible with the original U-Boot code:
+	 * addr: - 0: perform hard reset.
+	 *       - !=0: perform a soft reset (i.e. "RESOUT_N" not asserted). */
+	if (addr == 0) {
+		/* Reset pulse length is 13005 peripheral clock frames */
+		writel(13000, &wdt->pulse);
+
+		/* Force WDOG_RESET2 and RESOUT_N signal active */
+		writel(WDTIM_MCTRL_RESFRC2 | WDTIM_MCTRL_RESFRC1
+		       | WDTIM_MCTRL_M_RES2, &wdt->mctrl);
+	} else {
+		/* Force match output active */
+		writel(0x01, &wdt->emr);
 
-	/* Force WDOG_RESET2 and RESOUT_N signal active */
-	writel(WDTIM_MCTRL_RESFRC2 | WDTIM_MCTRL_RESFRC1 | WDTIM_MCTRL_M_RES2,
-	       &wdt->mctrl);
+		/* Internal reset on match output (no pulse on "RESOUT_N") */
+		writel(WDTIM_MCTRL_M_RES1, &wdt->mctrl);
+	}
 
 	while (1)
 		/* NOP */;
diff --git a/arch/arm/include/asm/arch-lpc32xx/cpu.h b/arch/arm/include/asm/arch-lpc32xx/cpu.h
index 0b5dca1..0de894b 100644
--- a/arch/arm/include/asm/arch-lpc32xx/cpu.h
+++ b/arch/arm/include/asm/arch-lpc32xx/cpu.h
@@ -27,7 +27,7 @@
 #define HS_UART7_BASE	0x4001C000	/* High speed UART 7 registers base */
 #define RTC_BASE	0x40024000	/* RTC registers base               */
 #define GPIO_BASE	0x40028000	/* GPIO registers base              */
-#define MUX_BASE	0x40028100	/* MUX registers base               */
+#define MUX_BASE	0x40028000	/* MUX registers base               */
 #define WDT_BASE	0x4003C000	/* Watchdog timer registers base    */
 #define TIMER0_BASE	0x40044000	/* Timer0 registers base            */
 #define TIMER1_BASE	0x4004C000	/* Timer1 registers base            */
diff --git a/arch/arm/include/asm/arch-lpc32xx/gpio_grp.h b/arch/arm/include/asm/arch-lpc32xx/gpio_grp.h
new file mode 100644
index 0000000..c9cf9df
--- /dev/null
+++ b/arch/arm/include/asm/arch-lpc32xx/gpio_grp.h
@@ -0,0 +1,40 @@
+/*
+ * LPC32xx GPIO interface macro for pin mapping.
+ *
+ * (C) Copyright 2015  DENX Software Engineering GmbH
+ * Written-by: Sylvain Lemieux <slemieux@tycoint.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _LPC32XX_GPIO_GRP_H
+#define _LPC32XX_GPIO_GRP_H
+
+/*
+ * Macro to map the pin for the lpc32xx_gpio driver.
+ * Note: - GPIOS are considered here as homogeneous and linear, from 0 to 127;
+ *         mapping is done per register, as group of 32.
+ *         (see drivers/gpio/lpc32xx_gpio.c for details).
+ *       - macros can be used with the following pins:
+ *         P0.0 - P0.7
+ *         P1.0 - P1.23
+ *         P2.0 - P2.12
+ *         P3 GPI_0 - GPI_9 / GPI_15 - GPI_23 / GPI_25 / GPI_27 - GPI_28
+ *         P3 GPO_0 - GPO_23
+ *         P3 GPIO_0 - GPIO_5 (output register only)
+ */
+#define LPC32XX_GPIO_P0_GRP 0
+#define LPC32XX_GPIO_P1_GRP 32
+#define LPC32XX_GPIO_P2_GRP 64
+#define LPC32XX_GPI_P3_GRP  96
+#define LPC32XX_GPO_P3_GRP  96
+#define LPC32XX_GPIO_P3_GRP (LPC32XX_GPO_P3_GRP + 25)
+
+/*
+ * A specific GPIO can be selected with this macro
+ * e.g. GPIO P0.1 can be selected with LPC32XX_GPIO(LPC32XX_GPIO_P0_GRP, 1)
+ * See the LPC32x0 User's guide for GPIO group numbers
+ */
+#define LPC32XX_GPIO(x, y) ((x) + (y))
+
+#endif /* _LPC32XX_GPIO_GRP_H */
diff --git a/arch/arm/include/asm/arch-lpc32xx/mux.h b/arch/arm/include/asm/arch-lpc32xx/mux.h
index dc1b5bc..665ea3f 100644
--- a/arch/arm/include/asm/arch-lpc32xx/mux.h
+++ b/arch/arm/include/asm/arch-lpc32xx/mux.h
@@ -12,7 +12,24 @@
  */
 
 struct mux_regs {
+	u32 reserved1[10];
+	u32 p2_mux_set;
+	u32 p2_mux_clr;
+	u32 p2_mux_state;
+	u32 reserved2[51];
 	u32 p_mux_set;
 	u32 p_mux_clr;
 	u32 p_mux_state;
+	u32 reserved3;
+	u32 p3_mux_set;
+	u32 p3_mux_clr;
+	u32 p3_mux_state;
+	u32 reserved4;
+	u32 p0_mux_set;
+	u32 p0_mux_clr;
+	u32 p0_mux_state;
+	u32 reserved5;
+	u32 p1_mux_set;
+	u32 p1_mux_clr;
+	u32 p1_mux_state;
 };
diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile
index 446ce04..21c56a4 100644
--- a/arch/arm/mach-mvebu/Makefile
+++ b/arch/arm/mach-mvebu/Makefile
@@ -14,6 +14,10 @@
 
 obj-y	= cpu.o
 obj-y	+= dram.o
+ifndef CONFIG_SPL_BUILD
+obj-$(CONFIG_SYS_MVEBU_DDR_A38X) += ../../../drivers/ddr/marvell/a38x/xor.o
+obj-$(CONFIG_SYS_MVEBU_DDR_AXP) += ../../../drivers/ddr/marvell/axp/xor.o
+endif
 obj-y	+= gpio.o
 obj-y	+= mbus.o
 obj-y	+= timer.o
diff --git a/arch/arm/mach-mvebu/cpu.c b/arch/arm/mach-mvebu/cpu.c
index 9496d5f..ea83e21 100644
--- a/arch/arm/mach-mvebu/cpu.c
+++ b/arch/arm/mach-mvebu/cpu.c
@@ -18,19 +18,13 @@
 #define DDR_SIZE_CS_OFF(n)	(0x0004 + ((n) << 3))
 
 static struct mbus_win windows[] = {
-	/* PCIE MEM address space */
-	{ DEFADR_PCI_MEM, 256 << 20, CPU_TARGET_PCIE13, CPU_ATTR_PCIE_MEM },
-
-	/* PCIE IO address space */
-	{ DEFADR_PCI_IO, 64 << 10, CPU_TARGET_PCIE13, CPU_ATTR_PCIE_IO },
-
 	/* SPI */
-	{ DEFADR_SPIF, 8 << 20, CPU_TARGET_DEVICEBUS_BOOTROM_SPI,
-	  CPU_ATTR_SPIFLASH },
+	{ MBUS_SPI_BASE, MBUS_SPI_SIZE,
+	  CPU_TARGET_DEVICEBUS_BOOTROM_SPI, CPU_ATTR_SPIFLASH },
 
 	/* NOR */
-	{ DEFADR_BOOTROM, 8 << 20, CPU_TARGET_DEVICEBUS_BOOTROM_SPI,
-	  CPU_ATTR_BOOTROM },
+	{ MBUS_BOOTROM_BASE, MBUS_BOOTROM_SIZE,
+	  CPU_TARGET_DEVICEBUS_BOOTROM_SPI, CPU_ATTR_BOOTROM },
 };
 
 void reset_cpu(unsigned long ignored)
@@ -177,17 +171,69 @@
 	asm("mcr p15, 4, %0, c15, c0" : : "r" (addr));
 }
 
+#define MV_USB_PHY_BASE			(MVEBU_AXP_USB_BASE + 0x800)
+#define MV_USB_PHY_PLL_REG(reg)		(MV_USB_PHY_BASE | (((reg) & 0xF) << 2))
+#define MV_USB_X3_BASE(addr)		(MVEBU_AXP_USB_BASE | BIT(11) | \
+					 (((addr) & 0xF) << 6))
+#define MV_USB_X3_PHY_CHANNEL(dev, reg)	(MV_USB_X3_BASE((dev) + 1) |	\
+					 (((reg) & 0xF) << 2))
 
-int arch_cpu_init(void)
+static void setup_usb_phys(void)
 {
-#ifndef CONFIG_SPL_BUILD
+	int dev;
+
 	/*
-	 * Only with disabled MMU its possible to switch the base
-	 * register address on Armada 38x. Without this the SDRAM
-	 * located at >= 0x4000.0000 is also not accessible, as its
-	 * still locked to cache.
+	 * USB PLL init
 	 */
-	mmu_disable();
+
+	/* Setup PLL frequency */
+	/* USB REF frequency = 25 MHz */
+	clrsetbits_le32(MV_USB_PHY_PLL_REG(1), 0x3ff, 0x605);
+
+	/* Power up PLL and PHY channel */
+	clrsetbits_le32(MV_USB_PHY_PLL_REG(2), 0, BIT(9));
+
+	/* Assert VCOCAL_START */
+	clrsetbits_le32(MV_USB_PHY_PLL_REG(1), 0, BIT(21));
+
+	mdelay(1);
+
+	/*
+	 * USB PHY init (change from defaults) specific for 40nm (78X30 78X60)
+	 */
+
+	for (dev = 0; dev < 3; dev++) {
+		clrsetbits_le32(MV_USB_X3_PHY_CHANNEL(dev, 3), 0, BIT(15));
+
+		/* Assert REG_RCAL_START in channel REG 1 */
+		clrsetbits_le32(MV_USB_X3_PHY_CHANNEL(dev, 1), 0, BIT(12));
+		udelay(40);
+		clrsetbits_le32(MV_USB_X3_PHY_CHANNEL(dev, 1), BIT(12), 0);
+	}
+}
+
+int arch_cpu_init(void)
+{
+#ifndef CONFIG_SPL_BUILD
+	if (mvebu_soc_family() == MVEBU_SOC_A38X) {
+		struct pl310_regs *const pl310 =
+			(struct pl310_regs *)CONFIG_SYS_PL310_BASE;
+
+		/*
+		 * Only with disabled MMU it's possible to switch the base
+		 * register address on Armada 38x. Without this the SDRAM
+		 * located at >= 0x4000.0000 is also not accessible, as it's
+		 * still locked to cache.
+		 *
+		 * So to fully release / unlock this area from cache, we need
+		 * to first flush all caches, then disable the MMU and
+		 * disable the L2 cache.
+		 */
+		icache_disable();
+		dcache_disable();
+		mmu_disable();
+		clrbits_le32(&pl310->pl310_ctrl, L2X0_CTRL_EN);
+	}
 #endif
 
 	/* Linux expects the internal registers to be at 0xf1000000 */
@@ -227,10 +273,33 @@
 	 */
 	mvebu_mbus_probe(windows, ARRAY_SIZE(windows));
 
+	if (mvebu_soc_family() == MVEBU_SOC_AXP) {
+		/* Enable GBE0, GBE1, LCD and NFC PUP */
+		clrsetbits_le32(ARMADA_XP_PUP_ENABLE, 0,
+				GE0_PUP_EN | GE1_PUP_EN | LCD_PUP_EN |
+				NAND_PUP_EN | SPI_PUP_EN);
+
+		/* Configure USB PLL and PHYs on AXP */
+		setup_usb_phys();
+	}
+
+	/* Enable NAND and NAND arbiter */
+	clrsetbits_le32(MVEBU_SOC_DEV_MUX_REG, 0, NAND_EN | NAND_ARBITER_EN);
+
+	/* Disable MBUS error propagation */
+	clrsetbits_le32(SOC_COHERENCY_FABRIC_CTRL_REG, MBUS_ERR_PROP_EN, 0);
+
 	return 0;
 }
 #endif /* CONFIG_ARCH_CPU_INIT */
 
+u32 mvebu_get_nand_clock(void)
+{
+	return CONFIG_SYS_MVEBU_PLL_CLOCK /
+		((readl(MVEBU_CORE_DIV_CLK_CTRL(1)) &
+		  NAND_ECC_DIVCKL_RATIO_MASK) >> NAND_ECC_DIVCKL_RATIO_OFFS);
+}
+
 /*
  * SOC specific misc init
  */
diff --git a/arch/arm/mach-mvebu/dram.c b/arch/arm/mach-mvebu/dram.c
index db18791..ddc5b7e 100644
--- a/arch/arm/mach-mvebu/dram.c
+++ b/arch/arm/mach-mvebu/dram.c
@@ -12,6 +12,15 @@
 #include <asm/arch/cpu.h>
 #include <asm/arch/soc.h>
 
+#ifdef CONFIG_SYS_MVEBU_DDR_A38X
+#include "../../../drivers/ddr/marvell/a38x/xor.h"
+#include "../../../drivers/ddr/marvell/a38x/xor_regs.h"
+#endif
+#ifdef CONFIG_SYS_MVEBU_DDR_AXP
+#include "../../../drivers/ddr/marvell/axp/xor.h"
+#include "../../../drivers/ddr/marvell/axp/xor_regs.h"
+#endif
+
 DECLARE_GLOBAL_DATA_PTR;
 
 struct sdram_bank {
@@ -28,6 +37,16 @@
 #define REG_CPUCS_WIN_WIN0_CS(x)	(((x) & 0x3) << 2)
 #define REG_CPUCS_WIN_SIZE(x)		(((x) & 0xff) << 24)
 
+#define SDRAM_SIZE_MAX			0xc0000000
+
+#define SCRUB_MAGIC		0xbeefdead
+
+#define SCRB_XOR_UNIT		0
+#define SCRB_XOR_CHAN		1
+#define SCRB_XOR_WIN		0
+
+#define XEBARX_BASE_OFFS	16
+
 /*
  * mvebu_sdram_bar - reads SDRAM Base Address Register
  */
@@ -93,32 +112,146 @@
 	mvebu_sdram_bs_set(bank, size);
 }
 
+#if defined(CONFIG_SYS_MVEBU_DDR_A38X) || defined(CONFIG_SYS_MVEBU_DDR_AXP)
+static u32 xor_ctrl_save;
+static u32 xor_base_save;
+static u32 xor_mask_save;
+
+static void mv_xor_init2(u32 cs)
+{
+	u32 reg, base, size, base2;
+	u32 bank_attr[4] = { 0xe00, 0xd00, 0xb00, 0x700 };
+
+	xor_ctrl_save = reg_read(XOR_WINDOW_CTRL_REG(SCRB_XOR_UNIT,
+						     SCRB_XOR_CHAN));
+	xor_base_save = reg_read(XOR_BASE_ADDR_REG(SCRB_XOR_UNIT,
+						   SCRB_XOR_WIN));
+	xor_mask_save = reg_read(XOR_SIZE_MASK_REG(SCRB_XOR_UNIT,
+						   SCRB_XOR_WIN));
+
+	/* Enable Window x for each CS */
+	reg = 0x1;
+	reg |= (0x3 << 16);
+	reg_write(XOR_WINDOW_CTRL_REG(SCRB_XOR_UNIT, SCRB_XOR_CHAN), reg);
+
+	base = 0;
+	size = mvebu_sdram_bs(cs) - 1;
+	if (size) {
+		base2 = ((base / (64 << 10)) << XEBARX_BASE_OFFS) |
+			bank_attr[cs];
+		reg_write(XOR_BASE_ADDR_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN),
+			  base2);
+
+		base += size + 1;
+		size = (size / (64 << 10)) << 16;
+		/* Window x - size - 256 MB */
+		reg_write(XOR_SIZE_MASK_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN), size);
+	}
+
+	mv_xor_hal_init(0);
+
+	return;
+}
+
+static void mv_xor_finish2(void)
+{
+	reg_write(XOR_WINDOW_CTRL_REG(SCRB_XOR_UNIT, SCRB_XOR_CHAN),
+		  xor_ctrl_save);
+	reg_write(XOR_BASE_ADDR_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN),
+		  xor_base_save);
+	reg_write(XOR_SIZE_MASK_REG(SCRB_XOR_UNIT, SCRB_XOR_WIN),
+		  xor_mask_save);
+}
+
+static void dram_ecc_scrubbing(void)
+{
+	int cs;
+	u32 size, temp;
+	u32 total_mem = 0;
+	u64 total;
+	u32 start_addr;
+
+	/*
+	 * The DDR training code from the bin_hdr / SPL already
+	 * scrubbed the DDR till 0x1000000. And the main U-Boot
+	 * is loaded to an address < 0x1000000. So we need to
+	 * skip this range to not re-scrub this area again.
+	 */
+	temp = reg_read(REG_SDRAM_CONFIG_ADDR);
+	temp |= (1 << REG_SDRAM_CONFIG_IERR_OFFS);
+	reg_write(REG_SDRAM_CONFIG_ADDR, temp);
+
+	for (cs = 0; cs < CONFIG_NR_DRAM_BANKS; cs++) {
+		size = mvebu_sdram_bs(cs) - 1;
+		if (size == 0)
+			continue;
+
+		total = (u64)size + 1;
+		total_mem += (u32)(total / (1 << 30));
+		start_addr = 0;
+		mv_xor_init2(cs);
+
+		/* Skip first 16 MiB */
+		if (0 == cs) {
+			start_addr = 0x1000000;
+			size -= start_addr;
+		}
+
+		mv_xor_mem_init(SCRB_XOR_CHAN, start_addr, size,
+				SCRUB_MAGIC, SCRUB_MAGIC);
+
+		/* Wait for previous transfer completion */
+		while (mv_xor_state_get(SCRB_XOR_CHAN) != MV_IDLE)
+			;
+
+		mv_xor_finish2();
+	}
+
+	temp = reg_read(REG_SDRAM_CONFIG_ADDR);
+	temp &= ~(1 << REG_SDRAM_CONFIG_IERR_OFFS);
+	reg_write(REG_SDRAM_CONFIG_ADDR, temp);
+}
+
+static int ecc_enabled(void)
+{
+	if (reg_read(REG_SDRAM_CONFIG_ADDR) & (1 << REG_SDRAM_CONFIG_ECC_OFFS))
+		return 1;
+
-#ifndef CONFIG_SYS_BOARD_DRAM_INIT
+	return 0;
+}
+#else
+static void dram_ecc_scrubbing(void)
+{
+}
+
+static int ecc_enabled(void)
+{
+	return 0;
+}
+#endif
+
 int dram_init(void)
 {
+	u64 size = 0;
 	int i;
 
-	gd->ram_size = 0;
 	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
-		gd->bd->bi_dram[i].start = mvebu_sdram_bar(i);
-		gd->bd->bi_dram[i].size = mvebu_sdram_bs(i);
 		/*
 		 * It is assumed that all memory banks are consecutive
 		 * and without gaps.
 		 * If the gap is found, ram_size will be reported for
 		 * consecutive memory only
 		 */
-		if (gd->bd->bi_dram[i].start != gd->ram_size)
+		if (mvebu_sdram_bar(i) != size)
 			break;
 
 		/*
 		 * Don't report more than 3GiB of SDRAM, otherwise there is no
 		 * address space left for the internal registers etc.
 		 */
-		if ((gd->ram_size + gd->bd->bi_dram[i].size != 0) &&
-		    (gd->ram_size + gd->bd->bi_dram[i].size <= (3 << 30)))
-			gd->ram_size += gd->bd->bi_dram[i].size;
-
+		size += mvebu_sdram_bs(i);
+		if (size > SDRAM_SIZE_MAX)
+			size = SDRAM_SIZE_MAX;
 	}
 
 	for (; i < CONFIG_NR_DRAM_BANKS; i++) {
@@ -130,6 +263,12 @@
 		gd->bd->bi_dram[i].size = 0;
 	}
 
+
+	if (ecc_enabled())
+		dram_ecc_scrubbing();
+
+	gd->ram_size = size;
+
 	return 0;
 }
 
@@ -139,6 +278,25 @@
  */
 void dram_init_banksize(void)
 {
+	u64 size = 0;
+	int i;
+
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		gd->bd->bi_dram[i].start = mvebu_sdram_bar(i);
+		gd->bd->bi_dram[i].size = mvebu_sdram_bs(i);
+
+		/* Clip the banksize to 1GiB if it exceeds the max size */
+		size += gd->bd->bi_dram[i].size;
+		if (size > SDRAM_SIZE_MAX)
+			mvebu_sdram_bs_set(i, 0x40000000);
+	}
+}
+
+void board_add_ram_info(int use_default)
+{
-	dram_init();
+	if (ecc_enabled())
+		printf(" (ECC");
+	else
+		printf(" (ECC not");
+	printf(" enabled)");
 }
-#endif /* CONFIG_SYS_BOARD_DRAM_INIT */
diff --git a/arch/arm/mach-mvebu/include/mach/cpu.h b/arch/arm/mach-mvebu/include/mach/cpu.h
index 8bcdef6..6fa4173 100644
--- a/arch/arm/mach-mvebu/include/mach/cpu.h
+++ b/arch/arm/mach-mvebu/include/mach/cpu.h
@@ -65,10 +65,14 @@
 /*
  * Default Device Address MAP BAR values
  */
-#define DEFADR_PCI_MEM		0x90000000
-#define DEFADR_PCI_IO		0xC0000000
-#define DEFADR_SPIF		0xF4000000
-#define DEFADR_BOOTROM		0xF8000000
+#define MBUS_PCI_MEM_BASE	0xE8000000
+#define MBUS_PCI_MEM_SIZE	(128 << 20)
+#define MBUS_PCI_IO_BASE	0xF1100000
+#define MBUS_PCI_IO_SIZE	(64 << 10)
+#define MBUS_SPI_BASE		0xF4000000
+#define MBUS_SPI_SIZE		(8 << 20)
+#define MBUS_BOOTROM_BASE	0xF8000000
+#define MBUS_BOOTROM_SIZE	(8 << 20)
 
 struct mbus_win {
 	u32 base;
@@ -113,6 +117,7 @@
 void mvebu_sdram_size_adjust(enum memory_bank bank);
 int mvebu_mbus_probe(struct mbus_win windows[], int count);
 int mvebu_soc_family(void);
+u32 mvebu_get_nand_clock(void);
 
 int mv_sdh_init(unsigned long regbase, u32 max_clk, u32 min_clk, u32 quirks);
 
diff --git a/arch/arm/mach-mvebu/include/mach/soc.h b/arch/arm/mach-mvebu/include/mach/soc.h
index 125b5f2..a8a6b27 100644
--- a/arch/arm/mach-mvebu/include/mach/soc.h
+++ b/arch/arm/mach-mvebu/include/mach/soc.h
@@ -11,6 +11,8 @@
 #ifndef _MVEBU_SOC_H
 #define _MVEBU_SOC_H
 
+#define BIT(x)			(1 << (x))
+
 #define SOC_MV78460_ID		0x7846
 #define SOC_88F6810_ID		0x6810
 #define SOC_88F6820_ID		0x6820
@@ -25,6 +27,9 @@
 #define CONFIG_SYS_TCLK		250000000	/* 250MHz */
 #endif
 
+/* Armada XP PLL frequency (used for NAND clock generation) */
+#define CONFIG_SYS_MVEBU_PLL_CLOCK	2000000000
+
 /* SOC specific definations */
 #define INTREG_BASE		0xd0000000
 #define INTREG_BASE_ADDR_REG	(INTREG_BASE + 0x20080)
@@ -53,19 +58,43 @@
 #define MVEBU_GPIO1_BASE	(MVEBU_REGISTER(0x18140))
 #define MVEBU_GPIO2_BASE	(MVEBU_REGISTER(0x18180))
 #define MVEBU_SYSTEM_REG_BASE	(MVEBU_REGISTER(0x18200))
+#define MVEBU_CLOCK_BASE	(MVEBU_REGISTER(0x18700))
 #define MVEBU_CPU_WIN_BASE	(MVEBU_REGISTER(0x20000))
 #define MVEBU_SDRAM_BASE	(MVEBU_REGISTER(0x20180))
 #define MVEBU_TIMER_BASE	(MVEBU_REGISTER(0x20300))
 #define MVEBU_EGIGA2_BASE	(MVEBU_REGISTER(0x30000))
 #define MVEBU_EGIGA3_BASE	(MVEBU_REGISTER(0x34000))
 #define MVEBU_REG_PCIE_BASE	(MVEBU_REGISTER(0x40000))
+#define MVEBU_AXP_USB_BASE      (MVEBU_REGISTER(0x50000))
 #define MVEBU_USB20_BASE	(MVEBU_REGISTER(0x58000))
 #define MVEBU_EGIGA0_BASE	(MVEBU_REGISTER(0x70000))
 #define MVEBU_EGIGA1_BASE	(MVEBU_REGISTER(0x74000))
 #define MVEBU_AXP_SATA_BASE	(MVEBU_REGISTER(0xa0000))
 #define MVEBU_SATA0_BASE	(MVEBU_REGISTER(0xa8000))
+#define MVEBU_NAND_BASE		(MVEBU_REGISTER(0xd0000))
 #define MVEBU_SDIO_BASE		(MVEBU_REGISTER(0xd8000))
 
+#define SOC_COHERENCY_FABRIC_CTRL_REG	(MVEBU_REGISTER(0x20200))
+#define MBUS_ERR_PROP_EN	(1 << 8)
+
+#define MBUS_BRIDGE_WIN_CTRL_REG (MVEBU_REGISTER(0x20250))
+#define MBUS_BRIDGE_WIN_BASE_REG (MVEBU_REGISTER(0x20254))
+
+#define MVEBU_SOC_DEV_MUX_REG	(MVEBU_SYSTEM_REG_BASE + 0x08)
+#define NAND_EN			BIT(0)
+#define NAND_ARBITER_EN		BIT(27)
+
+#define ARMADA_XP_PUP_ENABLE	(MVEBU_SYSTEM_REG_BASE + 0x44c)
+#define GE0_PUP_EN		BIT(0)
+#define GE1_PUP_EN		BIT(1)
+#define LCD_PUP_EN		BIT(2)
+#define NAND_PUP_EN		BIT(4)
+#define SPI_PUP_EN		BIT(5)
+
+#define MVEBU_CORE_DIV_CLK_CTRL(i)	(MVEBU_CLOCK_BASE + ((i) * 0x8))
+#define NAND_ECC_DIVCKL_RATIO_OFFS	8
+#define NAND_ECC_DIVCKL_RATIO_MASK	(0x3F << NAND_ECC_DIVCKL_RATIO_OFFS)
+
 #define SDRAM_MAX_CS		4
 #define SDRAM_ADDR_MASK		0xFF000000
 
diff --git a/arch/arm/mach-mvebu/mbus.c b/arch/arm/mach-mvebu/mbus.c
index 9b76bce..771cce6 100644
--- a/arch/arm/mach-mvebu/mbus.c
+++ b/arch/arm/mach-mvebu/mbus.c
@@ -52,10 +52,9 @@
 #include <asm/io.h>
 #include <asm/arch/cpu.h>
 #include <asm/arch/soc.h>
+#include <linux/compat.h>
 #include <linux/mbus.h>
 
-#define BIT(nr)			(1UL << (nr))
-
 /* DDR target is the same on all platforms */
 #define TARGET_DDR		0
 
@@ -407,6 +406,53 @@
 	return 0;
 }
 
+static void mvebu_mbus_get_lowest_base(struct mvebu_mbus_state *mbus,
+				       phys_addr_t *base)
+{
+	int win;
+	*base = 0xffffffff;
+
+	for (win = 0; win < mbus->soc->num_wins; win++) {
+		u64 wbase;
+		u32 wsize;
+		u8 wtarget, wattr;
+		int enabled;
+
+		mvebu_mbus_read_window(mbus, win,
+				       &enabled, &wbase, &wsize,
+				       &wtarget, &wattr, NULL);
+
+		if (!enabled)
+			continue;
+
+		if (wbase < *base)
+			*base = wbase;
+	}
+}
+
+/* Program the MBUS bridge window from the lowest enabled window up to 4GiB */
+static void mvebu_config_mbus_bridge(struct mvebu_mbus_state *mbus)
+{
+	phys_addr_t base;
+	u32 val, size;
+
+	/* Use the state handed in by the caller to find the bridge base */
+	mvebu_mbus_get_lowest_base(mbus, &base);
+
+	size = 0xffffffff - base + 1;
+	if (!is_power_of_2(size)) {
+		/* Round up to power of 2; NOTE(review): ffs(base)-based, verify */
+		size = 1 << (ffs(base) + 1);
+		base = 0xffffffff - size + 1;
+	}
+
+	/* Now write base and size */
+	writel(base, MBUS_BRIDGE_WIN_BASE_REG);
+	/* Align window size to 64KiB */
+	val = (size / (64 << 10)) - 1;
+	writel((val << 16) | 0x1, MBUS_BRIDGE_WIN_CTRL_REG);
+}
+
 int mbus_dt_setup_win(struct mvebu_mbus_state *mbus,
 		      u32 base, u32 size, u8 target, u8 attr)
 {
@@ -426,6 +472,13 @@
 			return -ENOMEM;
 	}
 
+	/*
+	 * Re-configure the mbus bridge registers each time this function
+	 * is called. Since it may get called from the board code in
+	 * later boot stages as well.
+	 */
+	mvebu_config_mbus_bridge(mbus);
+
 	return 0;
 }
 
diff --git a/board/Marvell/db-88f6820-gp/db-88f6820-gp.c b/board/Marvell/db-88f6820-gp/db-88f6820-gp.c
index e661fa1..84ca55c 100644
--- a/board/Marvell/db-88f6820-gp/db-88f6820-gp.c
+++ b/board/Marvell/db-88f6820-gp/db-88f6820-gp.c
@@ -7,6 +7,7 @@
 #include <common.h>
 #include <i2c.h>
 #include <miiphy.h>
+#include <netdev.h>
 #include <asm/io.h>
 #include <asm/arch/cpu.h>
 #include <asm/arch/soc.h>
@@ -15,8 +16,6 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
-#define BIT(nr)				(1UL << (nr))
-
 #define ETH_PHY_CTRL_REG		0
 #define ETH_PHY_CTRL_POWER_DOWN_BIT	11
 #define ETH_PHY_CTRL_POWER_DOWN_MASK	(1 << ETH_PHY_CTRL_POWER_DOWN_BIT)
@@ -132,3 +131,9 @@
 
 	return 0;
 }
+
+int board_eth_init(bd_t *bis)
+{
+	cpu_eth_init(bis); /* Built in controller(s) come first */
+	return pci_eth_init(bis);
+}
diff --git a/board/Marvell/db-mv784mp-gp/db-mv784mp-gp.c b/board/Marvell/db-mv784mp-gp/db-mv784mp-gp.c
index 00ca878..d7aa149 100644
--- a/board/Marvell/db-mv784mp-gp/db-mv784mp-gp.c
+++ b/board/Marvell/db-mv784mp-gp/db-mv784mp-gp.c
@@ -6,14 +6,13 @@
 
 #include <common.h>
 #include <miiphy.h>
+#include <netdev.h>
 #include <asm/io.h>
 #include <asm/arch/cpu.h>
 #include <asm/arch/soc.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
-#define BIT(nr)				(1UL << (nr))
-
 #define ETH_PHY_CTRL_REG		0
 #define ETH_PHY_CTRL_POWER_DOWN_BIT	11
 #define ETH_PHY_CTRL_POWER_DOWN_MASK	(1 << ETH_PHY_CTRL_POWER_DOWN_BIT)
@@ -82,6 +81,12 @@
 	return 0;
 }
 
+int board_eth_init(bd_t *bis)
+{
+	cpu_eth_init(bis); /* Built in controller(s) come first */
+	return pci_eth_init(bis);
+}
+
 #ifdef CONFIG_RESET_PHY_R
 /* Configure and enable MV88E1545 PHY */
 void reset_phy(void)
diff --git a/common/board_f.c b/common/board_f.c
index c959774..a2be577 100644
--- a/common/board_f.c
+++ b/common/board_f.c
@@ -499,7 +499,6 @@
 static int reserve_global_data(void)
 {
 	gd->start_addr_sp -= sizeof(gd_t);
-	gd->start_addr_sp &= ~0xf;
 	gd->new_gd = (gd_t *)map_sysmem(gd->start_addr_sp, sizeof(gd_t));
 	debug("Reserving %zu Bytes for Global Data at: %08lx\n",
 			sizeof(gd_t), gd->start_addr_sp);
diff --git a/drivers/ddr/marvell/axp/ddr3_axp_config.h b/drivers/ddr/marvell/axp/ddr3_axp_config.h
index 800d2d1..a672044 100644
--- a/drivers/ddr/marvell/axp/ddr3_axp_config.h
+++ b/drivers/ddr/marvell/axp/ddr3_axp_config.h
@@ -44,7 +44,7 @@
  * DDR3_TRAINING_DEBUG - Debug prints of internal code
  */
 #define DDR_TARGET_FABRIC			5
-#define DRAM_ECC				0
+#define DRAM_ECC				1
 
 #ifdef MV_DDR_32BIT
 #define BUS_WIDTH                               32
diff --git a/drivers/ddr/marvell/axp/xor.c b/drivers/ddr/marvell/axp/xor.c
index 66c96ae..54924ca 100644
--- a/drivers/ddr/marvell/axp/xor.c
+++ b/drivers/ddr/marvell/axp/xor.c
@@ -18,7 +18,6 @@
 static u32 xor_regs_base_backup[MAX_CS];
 static u32 xor_regs_mask_backup[MAX_CS];
 
-static void mv_xor_hal_init(u32 chan_num);
 static int mv_xor_cmd_set(u32 chan, int command);
 static int mv_xor_ctrl_set(u32 chan, u32 xor_ctrl);
 
@@ -110,7 +109,7 @@
  * RETURN:
  *       MV_BAD_PARAM if parameters to function invalid, MV_OK otherwise.
  */
-static void mv_xor_hal_init(u32 chan_num)
+void mv_xor_hal_init(u32 chan_num)
 {
 	u32 i;
 
diff --git a/drivers/ddr/marvell/axp/xor.h b/drivers/ddr/marvell/axp/xor.h
index 3536487..3ff784d 100644
--- a/drivers/ddr/marvell/axp/xor.h
+++ b/drivers/ddr/marvell/axp/xor.h
@@ -60,6 +60,7 @@
 	u32 src_addr1;		/* Mode: Source Block address pointer */
 } __packed;
 
+void mv_xor_hal_init(u32 chan_num);
 int mv_xor_state_get(u32 chan);
 void mv_sys_xor_init(MV_DRAM_INFO *dram_info);
 void mv_sys_xor_finish(void);
diff --git a/drivers/i2c/lpc32xx_i2c.c b/drivers/i2c/lpc32xx_i2c.c
index 78d26e4..7f4eccd 100644
--- a/drivers/i2c/lpc32xx_i2c.c
+++ b/drivers/i2c/lpc32xx_i2c.c
@@ -72,7 +72,7 @@
 	if (speed == 0)
 		return -EINVAL;
 
-	half_period = (105000000 / speed) / 2;
+	half_period = (get_hclk_clk_rate() / speed) / 2;
 
 	if ((half_period > 255) || (half_period < 0))
 		return -EINVAL;
@@ -172,12 +172,12 @@
 				*(data++) = readl(&i2c->rx);
 			}
 		}
+		/* wait for end of transaction */
+		while (!((stat = readl(&i2c->stat)) & LPC32XX_I2C_STAT_TDI))
+			;
+		/* clear end-of-transaction flag */
+		writel(1, &i2c->stat);
 	}
-	/* wait for end of transation */
-	while (!((stat = readl(&i2c->stat)) & LPC32XX_I2C_STAT_TDI))
-		;
-	/* clear end-of-transaction flag */
-	writel(1, &i2c->stat);
 	/* success */
 	return 0;
 }
@@ -200,6 +200,8 @@
 	if (alen | length)
 		/* Address slave in write mode */
 		writel((dev<<1) | LPC32XX_I2C_TX_START, &i2c->tx);
+	else
+		return 0;
 	/* write address bytes */
 	while (alen) {
 		/* wait for transmit fifo not full */
diff --git a/drivers/net/mvneta.c b/drivers/net/mvneta.c
index efaae16..38ad14e 100644
--- a/drivers/net/mvneta.c
+++ b/drivers/net/mvneta.c
@@ -41,7 +41,6 @@
 	printf(fmt, ##args)
 
 #define CONFIG_NR_CPUS		1
-#define BIT(nr)			(1UL << (nr))
 #define ETH_HLEN		14	/* Total octets in header */
 
 /* 2(HW hdr) 14(MAC hdr) 4(CRC) 32(extra for cache prefetch) */
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index adc238f..bcf8127 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -21,6 +21,7 @@
 obj-$(CONFIG_PCI_MSC01) += pci_msc01.o
 obj-$(CONFIG_PCIE_IMX) += pcie_imx.o
 obj-$(CONFIG_FTPCI100) += pci_ftpci100.o
+obj-$(CONFIG_PCI_MVEBU) += pci_mvebu.o
 obj-$(CONFIG_SH4_PCI) += pci_sh4.o
 obj-$(CONFIG_SH7751_PCI) +=pci_sh7751.o
 obj-$(CONFIG_SH7780_PCI) +=pci_sh7780.o
diff --git a/drivers/pci/pci_mvebu.c b/drivers/pci/pci_mvebu.c
new file mode 100644
index 0000000..fd2744d
--- /dev/null
+++ b/drivers/pci/pci_mvebu.c
@@ -0,0 +1,423 @@
+/*
+ * PCIe driver for Marvell MVEBU SoCs
+ *
+ * Based on Barebox drivers/pci/pci-mvebu.c
+ *
+ * Ported to U-Boot by:
+ * Anton Schubert <anton.schubert@gmx.de>
+ * Stefan Roese <sr@denx.de>
+ *
+ * SPDX-License-Identifier:	GPL-2.0
+ */
+
+#include <common.h>
+#include <pci.h>
+#include <asm/errno.h>
+#include <asm/io.h>
+#include <asm/arch/cpu.h>
+#include <asm/arch/soc.h>
+#include <linux/mbus.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/* PCIe unit register offsets */
+#define SELECT(x, n)			(((x) >> (n)) & 1UL)
+
+#define PCIE_DEV_ID_OFF			0x0000
+#define PCIE_CMD_OFF			0x0004
+#define PCIE_DEV_REV_OFF		0x0008
+#define  PCIE_BAR_LO_OFF(n)		(0x0010 + ((n) << 3))
+#define  PCIE_BAR_HI_OFF(n)		(0x0014 + ((n) << 3))
+#define PCIE_CAPAB_OFF			0x0060
+#define PCIE_CTRL_STAT_OFF		0x0068
+#define PCIE_HEADER_LOG_4_OFF		0x0128
+#define  PCIE_BAR_CTRL_OFF(n)		(0x1804 + (((n) - 1) * 4))
+#define  PCIE_WIN04_CTRL_OFF(n)		(0x1820 + ((n) << 4))
+#define  PCIE_WIN04_BASE_OFF(n)		(0x1824 + ((n) << 4))
+#define  PCIE_WIN04_REMAP_OFF(n)	(0x182c + ((n) << 4))
+#define PCIE_WIN5_CTRL_OFF		0x1880
+#define PCIE_WIN5_BASE_OFF		0x1884
+#define PCIE_WIN5_REMAP_OFF		0x188c
+#define PCIE_CONF_ADDR_OFF		0x18f8
+#define  PCIE_CONF_ADDR_EN		BIT(31)
+#define  PCIE_CONF_REG(r)		((((r) & 0xf00) << 16) | ((r) & 0xfc))
+#define  PCIE_CONF_BUS(b)		(((b) & 0xff) << 16)
+#define  PCIE_CONF_DEV(d)		(((d) & 0x1f) << 11)
+#define  PCIE_CONF_FUNC(f)		(((f) & 0x7) << 8)
+#define  PCIE_CONF_ADDR(dev, reg) \
+	(PCIE_CONF_BUS(PCI_BUS(dev)) | PCIE_CONF_DEV(PCI_DEV(dev))    | \
+	 PCIE_CONF_FUNC(PCI_FUNC(dev)) | PCIE_CONF_REG(reg) | \
+	 PCIE_CONF_ADDR_EN)
+#define PCIE_CONF_DATA_OFF		0x18fc
+#define PCIE_MASK_OFF			0x1910
+#define  PCIE_MASK_ENABLE_INTS          (0xf << 24)
+#define PCIE_CTRL_OFF			0x1a00
+#define  PCIE_CTRL_X1_MODE		BIT(0)
+#define PCIE_STAT_OFF			0x1a04
+#define  PCIE_STAT_BUS                  (0xff << 8)
+#define  PCIE_STAT_DEV                  (0x1f << 16)
+#define  PCIE_STAT_LINK_DOWN		BIT(0)
+#define PCIE_DEBUG_CTRL			0x1a60
+#define  PCIE_DEBUG_SOFT_RESET		BIT(20)
+
+struct resource {
+	u32 start;
+	u32 end;
+};
+
+struct mvebu_pcie {
+	struct pci_controller hose;
+	char *name;
+	void __iomem *base;
+	void __iomem *membase;
+	struct resource mem;
+	void __iomem *iobase;
+	u32 port;
+	u32 lane;
+	u32 lane_mask;
+	pci_dev_t dev;
+};
+
+#define to_pcie(_hc)	container_of(_hc, struct mvebu_pcie, hose)
+
+/*
+ * MVEBU PCIe controller needs MEMORY and I/O BARs to be mapped
+ * into SoCs address space. Each controller will map 32M of MEM
+ * and 64K of I/O space when registered.
+ */
+static void __iomem *mvebu_pcie_membase = (void __iomem *)MBUS_PCI_MEM_BASE;
+#define PCIE_MEM_SIZE	(32 << 20)
+
+#if defined(CONFIG_ARMADA_38X)
+#define PCIE_BASE(if)					\
+	((if) == 0 ?					\
+	 MVEBU_REG_PCIE_BASE + 0x40000 :		\
+	 MVEBU_REG_PCIE_BASE + 0x4000 * (if))
+
+/*
+ * On A38x MV6820 these PEX ports are supported:
+ *  0 - Port 0.0
+ *  1 - Port 0.1
+ *  2 - Port 0.2
+ */
+#define MAX_PEX 3
+static struct mvebu_pcie pcie_bus[MAX_PEX];
+
+static void mvebu_get_port_lane(struct mvebu_pcie *pcie, int pex_idx,
+				int *mem_target, int *mem_attr)
+{
+	u8 port[] = { 0, 1, 2 };
+	u8 lane[] = { 0, 0, 0 };
+	u8 target[] = { 8, 4, 4 };
+	u8 attr[] = { 0xe8, 0xe8, 0xd8 };
+
+	pcie->port = port[pex_idx];
+	pcie->lane = lane[pex_idx];
+	*mem_target = target[pex_idx];
+	*mem_attr = attr[pex_idx];
+}
+#else
+#define PCIE_BASE(if)							\
+	((if) < 8 ?							\
+	 (MVEBU_REG_PCIE_BASE + ((if) / 4) * 0x40000 + ((if) % 4) * 0x4000) : \
+	 (MVEBU_REG_PCIE_BASE + 0x2000 + ((if) % 8) * 0x40000))
+
+/*
+ * On AXP MV78460 these PEX ports are supported:
+ *  0 - Port 0.0
+ *  1 - Port 0.1
+ *  2 - Port 0.2
+ *  3 - Port 0.3
+ *  4 - Port 1.0
+ *  5 - Port 1.1
+ *  6 - Port 1.2
+ *  7 - Port 1.3
+ *  8 - Port 2.0
+ *  9 - Port 3.0
+ */
+#define MAX_PEX 10
+static struct mvebu_pcie pcie_bus[MAX_PEX];
+
+static void mvebu_get_port_lane(struct mvebu_pcie *pcie, int pex_idx,
+				int *mem_target, int *mem_attr)
+{
+	u8 port[] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 3 };
+	u8 lane[] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 0 };
+	u8 target[] = { 4, 4, 4, 4, 8, 8, 8, 8, 4, 8 };
+	u8 attr[] = { 0xe8, 0xd8, 0xb8, 0x78,
+		      0xe8, 0xd8, 0xb8, 0x78,
+		      0xf8, 0xf8 };
+
+	pcie->port = port[pex_idx];
+	pcie->lane = lane[pex_idx];
+	*mem_target = target[pex_idx];
+	*mem_attr = attr[pex_idx];
+}
+#endif
+
+static inline bool mvebu_pcie_link_up(struct mvebu_pcie *pcie)
+{
+	u32 val;
+	val = readl(pcie->base + PCIE_STAT_OFF);
+	return !(val & PCIE_STAT_LINK_DOWN);
+}
+
+static void mvebu_pcie_set_local_bus_nr(struct mvebu_pcie *pcie, int busno)
+{
+	u32 stat;
+
+	stat = readl(pcie->base + PCIE_STAT_OFF);
+	stat &= ~PCIE_STAT_BUS;
+	stat |= busno << 8;
+	writel(stat, pcie->base + PCIE_STAT_OFF);
+}
+
+static void mvebu_pcie_set_local_dev_nr(struct mvebu_pcie *pcie, int devno)
+{
+	u32 stat;
+
+	stat = readl(pcie->base + PCIE_STAT_OFF);
+	stat &= ~PCIE_STAT_DEV;
+	stat |= devno << 16;
+	writel(stat, pcie->base + PCIE_STAT_OFF);
+}
+
+static int mvebu_pcie_get_local_bus_nr(struct mvebu_pcie *pcie)
+{
+	u32 stat;
+
+	stat = readl(pcie->base + PCIE_STAT_OFF);
+	return (stat & PCIE_STAT_BUS) >> 8;
+}
+
+static int mvebu_pcie_get_local_dev_nr(struct mvebu_pcie *pcie)
+{
+	u32 stat;
+
+	stat = readl(pcie->base + PCIE_STAT_OFF);
+	return (stat & PCIE_STAT_DEV) >> 16;
+}
+
+static inline struct mvebu_pcie *hose_to_pcie(struct pci_controller *hose)
+{
+	return container_of(hose, struct mvebu_pcie, hose);
+}
+
+/*
+ * Read one 32-bit configuration register of @dev into @val.
+ *
+ * The access goes through the address/data register pair of the port.
+ * On the local bus only two devices are answered: the local device and one
+ * neighbour, which is device 1 if the local device is 0 and device 0
+ * otherwise. A read from any other device there is not issued; @val is
+ * set to 0xffffffff instead.
+ *
+ * Returns 0 when the access was issued, 1 when it was filtered out.
+ */
+static int mvebu_pcie_read_config_dword(struct pci_controller *hose,
+		pci_dev_t dev, int offset, u32 *val)
+{
+	struct mvebu_pcie *pcie = hose_to_pcie(hose);
+	int local_dev = PCI_DEV(pcie->dev);
+	int peer_dev = local_dev == 0 ? 1 : 0;
+	int devno = PCI_DEV(dev);
+	bool on_local_bus = PCI_BUS(dev) == PCI_BUS(pcie->dev);
+
+	/* Only the local device and its one neighbour pass on the local bus */
+	if (on_local_bus && devno != local_dev && devno != peer_dev) {
+		*val = 0xffffffff;
+		return 1;
+	}
+
+	/* Select the register via the address port ... */
+	writel(PCIE_CONF_ADDR(dev, offset), pcie->base + PCIE_CONF_ADDR_OFF);
+
+	/* ... and fetch its contents from the data port */
+	*val = readl(pcie->base + PCIE_CONF_DATA_OFF);
+
+	return 0;
+}
+
+/*
+ * Write @val to one 32-bit configuration register of @dev.
+ *
+ * The access goes through the address/data register pair of the port.
+ * On the local bus only two devices are accepted: the local device and one
+ * neighbour, which is device 1 if the local device is 0 and device 0
+ * otherwise. A write to any other device there is dropped.
+ *
+ * Returns 0 when the access was issued, 1 when it was filtered out.
+ */
+static int mvebu_pcie_write_config_dword(struct pci_controller *hose,
+		pci_dev_t dev, int offset, u32 val)
+{
+	struct mvebu_pcie *pcie = hose_to_pcie(hose);
+	int local_dev = PCI_DEV(pcie->dev);
+	int peer_dev = local_dev == 0 ? 1 : 0;
+	int devno = PCI_DEV(dev);
+	bool on_local_bus = PCI_BUS(dev) == PCI_BUS(pcie->dev);
+
+	/* Only the local device and its one neighbour pass on the local bus */
+	if (on_local_bus && devno != local_dev && devno != peer_dev)
+		return 1;
+
+	/* Select the register via the address port, then store the data */
+	writel(PCIE_CONF_ADDR(dev, offset), pcie->base + PCIE_CONF_ADDR_OFF);
+	writel(val, pcie->base + PCIE_CONF_DATA_OFF);
+
+	return 0;
+}
+
+/*
+ * Setup PCIE BARs and Address Decode Wins:
+ * BAR[1,2] -> cleared, then BAR[1] -> covers all DRAM banks
+ * WIN[0-3] -> DRAM bank[0-3]
+ */
+static void mvebu_pcie_setup_wins(struct mvebu_pcie *pcie)
+{
+	const struct mbus_dram_target_info *dram = mvebu_mbus_dram_info();
+	u32 size;
+	int i;
+
+	/* First, disable and clear BARs and windows. */
+	for (i = 1; i < 3; i++) {
+		writel(0, pcie->base + PCIE_BAR_CTRL_OFF(i));
+		writel(0, pcie->base + PCIE_BAR_LO_OFF(i));
+		writel(0, pcie->base + PCIE_BAR_HI_OFF(i));
+	}
+
+	for (i = 0; i < 5; i++) {
+		writel(0, pcie->base + PCIE_WIN04_CTRL_OFF(i));
+		writel(0, pcie->base + PCIE_WIN04_BASE_OFF(i));
+		writel(0, pcie->base + PCIE_WIN04_REMAP_OFF(i));
+	}
+
+	writel(0, pcie->base + PCIE_WIN5_CTRL_OFF);
+	writel(0, pcie->base + PCIE_WIN5_BASE_OFF);
+	writel(0, pcie->base + PCIE_WIN5_REMAP_OFF);
+
+	/* Setup windows for DDR banks. Count total DDR size on the fly. */
+	size = 0;
+	for (i = 0; i < dram->num_cs; i++) {
+		const struct mbus_dram_window *cs = dram->cs + i;
+
+		writel(cs->base & 0xffff0000,
+		       pcie->base + PCIE_WIN04_BASE_OFF(i));
+		writel(0, pcie->base + PCIE_WIN04_REMAP_OFF(i));
+		writel(((cs->size - 1) & 0xffff0000) |
+		       (cs->mbus_attr << 8) |
+		       (dram->mbus_dram_target_id << 4) | 1,
+		       pcie->base + PCIE_WIN04_CTRL_OFF(i));
+
+		size += cs->size;
+	}
+
+	/* Round up to a power of two; 2^32 is stored as 0 (1 << 32 is UB) */
+	if ((size & (size - 1)) != 0)
+		size = fls(size) < 32 ? 1U << fls(size) : 0;
+
+	/* Setup BAR[1] to all DRAM banks. */
+	writel(dram->cs[0].base | 0xc, pcie->base + PCIE_BAR_LO_OFF(1));
+	writel(0, pcie->base + PCIE_BAR_HI_OFF(1));
+	writel(((size - 1) & 0xffff0000) | 0x1,
+	       pcie->base + PCIE_BAR_CTRL_OFF(1));
+}
+
+/* Set up and scan every PEX port that is powered and has link */
+void pci_init_board(void)
+{
+	int mem_target, mem_attr, i;
+	int bus = 0;
+	u32 reg;
+	u32 soc_ctrl = readl(MVEBU_SYSTEM_REG_BASE + 0x4);
+
+	/* Check SoC Control Power State */
+	debug("%s: SoC Control %08x, 0en %01lx, 1en %01lx, 2en %01lx\n",
+	      __func__, soc_ctrl, SELECT(soc_ctrl, 0), SELECT(soc_ctrl, 1),
+	      SELECT(soc_ctrl, 2));
+
+	for (i = 0; i < MAX_PEX; i++) {
+		struct mvebu_pcie *pcie = &pcie_bus[i];
+		struct pci_controller *hose = &pcie->hose;
+
+		/* Get port number, lane number and memory target / attr */
+		mvebu_get_port_lane(pcie, i, &mem_target, &mem_attr);
+
+		/*
+		 * Don't read at all from pci registers if port power is down.
+		 * Each index tests its own port; ports differ in lane count.
+		 */
+		if (SELECT(soc_ctrl, pcie->port) == 0) {
+			debug("%s: skipping port %d\n", __func__, pcie->port);
+			continue;
+		}
+
+		pcie->base = (void __iomem *)PCIE_BASE(i);
+
+		/* Check link and skip ports that have no link */
+		if (!mvebu_pcie_link_up(pcie)) {
+			debug("%s: PCIe %d.%d - down\n", __func__,
+			      pcie->port, pcie->lane);
+			continue;
+		}
+		debug("%s: PCIe %d.%d - up, base %08x\n", __func__,
+		      pcie->port, pcie->lane, (u32)pcie->base);
+
+		/* Read Id info and local bus/dev */
+		debug("direct conf read %08x, local bus %d, local dev %d\n",
+		      readl(pcie->base), mvebu_pcie_get_local_bus_nr(pcie),
+		      mvebu_pcie_get_local_dev_nr(pcie));
+
+		mvebu_pcie_set_local_bus_nr(pcie, bus);
+		mvebu_pcie_set_local_dev_nr(pcie, 0);
+		pcie->dev = PCI_BDF(bus, 0, 0);
+
+		pcie->mem.start = (u32)mvebu_pcie_membase;
+		pcie->mem.end = pcie->mem.start + PCIE_MEM_SIZE - 1;
+		mvebu_pcie_membase += PCIE_MEM_SIZE;
+
+		if (mvebu_mbus_add_window_by_id(mem_target, mem_attr,
+						(phys_addr_t)pcie->mem.start,
+						PCIE_MEM_SIZE)) {
+			printf("PCIe unable to add mbus window for mem at %08x+%08x\n",
+			       (u32)pcie->mem.start, PCIE_MEM_SIZE);
+		}
+
+		/* Setup windows and configure host bridge */
+		mvebu_pcie_setup_wins(pcie);
+
+		/* Master + slave enable. */
+		reg = readl(pcie->base + PCIE_CMD_OFF);
+		reg |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+		reg |= BIT(10);		/* disable interrupts */
+		writel(reg, pcie->base + PCIE_CMD_OFF);
+
+		/* Setup U-Boot PCI Controller */
+		hose->first_busno = 0;
+		hose->current_busno = bus;
+
+		/* PCI memory space */
+		pci_set_region(hose->regions + 0, pcie->mem.start,
+			       pcie->mem.start, PCIE_MEM_SIZE, PCI_REGION_MEM);
+		pci_set_region(hose->regions + 1, 0, 0, gd->ram_size,
+			       PCI_REGION_MEM | PCI_REGION_SYS_MEMORY);
+		hose->region_count = 2;
+
+		pci_set_ops(hose,
+			    pci_hose_read_config_byte_via_dword,
+			    pci_hose_read_config_word_via_dword,
+			    mvebu_pcie_read_config_dword,
+			    pci_hose_write_config_byte_via_dword,
+			    pci_hose_write_config_word_via_dword,
+			    mvebu_pcie_write_config_dword);
+		pci_register_hose(hose);
+
+		hose->last_busno = pci_hose_scan(hose);
+
+		/* Set BAR0 to internal registers */
+		writel(SOC_REGS_PHY_BASE, pcie->base + PCIE_BAR_LO_OFF(0));
+		writel(0, pcie->base + PCIE_BAR_HI_OFF(0));
+
+		bus = hose->last_busno + 1;
+	}
+}
diff --git a/drivers/usb/host/ehci-marvell.c b/drivers/usb/host/ehci-marvell.c
index 3a9f60f..50fa01c 100644
--- a/drivers/usb/host/ehci-marvell.c
+++ b/drivers/usb/host/ehci-marvell.c
@@ -30,14 +30,22 @@
  */
 #ifdef CONFIG_ARMADA_XP
 
-#define MVUSB0_BASE		MVEBU_USB20_BASE
+/*
+ * Armada XP and Armada 38x have different base addresses for the USB 2.0
+ * EHCI host controller, so MVUSB0_BASE is selected at run time. The
+ * controller of each further port follows at a 4 KiB (1 << 12) stride.
+ */
+#define MVUSB0_BASE					\
+	(mvebu_soc_family() == MVEBU_SOC_A38X ?		\
+	 MVEBU_USB20_BASE : MVEBU_AXP_USB_BASE)
+#define MVUSB_BASE(port)	(MVUSB0_BASE + ((port) << 12))
 
 /*
  * Once all the older Marvell SoC's (Orion, Kirkwood) are converted
  * to the common mvebu archticture including the mbus setup, this
  * will be the only function needed to configure the access windows
  */
-static void usb_brg_adrdec_setup(void)
+static void usb_brg_adrdec_setup(int index)
 {
 	const struct mbus_dram_target_info *dram;
 	int i;
@@ -45,8 +53,8 @@
 	dram = mvebu_mbus_dram_info();
 
 	for (i = 0; i < 4; i++) {
-		writel(0, MVUSB0_BASE + USB_WINDOW_CTRL(i));
-		writel(0, MVUSB0_BASE + USB_WINDOW_BASE(i));
+		writel(0, MVUSB_BASE(index) + USB_WINDOW_CTRL(i));
+		writel(0, MVUSB_BASE(index) + USB_WINDOW_BASE(i));
 	}
 
 	for (i = 0; i < dram->num_cs; i++) {
@@ -55,14 +63,16 @@
 		/* Write size, attributes and target id to control register */
 		writel(((cs->size - 1) & 0xffff0000) | (cs->mbus_attr << 8) |
 		       (dram->mbus_dram_target_id << 4) | 1,
-		       MVUSB0_BASE + USB_WINDOW_CTRL(i));
+		       MVUSB_BASE(index) + USB_WINDOW_CTRL(i));
 
 		/* Write base address to base register */
-		writel(cs->base, MVUSB0_BASE + USB_WINDOW_BASE(i));
+		writel(cs->base, MVUSB_BASE(index) + USB_WINDOW_BASE(i));
 	}
 }
 #else
-static void usb_brg_adrdec_setup(void)
+#define MVUSB_BASE(port)	MVUSB0_BASE
+
+static void usb_brg_adrdec_setup(int index)
 {
 	int i;
 	u32 size, base, attrib;
@@ -111,9 +121,9 @@
 int ehci_hcd_init(int index, enum usb_init_type init,
 		struct ehci_hccr **hccr, struct ehci_hcor **hcor)
 {
-	usb_brg_adrdec_setup();
+	usb_brg_adrdec_setup(index);
 
-	*hccr = (struct ehci_hccr *)(MVUSB0_BASE + 0x100);
+	*hccr = (struct ehci_hccr *)(MVUSB_BASE(index) + 0x100);
 	*hcor = (struct ehci_hcor *)((uint32_t) *hccr
 			+ HC_LENGTH(ehci_readl(&(*hccr)->cr_capbase)));
 
diff --git a/include/asm-generic/global_data.h b/include/asm-generic/global_data.h
index cc369fc..2155265 100644
--- a/include/asm-generic/global_data.h
+++ b/include/asm-generic/global_data.h
@@ -99,8 +99,7 @@
 	int pcidelay_done;
 #endif
 	struct udevice *cur_serial_dev;	/* current serial device */
-	/* arch-specific data */
-	struct arch_global_data arch __attribute__((aligned(16)));
+	struct arch_global_data arch;	/* architecture-specific data */
 } gd_t;
 #endif
 
diff --git a/include/configs/db-88f6820-gp.h b/include/configs/db-88f6820-gp.h
index 739c2bf..1dd4182 100644
--- a/include/configs/db-88f6820-gp.h
+++ b/include/configs/db-88f6820-gp.h
@@ -20,7 +20,12 @@
 #define CONFIG_SYS_GENERIC_BOARD
 #define CONFIG_DISPLAY_BOARDINFO_LATE
 
-#define	CONFIG_SYS_TEXT_BASE	0x04000000
+/*
+ * TEXT_BASE needs to be below 16MiB, since this area is scrubbed
+ * for DDR ECC byte filling in the SPL before loading the main
+ * U-Boot into it.
+ */
+#define	CONFIG_SYS_TEXT_BASE	0x00800000
 #define CONFIG_SYS_TCLK		250000000	/* 250MHz */
 
 /*
@@ -36,6 +41,7 @@
 #define CONFIG_CMD_FS_GENERIC
 #define CONFIG_CMD_I2C
 #define CONFIG_CMD_MMC
+#define CONFIG_CMD_PCI
 #define CONFIG_CMD_PING
 #define CONFIG_CMD_SCSI
 #define CONFIG_CMD_SF
@@ -101,6 +107,13 @@
 #define CONFIG_SYS_NETA_INTERFACE_TYPE	PHY_INTERFACE_MODE_RGMII
 #define PHY_ANEG_TIMEOUT	8000	/* PHY needs a longer aneg time */
 
+/* PCIe support */
+#define CONFIG_PCI
+#define CONFIG_PCI_MVEBU
+#define CONFIG_PCI_PNP
+#define CONFIG_PCI_SCAN_SHOW
+#define CONFIG_E1000	/* enable Intel E1000 support for testing */
+
 #define CONFIG_SYS_CONSOLE_INFO_QUIET	/* don't print console @ startup */
 #define CONFIG_SYS_ALT_MEMTEST
 
diff --git a/include/configs/db-mv784mp-gp.h b/include/configs/db-mv784mp-gp.h
index 41e6fdc..eec2ba9 100644
--- a/include/configs/db-mv784mp-gp.h
+++ b/include/configs/db-mv784mp-gp.h
@@ -17,7 +17,12 @@
 #define CONFIG_SYS_GENERIC_BOARD
 #define CONFIG_DISPLAY_BOARDINFO_LATE
 
-#define	CONFIG_SYS_TEXT_BASE	0x04000000
+/*
+ * TEXT_BASE needs to be below 16MiB, since this area is scrubbed
+ * for DDR ECC byte filling in the SPL before loading the main
+ * U-Boot into it.
+ */
+#define	CONFIG_SYS_TEXT_BASE	0x00800000
 #define CONFIG_SYS_TCLK		250000000	/* 250MHz */
 
 /*
@@ -28,11 +33,13 @@
 #define CONFIG_CMD_ENV
 #define CONFIG_CMD_I2C
 #define CONFIG_CMD_IDE
+#define CONFIG_CMD_PCI
 #define CONFIG_CMD_PING
 #define CONFIG_CMD_SF
 #define CONFIG_CMD_SPI
 #define CONFIG_CMD_TFTPPUT
 #define CONFIG_CMD_TIME
+#define CONFIG_CMD_USB
 
 /* I2C */
 #define CONFIG_SYS_I2C
@@ -41,6 +48,13 @@
 #define CONFIG_SYS_I2C_SLAVE		0x0
 #define CONFIG_SYS_I2C_SPEED		100000
 
+/* USB/EHCI configuration */
+#define CONFIG_USB_EHCI
+#define CONFIG_USB_STORAGE
+#define CONFIG_USB_EHCI_MARVELL
+#define CONFIG_EHCI_IS_TDI
+#define CONFIG_USB_MAX_CONTROLLER_COUNT 3
+
 /* SPI NOR flash default params, used by sf commands */
 #define CONFIG_SF_DEFAULT_SPEED		1000000
 #define CONFIG_SF_DEFAULT_MODE		SPI_MODE_3
@@ -89,6 +103,13 @@
 #define CONFIG_DOS_PARTITION
 #endif /* CONFIG_CMD_IDE */
 
+/* PCIe support */
+#define CONFIG_PCI
+#define CONFIG_PCI_MVEBU
+#define CONFIG_PCI_PNP
+#define CONFIG_PCI_SCAN_SHOW
+#define CONFIG_E1000	/* enable Intel E1000 support for testing */
+
 /*
  * mv-common.h should be defined after CMD configs since it used them
  * to enable certain macros
@@ -136,6 +157,7 @@
 #define CONFIG_SPL_SPI_BUS		0
 #define CONFIG_SPL_SPI_CS		0
 #define CONFIG_SYS_SPI_U_BOOT_OFFS	0x20000
+#define CONFIG_SYS_U_BOOT_OFFS		CONFIG_SYS_SPI_U_BOOT_OFFS
 
 /* Enable DDR support in SPL (DDR3 training from Marvell bin_hdr) */
 #define CONFIG_SYS_MVEBU_DDR_AXP
diff --git a/include/configs/maxbcm.h b/include/configs/maxbcm.h
index 0fb117f..3530a26 100644
--- a/include/configs/maxbcm.h
+++ b/include/configs/maxbcm.h
@@ -15,7 +15,12 @@
 #define CONFIG_SYS_GENERIC_BOARD
 #define CONFIG_DISPLAY_BOARDINFO_LATE
 
-#define	CONFIG_SYS_TEXT_BASE	0x04000000
+/*
+ * TEXT_BASE needs to be below 16MiB, since this area is scrubbed
+ * for DDR ECC byte filling in the SPL before loading the main
+ * U-Boot into it.
+ */
+#define	CONFIG_SYS_TEXT_BASE	0x00800000
 #define CONFIG_SYS_TCLK		250000000	/* 250MHz */
 
 /*
diff --git a/include/configs/odroid_xu3.h.rej b/include/configs/odroid_xu3.h.rej
deleted file mode 100644
index a1c2964..0000000
--- a/include/configs/odroid_xu3.h.rej
+++ /dev/null
@@ -1,10 +0,0 @@
---- include/configs/odroid_xu3.h
-+++ include/configs/odroid_xu3.h
-@@ -10,7 +10,6 @@
- 
- #include "exynos5420-common.h"
- 
--#define CONFIG_SYS_PROMPT		"ODROID-XU3 # "
- #define CONFIG_IDENT_STRING		" for ODROID-XU3"
- 
- #define CONFIG_BOARD_COMMON